In [None]:
import sys
import os
# Prepend the parent of the current working directory to sys.path; presumably
# this lets the project-local packages imported below (database, modules)
# resolve when the notebook is run from a subdirectory — TODO confirm.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))

from database.connection import sql_connection
from database.dataset import dataset_config
import lightgbm as lgb
from skopt.space import Real, Integer
from modules.interface import RunModel, RunOptimization

In [None]:
# Fetch the standard-rings table through the project's SQL connection helper.
stdrings = sql_connection.set_standard_rings_data()

# Each region pairs a `cluster_eta` window with a `cluster_et` window.
# NOTE(review): how split_dataframe interprets these windows (and how many
# frames it returns) is defined in database.dataset — confirm there.
region_cuts = (
    ([0.6, 0.8], [20000, 30000]),
    ([1.2, 1.8], [100000, 250000]),
)
params = [
    [{'cluster_eta': eta_window}, {'cluster_et': et_window}]
    for eta_window, et_window in region_cuts
]

dataset = dataset_config.split_dataframe(stdrings, params)
data = dataset_config.prepare_and_split_data(
    input_type='std_rings',
    stdrings_df=dataset,
    random_state=42,
)

In [None]:
# Search space handed to the optimizer: one skopt dimension per tuned
# LightGBM hyper-parameter, given as (low, high) bounds.
# NOTE(review): max_depth in [200, 700] with num_leaves <= 700 looks like it
# will rarely constrain the trees — confirm this range is intentional.
space_lgbm = [
    Real(0.01, 0.9, name='learning_rate'),
    Integer(200, 700, name='num_leaves'),
    Integer(200, 700, name='max_depth'),
    Real(0.5, 1, name='feature_fraction'),
    Real(0.7, 1, name='bagging_fraction'),
    Integer(1, 10, name='bagging_freq'),
    Real(0.0, 1, name='lambda_l1'),
    Real(0.5, 1, name='lambda_l2'),
]

# LightGBM parameters that stay constant across every optimisation trial;
# passed as `fixed_params` to run_multiple_optimizations.
fixed_params_lgbm = {
    'objective': 'mae',
    'metric': 'mae',
    # The key must be exactly 'num_iterations': with a trailing space it is
    # not the LightGBM parameter name and the 300-iteration budget is lost.
    'num_iterations': 300,
    'random_state': 42,
    'n_jobs': -1,
    'verbose': -1,
}

# Extra keyword arguments for the calibration step: a single LightGBM
# early-stopping callback (10 rounds, silent).
calibration_kwargs = dict(
    callbacks=[
        lgb.early_stopping(stopping_rounds=10, verbose=False),
    ],
)
# Settings forwarded to the optimisation routine: 15 initial points drawn
# with the 'lhs' generator, 30 calls in total, fixed seed.
optimization_kwargs = dict(
    n_initial_points=15,
    n_calls=30,
    initial_point_generator='lhs',
    random_state=42,
)

# The model backend is selected by name through `model_class` ("lgbm" here)
# and the search routine through `opt_class`; one optimisation is launched
# via run_multiple_optimizations over the prepared `data`.
optimizer_lgbm = RunOptimization()

lgbm_search_kwargs = {
    'opt_class': 'gp_minimize',
    'model_class': "lgbm",
    'datasets': data,
    'space': space_lgbm,
    'fixed_params': fixed_params_lgbm,
    'metric': 'mae',
    'calibration_kwargs': calibration_kwargs,
    'optimization_kwargs': optimization_kwargs,
}
results = optimizer_lgbm.run_multiple_optimizations(**lgbm_search_kwargs)

Optimization finished.
Best score (mean_absolute_error): 0.0419
Best parameters: {'learning_rate': 0.4671575607628733, 'num_leaves': np.int64(547), 'max_depth': np.int64(566), 'feature_fraction': 0.5167143329975306, 'bagging_fraction': 0.8564095270847129, 'bagging_freq': np.int64(3), 'lambda_l1': 0.8123876211864385, 'lambda_l2': 0.7845021003033774}

To train the final model with these parameters, call the '.fit_best_model()' method.
Optimization finished.
Best score (mean_absolute_error): 0.0585
Best parameters: {'learning_rate': 0.1042252838323025, 'num_leaves': np.int64(200), 'max_depth': np.int64(249), 'feature_fraction': 1.0, 'bagging_fraction': 1.0, 'bagging_freq': np.int64(2), 'lambda_l1': 1.0, 'lambda_l2': 0.5}

To train the final model with these parameters, call the '.fit_best_model()' method.
Optimization finished.
Best score (mean_absolute_error): 0.0530
Best parameters: {'learning_rate': 0.11880326417260266, 'num_leaves': np.int64(514), 'max_depth': np.int64(601), 'feature_

In [None]:
# For every optimizer held by `optimizer_lgbm`, call get_best_model() and
# gather its test targets and predictions into parallel lists.
y_test, y_pred = [], []

for idx in range(len(optimizer_lgbm.optimizer)):
    fitted = optimizer_lgbm.optimizer[idx]
    fitted.get_best_model()

    # Debug output: despite the label text, what is printed is the *type* of
    # each held-out attribute (swap `type` for `len` to print sizes instead).
    for attr_name in ("X_test", "y_test", "y_pred"):
        print(f"\nMelhor MAE para LightGBM: {type(getattr(fitted, attr_name))}")

    y_test.append(fitted.y_test)
    y_pred.append(fitted.y_pred)

# Disabled sanity check: total number of rows across all train/test splits.
# test_size = sum(len(opt.X_test) for opt in optimizer_lgbm.optimizer)
# train_size = sum(len(opt.X_train) for opt in optimizer_lgbm.optimizer)
# print(train_size + test_size)

Training the final model with the best parameters...
Final model has been trained and is stored in the '.best_model_' attribute.

Melhor MAE para LightGBM: <class 'pandas.core.frame.DataFrame'>

Melhor MAE para LightGBM: <class 'pandas.core.series.Series'>

Melhor MAE para LightGBM: <class 'numpy.ndarray'>
Training the final model with the best parameters...
Final model has been trained and is stored in the '.best_model_' attribute.

Melhor MAE para LightGBM: <class 'pandas.core.frame.DataFrame'>

Melhor MAE para LightGBM: <class 'pandas.core.series.Series'>

Melhor MAE para LightGBM: <class 'numpy.ndarray'>
Training the final model with the best parameters...
Final model has been trained and is stored in the '.best_model_' attribute.

Melhor MAE para LightGBM: <class 'pandas.core.frame.DataFrame'>

Melhor MAE para LightGBM: <class 'pandas.core.series.Series'>

Melhor MAE para LightGBM: <class 'numpy.ndarray'>


In [None]:
from plots import plots
from sklearn.model_selection import train_test_split

# Re-split every per-region frame to recover the full test rows (all columns,
# including "alpha") that go with the collected targets/predictions.
# NOTE(review): this assumes train_size=0.8 / random_state=42 reproduces the
# split made inside prepare_and_split_data, so that X_test_lists[k] lines up
# row-for-row with y_test[k] — confirm against database.dataset.
X_test_lists = []
for stdrings_cluster_dataframe in dataset:
    alpha = stdrings_cluster_dataframe["alpha"]
    # `y_test_` carries a trailing underscore so the `y_test` list built from
    # the optimizers is not overwritten.
    X_train, X_test, y_train, y_test_ = train_test_split(
        stdrings_cluster_dataframe,
        alpha,
        train_size=0.8,
        random_state=42,
    )
    X_test_lists.append(X_test)

print(y_test[0])
print(X_test_lists[0].columns)


184     0.979848
1724    0.981707
251     0.992766
1121    1.062317
1486    1.253747
          ...   
1232    0.994171
266     1.067189
1170    1.024850
1473    0.976328
1366    1.106981
Name: alpha, Length: 412, dtype: float64
Index(['StdRings_0', 'StdRings_1', 'StdRings_2', 'StdRings_3', 'StdRings_4',
       'StdRings_5', 'StdRings_6', 'StdRings_7', 'StdRings_8', 'StdRings_9',
       ...
       'cluster_phi', 'cluster_et', 'cluster_e237', 'cluster_e277', 'mc_et',
       'delta_eta_calib', 'delta_phi_calib', 'hotCellEta', 'hotCellPhi',
       'alpha'],
      dtype='object', length=111)


In [None]:
# Quick inspection of the collected objects: column names of the first test
# frame, then length and container type of the target/prediction lists, then
# the row index of the first target series.
print(X_test_lists[0].columns.tolist())

for collected in (y_test, y_pred):
    print(len(collected))

for collected in (y_test, y_pred):
    print(type(collected))

print(list(y_test[0].index))


['StdRings_0', 'StdRings_1', 'StdRings_2', 'StdRings_3', 'StdRings_4', 'StdRings_5', 'StdRings_6', 'StdRings_7', 'StdRings_8', 'StdRings_9', 'StdRings_10', 'StdRings_11', 'StdRings_12', 'StdRings_13', 'StdRings_14', 'StdRings_15', 'StdRings_16', 'StdRings_17', 'StdRings_18', 'StdRings_19', 'StdRings_20', 'StdRings_21', 'StdRings_22', 'StdRings_23', 'StdRings_24', 'StdRings_25', 'StdRings_26', 'StdRings_27', 'StdRings_28', 'StdRings_29', 'StdRings_30', 'StdRings_31', 'StdRings_32', 'StdRings_33', 'StdRings_34', 'StdRings_35', 'StdRings_36', 'StdRings_37', 'StdRings_38', 'StdRings_39', 'StdRings_40', 'StdRings_41', 'StdRings_42', 'StdRings_43', 'StdRings_44', 'StdRings_45', 'StdRings_46', 'StdRings_47', 'StdRings_48', 'StdRings_49', 'StdRings_50', 'StdRings_51', 'StdRings_52', 'StdRings_53', 'StdRings_54', 'StdRings_55', 'StdRings_56', 'StdRings_57', 'StdRings_58', 'StdRings_59', 'StdRings_60', 'StdRings_61', 'StdRings_62', 'StdRings_63', 'StdRings_64', 'StdRings_65', 'StdRings_66', 'Std

In [None]:
import numpy as np
# Build one merged frame per optimizer from its test rows, true targets and
# predictions. Results are appended: assigning to merged_dataframe[i] on an
# empty list raises IndexError on the first iteration.
merged_dataframe = []
for i in range(len(optimizer_lgbm.optimizer)):
    merged_dataframe.append(
        plots.merge_dataframes(X_test_lists[i], y_test[i], y_pred[i])
    )

# Bin edges used for the E_T and eta binning.
# NOTE(review): the E_T edges are scaled by 1000, presumably GeV -> MeV to
# match the `cluster_et` values used earlier — confirm the units.
bins_et = 1000 * np.array(
    [5, 10, 20, 30, 40, 50, 70, 100, 150, 200, 250, 900, 3000]
)
bins_eta = np.array(
    [0, 0.6, 0.8, 1.2, 1.37, 1.52, 1.8, 2.0, 2.2, 2.5]
)