# LGBM Model development

In [29]:
import pandas as pd
import seaborn as sns
import openpyxl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle

In [30]:
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
import optuna

In [31]:
from Functions.helper_functions import * 

In [32]:
import warnings
# Suppress every UserWarning for the rest of the session.
warnings.filterwarnings(action='ignore', category=UserWarning)
# Turn off pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None  # default='warn'

In [33]:
# Seed NumPy's global random generator so code that draws from it is repeatable
# when the notebook is run top to bottom on a fresh kernel.
np.random.seed(42)

# Data

In [34]:
# Preprocessed training CSVs, one per WP1..WP6 dataset, read in this order.
_train_csv_paths = (
    'Data/Preprocessing/WP1_train_preprocessed.csv',
    'Data/Preprocessing/WP2_train_preprocessed.csv',
    'Data/Preprocessing/WP3_train_preprocessed.csv',
    'Data/Preprocessing/WP4_train_preprocessed.csv',
    'Data/Preprocessing/WP5_train_preprocessed.csv',
    'Data/Preprocessing/WP6_train_preprocessed.csv',
)
(train_wp1, train_wp2, train_wp3,
 train_wp4, train_wp5, train_wp6) = (
    pd.read_csv(csv_path, sep=',') for csv_path in _train_csv_paths
)

In [35]:
# Preprocessed test CSVs, one per WP1..WP6 dataset, read in this order.
_test_csv_paths = (
    'Data/Preprocessing/WP1_test_preprocessed.csv',
    'Data/Preprocessing/WP2_test_preprocessed.csv',
    'Data/Preprocessing/WP3_test_preprocessed.csv',
    'Data/Preprocessing/WP4_test_preprocessed.csv',
    'Data/Preprocessing/WP5_test_preprocessed.csv',
    'Data/Preprocessing/WP6_test_preprocessed.csv',
)
(test_wp1, test_wp2, test_wp3,
 test_wp4, test_wp5, test_wp6) = (
    pd.read_csv(csv_path, sep=',') for csv_path in _test_csv_paths
)
# Raw values of the `date` column of the initial (non-preprocessed) test file.
test_dates = (
    pd.read_csv('Data/Initial/test.csv', sep=',')
    .date
    .values
)

In [36]:
# Base list of columns removed from each training frame before modelling.
to_drop = [
    'date',
    'wd',
    'forecast_time',
    'forecast',
    'forecast_dist',
]

In [37]:
# Column names excluded from the model inputs, grouped by the name prefix they share.
# NOTE(review): the concatenation of these lists is called `feature_corr`, so they were
# presumably chosen from a feature-correlation analysis — confirm against the preprocessing notebook.

# Columns prefixed `u_T_` / `u2_T_`.
u_to_drop = [
    'u_T_1', 'u_T_2', 'u_T_3', 'u_T_4', 'u_T_5', 'u_T_6', 
    'u_T_2_mean', 'u_T_3_mean', 'u_T_4_mean', 'u_T_5_mean', 'u_T_6_mean', 'u_T_7_mean',
    'u_T_8_mean', 'u_T_9_mean', 'u_T_10_mean', 'u_T_11_mean', 'u_T_12_mean','u_T_24_mean',
    'u_T_2_std', 'u_T_4_std', 'u_T_5_std', 'u_T_6_std',
    'u_T_2_median', 'u_T_3_median', 'u_T_4_median', 'u_T_5_median', 'u_T_6_median', 'u_T_12_median','u_T_24_median', 'u_T_36_median',
    'u_T_2_max', 'u_T_3_max', 'u_T_4_max', 'u_T_5_max', 'u_T_6_max', 'u_T_12_max',
    'u_T_2_min', 'u_T_3_min', 'u_T_4_min', 'u_T_5_min', 'u_T_6_min', 'u_T_12_min',
    'u2_T_1', 'u2_T_2', 'u2_T_3', 'u2_T_4', 'u2_T_5', 'u2_T_6', 
    'u2_T_2_mean', 'u2_T_3_mean', 'u2_T_4_mean', 'u2_T_5_mean', 'u2_T_6_mean', 'u2_T_7_mean',
    'u2_T_8_mean', 'u2_T_9_mean', 'u2_T_10_mean', 'u2_T_11_mean', 'u2_T_12_mean','u2_T_24_mean',
    'u2_T_2_std', 'u2_T_4_std', 'u2_T_5_std', 'u2_T_6_std', 'u2_T_24_std',
    'u2_T_2_median', 'u2_T_3_median', 'u2_T_4_median', 'u2_T_5_median', 'u2_T_6_median', 'u2_T_12_median',
    'u2_T_2_max','u2_T_3_max', 'u2_T_4_max','u2_T_5_max', 'u2_T_6_max', 'u2_T_12_max',
    'u2_T_2_min', 'u2_T_3_min', 'u2_T_4_min', 'u2_T_5_min', 'u2_T_6_min',
    'u2_T_12', 'u2_T_36_mean', 'u2_T_36_std', 'u2_T_24_median', 'u2_T_24_max',
    'u_T_36_mean','u_T_12','u_T_24_max','u2_T_36_median','u_T_24_min'
]
# Columns prefixed `ws_T_`.
ws_to_drop = [
    'ws_T_1', 'ws_T_2', 'ws_T_3', 'ws_T_4', 'ws_T_5', 'ws_T_6', 'ws_T_7', 'ws_T_8', 'ws_T_10','ws_T_11', 'ws_T_12',
    'ws_T_2_mean', 'ws_T_3_mean', 'ws_T_4_mean', 'ws_T_5_mean', 'ws_T_6_mean', 'ws_T_7_mean', 'ws_T_8_mean', 'ws_T_9_mean', 
    'ws_T_10_mean', 'ws_T_11_mean', 'ws_T_12_mean', 'ws_T_24_mean', 
    'ws_T_2_std', 'ws_T_3_std', 'ws_T_4_std', 'ws_T_5_std', 
    'ws_T_2_median', 'ws_T_3_median', 'ws_T_4_median', 'ws_T_5_median', 'ws_T_6_median',
    'ws_T_12_median', 'ws_T_24_median', 'ws_T_36_median',
    'ws_T_2_max', 'ws_T_3_max', 'ws_T_4_max', 'ws_T_5_max','ws_T_6_max', 'ws_T_12_max',
     'ws_T_2_min', 'ws_T_3_min', 'ws_T_4_min', 'ws_T_5_min', 'ws_T_6_min', 'ws_T_12_min','ws_T_24_max','ws_T_24_min'
]

# Columns prefixed `v_T_`.
v_to_drop = [
    'v_T_1', 'v_T_2', 'v_T_3', 'v_T_4', 'v_T_5', 'v_T_6', 
    'v_T_2_mean', 'v_T_3_mean', 'v_T_4_mean', 'v_T_5_mean', 'v_T_6_mean', 'v_T_7_mean',
    'v_T_8_mean', 'v_T_9_mean', 'v_T_10_mean', 'v_T_11_mean', 'v_T_12_mean', 'v_T_24_mean','v_T_36_mean',
    'v_T_3_std', 'v_T_4_std', 'v_T_5_std','v_T_6_std','v_T_24_std', 'v_T_36_median',
    'v_T_2_median', 'v_T_3_median', 'v_T_4_median', 'v_T_5_median', 'v_T_6_median', 
    'v_T_2_max', 'v_T_3_max', 'v_T_4_max', 'v_T_5_max', 'v_T_6_max', 'v_T_12_max', 
    'v_T_2_min', 'v_T_3_min', 'v_T_4_min', 'v_T_5_min', 'v_T_6_min', 'v_T_12_min', 
    'v_T_36_min', 'v_T_36', 'v_T_24_max',  'v_T_12_median', 'v_T_24_median',
]

# Columns prefixed `coswd_`.
# NOTE: 'ws_T_36_max' and 'ws_T_36_min' are `ws_T_` names that live in this list rather than
# in ws_to_drop; this has no effect on the result because all lists are concatenated below.
wd_to_drop = [
    'coswd_1', 'coswd_2', 'coswd_3', 'coswd_4', 'coswd_5', 'coswd_6',
    'coswd_2_mean', 'coswd_3_mean', 'coswd_4_mean', 'coswd_5_mean', 'coswd_6_mean', 'coswd_7_mean', 
    'coswd_8_mean', 'coswd_9_mean', 'coswd_10_mean', 'coswd_11_mean', 'coswd_12_mean', 'coswd_24_mean', 
    'coswd_3_std', 'coswd_4_std','coswd_5_std','coswd_2_median', 'coswd_3_median','coswd_4_median', 
    'coswd_5_median', 'coswd_6_median', 'coswd_36_median', 'coswd_24_median', 'coswd_12_median',
    'coswd_2_max', 'coswd_3_max', 'coswd_4_max', 'coswd_5_max', 'coswd_6_max', 'coswd_12_max', 'coswd_24_max',
    'coswd_2_min', 'coswd_3_min', 'coswd_4_min', 'coswd_5_min', 'coswd_6_min', 'coswd_12_min', 'coswd_24_min',
    'ws_T_36_max', 'ws_T_36_min', 'coswd_12', 'coswd_24'
]

# Remaining individual columns that do not belong to any of the groups above.
other_to_drop = [
    'cos_day', 'u', 'v'
]

# Every engineered-feature exclusion gathered into a single list.
feature_corr = u_to_drop + ws_to_drop + v_to_drop + wd_to_drop + other_to_drop
# Extend the base exclusion list with the feature exclusions. `to_drop` is rebuilt from its
# own previous value, so de-duplicate (keeping first-seen order) to make the cell idempotent:
# re-running it no longer prepends another copy of `feature_corr`.
to_drop = list(dict.fromkeys(feature_corr + to_drop))

# LGBM functions

In [38]:
from lightgbm import LGBMRegressor

In [39]:
# def lgbm_cross_validation(X, y, params):
#     if params == None:
#         model = LGBMRegressor()
#     else:
#         model = LGBMRegressor(**params)

#     print('-----------LGBM CROSS VALIDATION BEGINNING-----------')
#     split = 10
#     kf = KFold(n_splits=split, shuffle=True)       
#     lgbm_rmse_scores = []
#     lgbm_mae_scores = []
#     i = 1
#     for (train_index, test_index) in kf.split(pd.DataFrame(X), pd.DataFrame(y)):
#         X_train, X_test = pd.DataFrame(X).iloc[train_index], pd.DataFrame(X).iloc[test_index]
#         Y_train, Y_test = pd.DataFrame(y).iloc[train_index],pd.DataFrame(y).iloc[test_index]

#         model.fit(X_train, Y_train, eval_set=[(X_test, Y_test)], verbose=100)

#         prediction = model.predict(X_test)
#         lgbm_rmse_scores.append(mean_squared_error(Y_test, prediction,squared=False))
#         lgbm_mae_scores.append(mean_absolute_error(Y_test, prediction))
        
#         print(show_evaluation(prediction, Y_test))
#         print(f'-------------------FOLD {i}-----------------')
#         i+=1

#     print('---------------CROSS VALIDATION COMPLETE-------------')
#     print('--------------------------RMSE-----------------------')
#     display_scores(lgbm_rmse_scores)
#     print('--------------------------MAE------------------------')
#     display_scores(lgbm_mae_scores)

In [40]:
def lgbm_scaled_cross_validation(X, y, params, n_splits=10, random_state=None):
    """Cross-validate a RobustScaler + LGBMRegressor pipeline with shuffled K-fold.

    For every fold the pipeline is refitted on the training part, evaluated on the
    held-out part, and the fold's RMSE and MAE are reported. After the last fold the
    collected RMSE and MAE scores are summarised with ``display_scores``.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix; wrapped in a ``pd.DataFrame`` for positional row selection.
    y : array-like or Series
        Target values; wrapped in a ``pd.DataFrame`` for positional row selection.
    params : dict or None
        Keyword arguments for ``LGBMRegressor``. ``None`` uses LightGBM's defaults.
    n_splits : int, default 10
        Number of folds.
    random_state : int or None, default None
        Seed for the fold shuffling. With ``None`` scikit-learn draws from NumPy's
        global random state, so the folds depend on ``np.random.seed`` and on how
        much randomness was consumed before this call.

    Returns
    -------
    None
        Results are printed only.

    Notes
    -----
    ``show_evaluation`` and ``display_scores`` are not defined in this notebook;
    presumably they come from ``Functions.helper_functions`` — verify there.
    """
    # `is None` instead of `== None`: identity is the correct test for the sentinel.
    lgbm_kwargs = {} if params is None else params
#     model = Pipeline([('scaler', MaxAbsScaler()),('lgbm', LGBMRegressor(**lgbm_kwargs))])
    model = Pipeline([('scaler', RobustScaler()), ('lgbm', LGBMRegressor(**lgbm_kwargs))])

    # Build the frames once rather than on every fold.
    X_frame = pd.DataFrame(X)
    y_frame = pd.DataFrame(y)

    print('-----------LGBM CROSS VALIDATION BEGINNING-----------')
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    lgbm_rmse_scores = []
    lgbm_mae_scores = []
    for fold, (train_index, test_index) in enumerate(kf.split(X_frame, y_frame), start=1):
        X_train, X_test = X_frame.iloc[train_index], X_frame.iloc[test_index]
        Y_train, Y_test = y_frame.iloc[train_index], y_frame.iloc[test_index]

        model.fit(X_train, Y_train)

        prediction = model.predict(X_test)
        # RMSE as sqrt(MSE): the `squared=False` keyword is deprecated and later
        # removed in recent scikit-learn releases.
        lgbm_rmse_scores.append(np.sqrt(mean_squared_error(Y_test, prediction)))
        lgbm_mae_scores.append(mean_absolute_error(Y_test, prediction))

        # show_evaluation prints the fold scores itself and returns None (the recorded
        # outputs show a stray "None" line per fold), so it is not wrapped in print().
        show_evaluation(prediction, Y_test)
        print(f'-------------------FOLD {fold}-----------------')

    print('---------------CROSS VALIDATION COMPLETE-------------')
    print('--------------------------RMSE-----------------------')
    display_scores(lgbm_rmse_scores)
    print('--------------------------MAE------------------------')
    display_scores(lgbm_mae_scores)

In [41]:
def hyperparametrization(trial, train_x, test_x, train_y, test_y):
    """Evaluate one Optuna trial on a fixed train/validation split.

    Samples LightGBM hyper-parameters from ``trial``, fits a
    MinMaxScaler + LGBMRegressor pipeline on the training part and returns the
    RMSE on the validation part.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.
    train_x, train_y
        Training features and target.
    test_x, test_y
        Validation features and target used to score the trial.

    Returns
    -------
    float
        Validation RMSE (lower is better).
    """
    param = {
        'metric': 'rmse',
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0, which is
        # not set here, so this value currently has no effect on the fitted model.
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'max_depth': trial.suggest_int("max_depth", 20, 100),
        # LightGBM requires num_leaves > 1; a lower bound of 1 could sample an invalid value.
        'num_leaves' : trial.suggest_int('num_leaves', 2, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
    }

#     model = LGBMRegressor(**param)
    # NOTE(review): the search uses MinMaxScaler while lgbm_scaled_cross_validation uses
    # RobustScaler — confirm this mismatch is intended.
    model = Pipeline([('scaler', MinMaxScaler()), ('lgbm', LGBMRegressor(**param))])
#     model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=100,verbose=False)
    model.fit(train_x, train_y)

    preds = model.predict(test_x)

    # RMSE as sqrt(MSE): the `squared=False` keyword is deprecated and later removed
    # in recent scikit-learn releases.
    rmse = np.sqrt(mean_squared_error(test_y, preds))

    return rmse

In [42]:
def hyperparametrization_cv(trial, X, y):
    """Evaluate one Optuna trial with 3-fold cross-validation.

    Samples LightGBM hyper-parameters from ``trial`` and returns the mean RMSE of an
    unscaled ``LGBMRegressor`` over ``cross_val_score(..., cv=3)``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.
    X, y
        Full feature matrix and target passed to ``cross_val_score``.

    Returns
    -------
    float
        Mean cross-validated RMSE (lower is better).
    """
    param = {
        'metric': 'rmse',
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0, which is
        # not set here, so this value currently has no effect on the fitted model.
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'max_depth': trial.suggest_int("max_depth", 20, 100),
        # LightGBM requires num_leaves > 1; a lower bound of 1 could sample an invalid value.
        'num_leaves' : trial.suggest_int('num_leaves', 2, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
    }

    model = LGBMRegressor(**param)
    # The scorer returns negated RMSE (higher is better), so flip the sign back.
    rmse = - cross_val_score(model, X, y, cv=3, scoring = 'neg_root_mean_squared_error').mean()

    return rmse

## WP1 

| |  | Mean | Std | Sum up |
| --- | --- | --- | --- | --- |
| No params | RMSE | 0.10344875448880764 | 0.0019070131550065564 |  |
| No params - StandardScaler | RMSE | 0.10355855436437653 | 0.0019043784372002228 |  |
| After tuning 50trials| RMSE | 0.06830085723562579 | 0.0012998671387256361 | To keep, maybe redo optuna with warm start with it - first sub|
| After tuning 100trials| RMSE | 0.0695930431604128 | 0.0015123892627707553 | |
| After tuning W.S 60trials | RMSE | 0.0671868765178121 | 0.0019357892229500213 | second sub - BETTER |
| After tuning W.S 50trials - StandardScaler | RMSE | 0.06705406618054667 | 0.0013509038914922506 | with warm start 1 parameters |
| After tuning W.S 50trials - RobustScaler | RMSE | 0.06736753027260156 | 0.0009971394767292064 | |
| --- | --- | --- | --- | --- |
| No params | MAE | 0.07306057409517844 | 0.0009857628815465762 |  |
| No params - StandardScaler | MAE | 0.07310137331348271 | 0.0010945145617042643 |  |
| After tuning 50trials| MAE | 0.04481696427654311 | 0.000727722171899004 |  |
| After tuning 100trials| MAE | 0.045696725242359994 | 0.0008508558100930331 |  |
| After tuning W.S 60trials | MAE | 0.04322865305311156 | 0.0009454253743042544 | |
| After tuning W.S 50trials - StandardScaler | MAE | 0.043173032898780175 | 0.0007696056550296055 | with warm start 1 parameters |
| After tuning W.S 50trials - RobustScaler | MAE | 0.043246198690701294 | 0.000596545751975941 | |

In [43]:
# WP1 modelling frame: target 'wp' moved to the last column, excluded columns removed.
# `to_drop` already contains every name in `feature_corr`, so it is passed on its own,
# exactly as the WP2 and WP3 cells do (the result is unchanged).
wp1_X = train_wp1[[c for c in train_wp1 if c not in ["wp"]] + ["wp"]].drop(to_drop, axis = 1)
X1 = wp1_X.drop('wp', axis=1)
y1 = wp1_X['wp']

def objective_wp1(trial,data=X1,target=y1):
    """Optuna objective for WP1: score one trial on a fixed 85/15 hold-out split.

    `data` and `target` default to the X1/y1 objects that exist when this cell runs;
    re-run the cell after changing them.
    """
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.15,random_state=42)
    return hyperparametrization(trial, train_x, test_x, train_y, test_y)

In [44]:
# WP1 baseline: cross-validate the scaled pipeline with default LightGBM parameters (params=None).
# lgbm_cross_validation(X1, y1, None)
lgbm_scaled_cross_validation(X1, y1, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.10673394295125199
MAE score: 0.07538804107084769
None
-------------------FOLD 1-----------------
RMSE score: 0.10878477339303502
MAE score: 0.07651142955181095
None
-------------------FOLD 2-----------------
RMSE score: 0.10741580059761964
MAE score: 0.07544854425678599
None
-------------------FOLD 3-----------------
RMSE score: 0.10874503918978277
MAE score: 0.07756358640866627
None
-------------------FOLD 4-----------------
RMSE score: 0.10773881942674152
MAE score: 0.07681605858992606
None
-------------------FOLD 5-----------------
RMSE score: 0.10738854648859955
MAE score: 0.0755259174514674
None
-------------------FOLD 6-----------------
RMSE score: 0.11060736094755703
MAE score: 0.07764645493616058
None
-------------------FOLD 7-----------------
RMSE score: 0.10812659200067887
MAE score: 0.07638246361872644
None
-------------------FOLD 8-----------------
RMSE score: 0.11055504988722631
MAE score: 0.07855139547735

In [83]:
# try_these_first = [{
#     'reg_alpha': 0.664265743859848,
#     'reg_lambda': 9.83047434398735,
#     'colsample_bytree': 1.0,
#     'subsample': 1.0,
#     'learning_rate': 0.24237997149103074,
#     'max_depth': 77,
#     'num_leaves': 389,
#     'min_child_samples': 2,
# },  {
#     'reg_alpha': 0.8314449043001416,
#     'reg_lambda': 9.093012403173608,
#     'colsample_bytree': 0.9,
#     'subsample': 0.4,
#     'learning_rate': 0.2033256175102991,
#     'max_depth': 55,
#     'num_leaves': 964,
#     'min_child_samples': 25,
# }, {
#     'reg_alpha': 0.25020407037516895,
#     'reg_lambda': 7.183180037262842,
#     'colsample_bytree': 1.0,
#     'subsample': 1.0,
#     'learning_rate': 0.11751089382716717,
#     'max_depth': 84,
#     'num_leaves': 596,
#     'min_child_samples': 15,
# }]

# study = optuna.create_study(direction='minimize')
# study.enqueue_trial(try_these_first[0])
# study.enqueue_trial(try_these_first[1])
# study.enqueue_trial(try_these_first[2])

[32m[I 2021-09-07 11:09:32,102][0m A new study created in memory with name: no-name-98124f97-6686-403c-926d-a4204c5c140a[0m
  study.enqueue_trial(try_these_first[0])
  create_trial(state=TrialState.WAITING, system_attrs={"fixed_params": params})
  self.add_trial(
  study.enqueue_trial(try_these_first[1])
  study.enqueue_trial(try_these_first[2])


In [84]:
# # # study = optuna.create_study(direction='minimize')
# study.optimize(objective_wp1, n_trials=30)
# # write_results('Data/Hyperparametrization/lgbm_50trials_cv.xlsx', 'wp1', study.trials_dataframe())
# best_trial = study.best_trial.params
# best_trial

[32m[I 2021-09-07 11:09:37,212][0m Trial 0 finished with value: 0.0679311388874414 and parameters: {'reg_alpha': 0.664265743859848, 'reg_lambda': 9.83047434398735, 'colsample_bytree': 1.0, 'subsample': 1.0, 'learning_rate': 0.24237997149103074, 'max_depth': 77, 'num_leaves': 389, 'min_child_samples': 2}. Best is trial 0 with value: 0.0679311388874414.[0m
[32m[I 2021-09-07 11:09:40,584][0m Trial 1 finished with value: 0.06794682477114562 and parameters: {'reg_alpha': 0.8314449043001416, 'reg_lambda': 9.093012403173608, 'colsample_bytree': 0.9, 'subsample': 0.4, 'learning_rate': 0.2033256175102991, 'max_depth': 55, 'num_leaves': 964, 'min_child_samples': 25}. Best is trial 0 with value: 0.0679311388874414.[0m
[32m[I 2021-09-07 11:09:45,211][0m Trial 2 finished with value: 0.06773921527333406 and parameters: {'reg_alpha': 0.25020407037516895, 'reg_lambda': 7.183180037262842, 'colsample_bytree': 1.0, 'subsample': 1.0, 'learning_rate': 0.11751089382716717, 'max_depth': 84, 'num_leav

{'reg_alpha': 0.25020407037516895,
 'reg_lambda': 7.183180037262842,
 'colsample_bytree': 1.0,
 'subsample': 1.0,
 'learning_rate': 0.11751089382716717,
 'max_depth': 84,
 'num_leaves': 596,
 'min_child_samples': 15}

In [113]:
# # 100
# params_1 = {
#     'reg_alpha': 0.8314449043001416,
#     'reg_lambda': 9.093012403173608,
#     'colsample_bytree': 0.9,
#     'subsample': 0.4,
#     'learning_rate': 0.2033256175102991,
#     'max_depth': 55,
#     'num_leaves': 964,
#     'min_child_samples': 25,
# }

# #50
# params_1 = {
#     'reg_alpha': 0.664265743859848,
#     'reg_lambda': 9.83047434398735,
#     'colsample_bytree': 1.0,
#     'subsample': 1.0,
#     'learning_rate': 0.24237997149103074,
#     'max_depth': 77,
#     'num_leaves': 389,
#     'min_child_samples': 2,
# }

# warm start
# params_1 = {
#     'reg_alpha': 0.25020407037516895,
#     'reg_lambda': 7.183180037262842,
#     'colsample_bytree': 1.0,
#     'subsample': 1.0,
#     'learning_rate': 0.11751089382716717,
#     'max_depth': 84,
#     'num_leaves': 596,
#     'min_child_samples': 15,
# }

# robust scaler with warm start
# (same values as the commented-out "warm start" set above)
params_1 = dict(
    reg_alpha=0.25020407037516895,
    reg_lambda=7.183180037262842,
    colsample_bytree=1.0,
    subsample=1.0,
    learning_rate=0.11751089382716717,
    max_depth=84,
    num_leaves=596,
    min_child_samples=15,
)

In [86]:
# WP1 tuned: cross-validate the scaled pipeline with the selected params_1.
# lgbm_cross_validation(X1, y1, params_1)
lgbm_scaled_cross_validation(X1, y1, params_1)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.06717849943315866
MAE score: 0.04295040911537695
None
-------------------FOLD 1-----------------
RMSE score: 0.06656592895845817
MAE score: 0.0427058814549603
None
-------------------FOLD 2-----------------
RMSE score: 0.0685985271202238
MAE score: 0.044247394394914624
None
-------------------FOLD 3-----------------
RMSE score: 0.0673729928421063
MAE score: 0.04373887190080029
None
-------------------FOLD 4-----------------
RMSE score: 0.0678651281207079
MAE score: 0.04270283283224531
None
-------------------FOLD 5-----------------
RMSE score: 0.06594548251098918
MAE score: 0.04253764531270707
None
-------------------FOLD 6-----------------
RMSE score: 0.06685366059114577
MAE score: 0.04274888757718109
None
-------------------FOLD 7-----------------
RMSE score: 0.06616759457408684
MAE score: 0.04313831000299249
None
-------------------FOLD 8-----------------
RMSE score: 0.06923961551526447
MAE score: 0.0441275092206849

## WP2

| |  | Mean | Std | Sum up|
| --- | --- | --- | --- | --- |
| No params | RMSE | 0.10935335541057582 | 0.0014425096116734836 | |
| No params - StandardScaler | RMSE | 0.10938240918068962 | 0.002563571983412213 |  |
| After tuning - 50trials| RMSE | 0.0725081520968898 | 0.0016974702626377217 | |
| After tuning 100trials| RMSE | 0.0707064364904941 | 0.001396820290618349 | More stable, to keep |
| After tuning W.S. 50trials - StandardScaler | RMSE | 0.07072007438762447 | 0.0010396359382112443 | With 100trials parameters |
| After tuning W.S. 50trials - RobustScaler | RMSE | 0.07014346508497502 | 0.0013671602187583412 | |
| --- | --- | --- | --- |---|
| No params | MAE | 0.07681923856705511 | 0.0008670825615244791 | |
| No params - StandardScaler | MAE | 0.0768829843356125 | 0.0017568758903956435 |  |
| After tuning - 50trials| MAE | 0.04512164110351975 |  0.0006579433030966575 | |
| After tuning 100trials| MAE | 0.04457902842458915 | 0.0006807155447311589 | |
| After tuning W.S. 50trials - StandardScaler | MAE | 0.04456161763710905 | 0.0005342365137369251 | With 100trials parameters |
| After tuning W.S. 50trials - RobustScaler | MAE | 0.044471464183373825 | 0.0007098731360466324 | |

In [87]:
# WP2 modelling frame: target 'wp' moved to the last column, excluded columns removed.
_wp2_column_order = [col for col in train_wp2 if col != "wp"]
_wp2_column_order.append("wp")
wp2_X = train_wp2[_wp2_column_order].drop(columns=to_drop)
X2 = wp2_X.drop(columns='wp')
y2 = wp2_X['wp']


def objective_wp2(trial, data=X2, target=y2):
    """Optuna objective for WP2: score one trial on a fixed 85/15 hold-out split."""
    # train_test_split returns (train_x, test_x, train_y, test_y), which is exactly the
    # positional order hyperparametrization expects after `trial`.
    split_parts = train_test_split(data, target, test_size=0.15, random_state=42)
    return hyperparametrization(trial, *split_parts)

In [50]:
# WP2 baseline: cross-validate the scaled pipeline with default LightGBM parameters (params=None).
# lgbm_cross_validation(X2, y2, None)
lgbm_scaled_cross_validation(X2, y2, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.11402888619107272
MAE score: 0.07963794352868463
None
-------------------FOLD 1-----------------
RMSE score: 0.11141340439487576
MAE score: 0.07839824043459855
None
-------------------FOLD 2-----------------
RMSE score: 0.11320267516145571
MAE score: 0.07925618975223588
None
-------------------FOLD 3-----------------
RMSE score: 0.11439731977703196
MAE score: 0.0791782503104171
None
-------------------FOLD 4-----------------
RMSE score: 0.11598329629623258
MAE score: 0.08158136796727551
None
-------------------FOLD 5-----------------
RMSE score: 0.11626585634812774
MAE score: 0.08130743886734008
None
-------------------FOLD 6-----------------
RMSE score: 0.11479460869406412
MAE score: 0.08143363149534559
None
-------------------FOLD 7-----------------
RMSE score: 0.11236475618580724
MAE score: 0.08023735021214357
None
-------------------FOLD 8-----------------
RMSE score: 0.11216017414861613
MAE score: 0.07972013970798

In [88]:
# Warm-start candidates for the WP2 study: parameter sets that Optuna evaluates before
# it starts sampling on its own.
# The former 'min_data_per_groups' entries were removed: that name is not part of the
# search space sampled by the objective, so Optuna never applied it (the recorded trial
# parameters do not contain it).
try_these_first = [
    {
        'reg_alpha': 0.18268883436586145,
        'reg_lambda': 0.15916821051528962,
        'colsample_bytree': 1.0,
        'subsample': 0.6,
        'learning_rate': 0.18007000714755378,
        'max_depth': 77,
        'num_leaves': 425,
        'min_child_samples': 10,
    },
    {
        'reg_alpha': 0.04439450895032273,
        'reg_lambda': 0.7790968728875318,
        'colsample_bytree': 0.4,
        'subsample': 1.0,
        'learning_rate': 0.09520041095092219,
        'max_depth': 31,
        'num_leaves': 883,
        'min_child_samples': 18,
    },
    {
        'reg_alpha': 0.34026994469471555,
        'reg_lambda': 1.1032197453137866,
        'colsample_bytree': 0.9,
        'subsample': 0.6,
        'learning_rate': 0.13414826176962302,
        'max_depth': 81,
        'num_leaves': 987,
        'min_child_samples': 39,
    },
]

# Fresh in-memory study that minimises the objective (RMSE).
study = optuna.create_study(direction='minimize')
# Queue the candidates in list order so they run as the first trials.
for warm_start_params in try_these_first:
    study.enqueue_trial(warm_start_params)

[32m[I 2021-09-07 11:11:26,112][0m A new study created in memory with name: no-name-1183b78d-4fa2-4d82-9772-1f3e5c7305de[0m
  study.enqueue_trial(try_these_first[0])
  create_trial(state=TrialState.WAITING, system_attrs={"fixed_params": params})
  self.add_trial(
  study.enqueue_trial(try_these_first[1])
  study.enqueue_trial(try_these_first[2])


In [89]:
# study = optuna.create_study(direction='minimize')
# Run the WP2 search on the study created in the previous cell; the enqueued warm-start
# parameter sets are evaluated as the first trials (see trials 0-2 in the log).
study.optimize(objective_wp2, n_trials=50)
# write_results('Data/Hyperparametrization/lgbm_100trials.xlsx', 'wp2', study.trials_dataframe())
# NOTE: despite its name, `best_trial` holds the best trial's parameter dict, not the trial object.
best_trial = study.best_trial.params
best_trial

[32m[I 2021-09-07 11:11:30,099][0m Trial 0 finished with value: 0.07149250538746527 and parameters: {'reg_alpha': 0.18268883436586145, 'reg_lambda': 0.15916821051528962, 'colsample_bytree': 1.0, 'subsample': 0.6, 'learning_rate': 0.18007000714755378, 'max_depth': 77, 'num_leaves': 425, 'min_child_samples': 10}. Best is trial 0 with value: 0.07149250538746527.[0m
[32m[I 2021-09-07 11:11:34,015][0m Trial 1 finished with value: 0.07251265314809148 and parameters: {'reg_alpha': 0.04439450895032273, 'reg_lambda': 0.7790968728875318, 'colsample_bytree': 0.4, 'subsample': 1.0, 'learning_rate': 0.09520041095092219, 'max_depth': 31, 'num_leaves': 883, 'min_child_samples': 18}. Best is trial 0 with value: 0.07149250538746527.[0m
[32m[I 2021-09-07 11:11:37,834][0m Trial 2 finished with value: 0.07259107229157469 and parameters: {'reg_alpha': 0.34026994469471555, 'reg_lambda': 1.1032197453137866, 'colsample_bytree': 0.9, 'subsample': 0.6, 'learning_rate': 0.13414826176962302, 'max_depth': 

{'reg_alpha': 0.18268883436586145,
 'reg_lambda': 0.15916821051528962,
 'colsample_bytree': 1.0,
 'subsample': 0.6,
 'learning_rate': 0.18007000714755378,
 'max_depth': 77,
 'num_leaves': 425,
 'min_child_samples': 10}

In [114]:
# # Std warm start 
# params_2 = {
#     'reg_alpha': 0.34026994469471555,
#     'reg_lambda': 1.1032197453137866,
#     'colsample_bytree': 0.9,
#     'subsample': 0.6,
#     'learning_rate': 0.13414826176962302,
#     'max_depth': 81,
#     'num_leaves': 987,
#     'min_child_samples': 39
# }

# 100
# params_2 = {
#     'reg_alpha': 0.18268883436586145,
#     'reg_lambda': 0.15916821051528962,
#     'colsample_bytree': 1.0,
#     'subsample': 0.6,
#     'learning_rate': 0.18007000714755378,
#     'max_depth': 77,
#     'num_leaves': 425,
#     'min_child_samples': 10,
# }

# 50
# params_2 = {
#  'reg_alpha': 0.04439450895032273,
#  'reg_lambda': 0.7790968728875318,
#  'colsample_bytree': 0.4,
#  'subsample': 1.0,
#  'learning_rate': 0.09520041095092219,
#  'max_depth': 31,
#  'num_leaves': 883,
#  'min_child_samples': 18
# }


# robust scaler with warm start
# (same values as the commented-out "100" set above)
params_2 = dict(
    reg_alpha=0.18268883436586145,
    reg_lambda=0.15916821051528962,
    colsample_bytree=1.0,
    subsample=0.6,
    learning_rate=0.18007000714755378,
    max_depth=77,
    num_leaves=425,
    min_child_samples=10,
)

In [91]:
# WP2 tuned: cross-validate the scaled pipeline with the selected params_2.
# lgbm_cross_validation(X2, y2, params_2)
lgbm_scaled_cross_validation(X2, y2, params_2)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.07104300308934304
MAE score: 0.0455226067353088
None
-------------------FOLD 1-----------------
RMSE score: 0.06900433105779964
MAE score: 0.04395819566162341
None
-------------------FOLD 2-----------------
RMSE score: 0.07006814246944029
MAE score: 0.044146504366775294
None
-------------------FOLD 3-----------------
RMSE score: 0.06941898518594897
MAE score: 0.0441689727077893
None
-------------------FOLD 4-----------------
RMSE score: 0.06879222552665089
MAE score: 0.04404154532239421
None
-------------------FOLD 5-----------------
RMSE score: 0.07126433741583171
MAE score: 0.04509508348151645
None
-------------------FOLD 6-----------------
RMSE score: 0.07067185696910876
MAE score: 0.04418925412439994
None
-------------------FOLD 7-----------------
RMSE score: 0.06991148710195359
MAE score: 0.0440879238677574
None
-------------------FOLD 8-----------------
RMSE score: 0.06816386758869793
MAE score: 0.043630768983523

## WP3

| |  | Mean | Std | Sum up |
| --- | --- | --- | --- | --- |
| No params | RMSE | 0.10392558077951244 | 0.0019038044796542812 ||
| No params - StandardScaler | RMSE | 0.10354114984428979 | 0.0013629826554706927 ||
| After tuning - 50trials| RMSE | 0.058253804820626545 | 0.0009893279354834155 | More stable, to keep |
| After tuning - 100trials| RMSE | 0.058338944346627106 | 0.0017133930174837203 ||
| After tuning W.S. - 50trials - StandardScaler | RMSE | 0.05839355310487706 | 0.0009599824558874801 ||
| After tuning - 50trial - StandardScaler | RMSE | 0.05828701989178382 | 0.0013469437248627486 | with 50trials best|
| After tuning - 50trial W.S. - RobustScaler | RMSE | 0.05907568414783913 | 0.0010120672015320588 | |
| --- | --- | --- | --- | --- |
| No params | MAE | 0.07550802464973318 | 0.0012006073434917633 ||
| No params - StandardScaler | MAE | 0.0753220326933334 | 0.0007353049410651828 | |
| After tuning - 50trials| MAE | 0.03787310900962521 | 0.000442034368456366 ||
| After tuning - 100trials| MAE | 0.03838030476025398 | 0.0007480100565996748 ||
| After tuning W.S. - 50trials -StandardScaler | MAE | 0.03838277636708219 | 0.0006195357541130345 ||
| After tuning - 50trials - StandardScaler | MAE | 0.03796602386426936 | 0.0006588818520419181 | with 50trials best|
| After tuning - 50trial W.S. - RobustScaler | MAE | 0.03824961618901729 | 0.0007043585347168141 | |


In [55]:
# WP3 modelling frame: target 'wp' moved to the last column, excluded columns removed.
_wp3_column_order = [col for col in train_wp3 if col != "wp"]
_wp3_column_order.append("wp")
wp3_X = train_wp3[_wp3_column_order].drop(columns=to_drop)
X3 = wp3_X.drop(columns='wp')
y3 = wp3_X['wp']


def objective_wp3(trial, data=X3, target=y3):
    """Optuna objective for WP3: score one trial on a fixed 85/15 hold-out split."""
    # train_test_split returns (train_x, test_x, train_y, test_y), which is exactly the
    # positional order hyperparametrization expects after `trial`.
    split_parts = train_test_split(data, target, test_size=0.15, random_state=42)
    return hyperparametrization(trial, *split_parts)

In [56]:
# Baseline for WP3: cross-validation with None passed as the parameter argument
# (presumably the LightGBM defaults -- confirm in Functions.helper_functions).
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X3, y3, None)
lgbm_scaled_cross_validation(X3, y3, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.10784104982964099
MAE score: 0.0779862795420197
None
-------------------FOLD 1-----------------
RMSE score: 0.11005158383080127
MAE score: 0.07964961198847581
None
-------------------FOLD 2-----------------
RMSE score: 0.10976714666270865
MAE score: 0.079575174022256
None
-------------------FOLD 3-----------------
RMSE score: 0.10860242836368786
MAE score: 0.07877072106913185
None
-------------------FOLD 4-----------------
RMSE score: 0.10865226958512926
MAE score: 0.07872170540355627
None
-------------------FOLD 5-----------------
RMSE score: 0.1091451770259513
MAE score: 0.0796468386781163
None
-------------------FOLD 6-----------------
RMSE score: 0.11042219371995066
MAE score: 0.08052251239006873
None
-------------------FOLD 7-----------------
RMSE score: 0.1121371644824601
MAE score: 0.08132981807980641
None
-------------------FOLD 8-----------------
RMSE score: 0.11085648281615004
MAE score: 0.08031359003713218
N

In [92]:
# Warm-start configurations for the WP3 study. They match the parameter sets
# archived further down under the labels "100", "50" and
# "50 warm start std scaler", in that order.
try_these_first = [
    dict(
        reg_alpha=0.26013926149282945,
        reg_lambda=0.002325658512162904,
        colsample_bytree=1.0,
        subsample=0.7,
        learning_rate=0.10619054458258967,
        max_depth=83,
        num_leaves=647,
        min_child_samples=3,
    ),
    dict(
        reg_alpha=0.002937356908910416,
        reg_lambda=0.003822180117262245,
        colsample_bytree=0.8,
        subsample=1.0,
        learning_rate=0.09489749817678472,
        max_depth=41,
        num_leaves=842,
        min_child_samples=18,
    ),
    dict(
        reg_alpha=0.2380367567801365,
        reg_lambda=0.005052844767806766,
        colsample_bytree=0.9,
        subsample=0.5,
        learning_rate=0.11958787026894079,
        max_depth=41,
        num_leaves=690,
        min_child_samples=16,
    ),
]

# Fresh in-memory study; the enqueued configurations are evaluated first, in list order.
study = optuna.create_study(direction='minimize')
for warm_start_params in try_these_first:
    study.enqueue_trial(warm_start_params)

[32m[I 2021-09-07 11:14:02,372][0m A new study created in memory with name: no-name-6344541c-33af-42df-969d-8bae9e5074a2[0m
  study.enqueue_trial(try_these_first[0])
  create_trial(state=TrialState.WAITING, system_attrs={"fixed_params": params})
  self.add_trial(
  study.enqueue_trial(try_these_first[1])
  study.enqueue_trial(try_these_first[2])


In [93]:
# Re-use the study from the previous cell so the enqueued warm-start trials run first.
# (Uncomment to start from an empty study instead.)
# study = optuna.create_study(direction='minimize')
study.optimize(objective_wp3, n_trials=50)

# Optional export of every trial:
# write_results('Data/Hyperparametrization/lgbm_100trials.xlsx', 'wp3', study.trials_dataframe())

# Despite its name, `best_trial` holds the parameter dict of the best trial.
best_trial = study.best_params
best_trial

[32m[I 2021-09-07 11:14:09,836][0m Trial 0 finished with value: 0.06097317755060667 and parameters: {'reg_alpha': 0.26013926149282945, 'reg_lambda': 0.002325658512162904, 'colsample_bytree': 1.0, 'subsample': 0.7, 'learning_rate': 0.10619054458258967, 'max_depth': 83, 'num_leaves': 647, 'min_child_samples': 3}. Best is trial 0 with value: 0.06097317755060667.[0m
[32m[I 2021-09-07 11:14:15,991][0m Trial 1 finished with value: 0.059834159129533125 and parameters: {'reg_alpha': 0.002937356908910416, 'reg_lambda': 0.003822180117262245, 'colsample_bytree': 0.8, 'subsample': 1.0, 'learning_rate': 0.09489749817678472, 'max_depth': 41, 'num_leaves': 842, 'min_child_samples': 18}. Best is trial 1 with value: 0.059834159129533125.[0m
[32m[I 2021-09-07 11:14:21,119][0m Trial 2 finished with value: 0.060331851442134515 and parameters: {'reg_alpha': 0.2380367567801365, 'reg_lambda': 0.005052844767806766, 'colsample_bytree': 0.9, 'subsample': 0.5, 'learning_rate': 0.11958787026894079, 'max_d

{'reg_alpha': 0.013728350276458868,
 'reg_lambda': 0.002557827949836765,
 'colsample_bytree': 0.8,
 'subsample': 0.4,
 'learning_rate': 0.08404139563379286,
 'max_depth': 80,
 'num_leaves': 886,
 'min_child_samples': 1}

In [116]:
# # 100
# params_3 = {
#     'reg_alpha': 0.26013926149282945,
#     'reg_lambda': 0.002325658512162904,
#     'colsample_bytree': 1.0,
#     'subsample': 0.7,
#     'learning_rate': 0.10619054458258967,
#     'max_depth': 83,
#     'num_leaves': 647,
#     'min_child_samples': 3,
# }

# # # 50
# params_3 = {
#     'reg_alpha': 0.002937356908910416,
#     'reg_lambda': 0.003822180117262245,
#     'colsample_bytree': 0.8,
#     'subsample': 1.0,
#     'learning_rate': 0.09489749817678472,
#     'max_depth': 41,
#     'num_leaves': 842,
#     'min_child_samples': 18,
# }

# 50 warm start std scaler
# params_3 = {
#     'reg_alpha': 0.2380367567801365,
#     'reg_lambda': 0.005052844767806766,
#     'colsample_bytree': 0.9,
#     'subsample': 0.5,
#     'learning_rate': 0.11958787026894079,
#     'max_depth': 41,
#     'num_leaves': 690,
#     'min_child_samples': 16
# }


# Active WP3 configuration (labelled "robust scaler with warm start").
# The values equal the best trial reported by the WP3 study above.
params_3 = dict(
    reg_alpha=0.013728350276458868,
    reg_lambda=0.002557827949836765,
    colsample_bytree=0.8,
    subsample=0.4,
    learning_rate=0.08404139563379286,
    max_depth=80,
    num_leaves=886,
    min_child_samples=1,
)

In [95]:
# Cross-validate WP3 with the selected hyper-parameters in params_3.
# NOTE(review): this cell's execution count (95) predates the params_3 cell (116),
# so the stored output may come from an earlier params_3 -- re-run to confirm.
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X3, y3, params_3)
lgbm_scaled_cross_validation(X3, y3, params_3)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.06064624432688606
MAE score: 0.03887273275254841
None
-------------------FOLD 1-----------------
RMSE score: 0.0601302325869877
MAE score: 0.03914922759031806
None
-------------------FOLD 2-----------------
RMSE score: 0.05794151276900089
MAE score: 0.03762067611130553
None
-------------------FOLD 3-----------------
RMSE score: 0.05936794963006943
MAE score: 0.03838169212263983
None
-------------------FOLD 4-----------------
RMSE score: 0.05833107169680216
MAE score: 0.03738948646954827
None
-------------------FOLD 5-----------------
RMSE score: 0.05741202981171924
MAE score: 0.03715121985482552
None
-------------------FOLD 6-----------------
RMSE score: 0.059130738871233685
MAE score: 0.03786656768570517
None
-------------------FOLD 7-----------------
RMSE score: 0.058156135786221856
MAE score: 0.038074541627495406
None
-------------------FOLD 8-----------------
RMSE score: 0.05989830883539013
MAE score: 0.03875445785

## WP4

| |  | Mean | Std | |
| --- | --- | --- | --- | --- |
| No params | RMSE | 0.10486204816363351 | 0.0015105949978751166 ||
| No params  - Std | RMSE | 0.10480849342496516 | 0.0010370863436755212 ||
| After tuning - 50trials| RMSE | 0.06513233717204232 | 0.0015891617240032727 ||
| After tuning 100trials| RMSE | 0.06357594848470964 | 0.0013676749030776929 ||
| After tuning with W.S - 50trials - Std | RMSE | 0.06339401569270936 | 0.001228053306037005 ||
| After tuning with W.S. - 50trials - RobustScaler | RMSE | 0.0632324115841705 | 0.0010081050240456021 ||
| --- | --- | --- | --- | --- |
| No params | MAE | 0.07564776733421566 | 0.00104638869825841 ||
| No params  - Std | MAE | 0.07570794104041156 | 0.0008419207475550308 ||
| After tuning - 50trials| MAE | 0.04219236028055372 | 0.0008190579419060266 ||
| After tuning 100trials| MAE |0.04172111697148837  | 0.0009349285385250968 ||
| After tuning with W.S - 50trials  - Std | MAE | 0.04150668920859586 | 0.0005729825500890684 ||
| After tuning with W.S. - 50trials - RobustScaler | MAE | 0.04170428506837879 | 0.0006101247158768171 ||

In [61]:
# Put the target column 'wp' last, then discard the non-feature columns in `to_drop`.
wp4_feature_cols = [col for col in train_wp4 if col != "wp"]
wp4_X = train_wp4[wp4_feature_cols + ["wp"]].drop(columns=to_drop)
X4 = wp4_X.drop(columns='wp')
y4 = wp4_X['wp']


def objective_wp4(trial, data=X4, target=y4):
    """Optuna objective for WP4.

    Holds out 15% of ``data``/``target`` with a fixed seed (so every trial is
    scored on the same split) and returns whatever ``hyperparametrization``
    computes for this trial.
    """
    split = train_test_split(data, target, test_size=0.15, random_state=42)
    train_x, test_x, train_y, test_y = split
    return hyperparametrization(trial, train_x, test_x, train_y, test_y)

In [62]:
# Baseline for WP4: cross-validation with None passed as the parameter argument
# (presumably the LightGBM defaults -- confirm in Functions.helper_functions).
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X4, y4, None)
lgbm_scaled_cross_validation(X4, y4, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.11227449275038803
MAE score: 0.08067967144788524
None
-------------------FOLD 1-----------------
RMSE score: 0.11028855428139524
MAE score: 0.07960583378521108
None
-------------------FOLD 2-----------------
RMSE score: 0.1088487783793776
MAE score: 0.07872772765228164
None
-------------------FOLD 3-----------------
RMSE score: 0.10671296331306089
MAE score: 0.07674760451886291
None
-------------------FOLD 4-----------------
RMSE score: 0.10667076125606384
MAE score: 0.07715723935804648
None
-------------------FOLD 5-----------------
RMSE score: 0.10899821120451332
MAE score: 0.07859465707376892
None
-------------------FOLD 6-----------------
RMSE score: 0.10930209913646854
MAE score: 0.07822105884261472
None
-------------------FOLD 7-----------------
RMSE score: 0.11110307228893893
MAE score: 0.07990342867273831
None
-------------------FOLD 8-----------------
RMSE score: 0.10652195393070206
MAE score: 0.07656027772431

In [96]:
# Warm-start configurations for the WP4 study. They match the two parameter sets
# archived further down in this notebook (the first one is labelled "100").
#
# The former 'min_data_per_groups' entries were removed. The key is misspelled
# (LightGBM's option is `min_data_per_group`) and is not part of the tuned search
# space -- the logged trial parameters contain only the eight keys below -- so
# Optuna never applied the value.
try_these_first = [{
    'reg_alpha': 0.08714703614419553,
    'reg_lambda': 9.983645262139024,
    'colsample_bytree': 0.9,
    'subsample': 0.8,
    'learning_rate': 0.13413154768816146,
    'max_depth': 41,
    'num_leaves': 613,
    'min_child_samples': 15,
}, {
    'reg_alpha': 0.15331128149569725,
    'reg_lambda': 0.28560184971009756,
    'colsample_bytree': 0.7,
    'subsample': 0.5,
    'learning_rate': 0.11430869527789024,
    'max_depth': 24,
    'num_leaves': 856,
    'min_child_samples': 14,
}]

# Fresh in-memory study; the enqueued configurations are evaluated first, in list order.
study = optuna.create_study(direction='minimize')
for warm_start_params in try_these_first:
    study.enqueue_trial(warm_start_params)

[32m[I 2021-09-07 11:19:06,603][0m A new study created in memory with name: no-name-162367fd-e48a-432b-a703-e10196a4b6be[0m
  study.enqueue_trial(try_these_first[0])
  create_trial(state=TrialState.WAITING, system_attrs={"fixed_params": params})
  self.add_trial(
  study.enqueue_trial(try_these_first[1])


In [97]:
# Re-use the study from the previous cell so the enqueued warm-start trials run first.
# (Uncomment to start from an empty study instead.)
# study = optuna.create_study(direction='minimize')
study.optimize(objective_wp4, n_trials=50)

# Optional export of every trial:
# write_results('Data/Hyperparametrization/scaled_lgbm_50trials.xlsx', 'wp4', study.trials_dataframe())

# Despite its name, `best_trial` holds the parameter dict of the best trial.
best_trial = study.best_params
best_trial

[32m[I 2021-09-07 11:19:15,786][0m Trial 0 finished with value: 0.06422184601752864 and parameters: {'reg_alpha': 0.08714703614419553, 'reg_lambda': 9.983645262139024, 'colsample_bytree': 0.9, 'subsample': 0.8, 'learning_rate': 0.13413154768816146, 'max_depth': 41, 'num_leaves': 613, 'min_child_samples': 15}. Best is trial 0 with value: 0.06422184601752864.[0m
[32m[I 2021-09-07 11:19:21,843][0m Trial 1 finished with value: 0.06519212725219317 and parameters: {'reg_alpha': 0.15331128149569725, 'reg_lambda': 0.28560184971009756, 'colsample_bytree': 0.7, 'subsample': 0.5, 'learning_rate': 0.11430869527789024, 'max_depth': 24, 'num_leaves': 856, 'min_child_samples': 14}. Best is trial 0 with value: 0.06422184601752864.[0m
[32m[I 2021-09-07 11:19:22,337][0m Trial 2 finished with value: 0.13619985008349192 and parameters: {'reg_alpha': 8.181665894479984, 'reg_lambda': 0.29305648023034275, 'colsample_bytree': 0.4, 'subsample': 0.4, 'learning_rate': 0.03012063115122468, 'max_depth': 65

{'reg_alpha': 0.08714703614419553,
 'reg_lambda': 9.983645262139024,
 'colsample_bytree': 0.9,
 'subsample': 0.8,
 'learning_rate': 0.13413154768816146,
 'max_depth': 41,
 'num_leaves': 613,
 'min_child_samples': 15}

In [117]:
# 100
# params_4 = {
#     'reg_alpha': 0.08714703614419553,
#     'reg_lambda': 9.983645262139024,
#     'colsample_bytree': 0.9,
#     'subsample': 0.8,
#     'learning_rate': 0.13413154768816146,
#     'max_depth': 41,
#     'num_leaves': 613,
#     'min_child_samples': 15,
# }

# {
#     'reg_alpha': 0.15331128149569725,
#     'reg_lambda': 0.28560184971009756,
#     'colsample_bytree': 0.7,
#     'subsample': 0.5,
#     'learning_rate': 0.11430869527789024,
#     'max_depth': 24,
#     'num_leaves': 856,
#     'min_child_samples': 14,
# }


# Active WP4 configuration (labelled "robust scaler with warm start").
# The values equal the best trial reported by the WP4 study above.
params_4 = dict(
    reg_alpha=0.08714703614419553,
    reg_lambda=9.983645262139024,
    colsample_bytree=0.9,
    subsample=0.8,
    learning_rate=0.13413154768816146,
    max_depth=41,
    num_leaves=613,
    min_child_samples=15,
)

In [99]:
# Cross-validate WP4 with the selected hyper-parameters in params_4.
# NOTE(review): this cell's execution count (99) predates the params_4 cell (117),
# so the stored output may come from an earlier params_4 -- re-run to confirm.
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X4, y4, params_4)
lgbm_scaled_cross_validation(X4, y4, params_4)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.06132943722936564
MAE score: 0.04104272073751141
None
-------------------FOLD 1-----------------
RMSE score: 0.0651100416043352
MAE score: 0.04247006738347042
None
-------------------FOLD 2-----------------
RMSE score: 0.06259852024703741
MAE score: 0.04101797797592181
None
-------------------FOLD 3-----------------
RMSE score: 0.06252702036247586
MAE score: 0.04111022663571434
None
-------------------FOLD 4-----------------
RMSE score: 0.0631443393718535
MAE score: 0.04147863284954414
None
-------------------FOLD 5-----------------
RMSE score: 0.06375804692090321
MAE score: 0.041918650791172664
None
-------------------FOLD 6-----------------
RMSE score: 0.0630222172074326
MAE score: 0.04163907681643609
None
-------------------FOLD 7-----------------
RMSE score: 0.0635700455540836
MAE score: 0.04182726662870645
None
-------------------FOLD 8-----------------
RMSE score: 0.06445521501400622
MAE score: 0.0430175117436378

## WP5

| |  | Mean | Std |
| --- | --- | --- | --- |
| No params | RMSE | 0.11722129743692011 | 0.0017732599261516583 |
| No params - Std | RMSE | 0.11729790317307003 | 0.0014884067903823003 |
| After tuning - 50trials| RMSE | 0.07721413638593042 | 0.0011020420293213135 |
| After tuning - 100trials| RMSE | 0.07297648991888442 | 0.0014970317509404526 |
| After tuning - 100trials - warm start | RMSE | 0.07362803793800192 | 0.0013223501622953715 |
| After tuning 50trials - std - warm start | RMSE | 0.07379631452164911 | 0.0019249511778190373 |
| After tuning 50trials W.S - RobustScaler | RMSE | 0.07295312559507504 | 0.0017094978756410563 |
| --- | --- | --- | --- |
| No params | MAE | 0.08497074568090211 | 0.0009101526501392155 |
| No params - Std | MAE | 0.0849648687365363 | 0.0011901882563545429 |
| After tuning - 50trials| MAE | 0.051677856581467195 | 0.0006374939894477714 |
| After tuning - 100trials| MAE | 0.04765271414503236 | 0.0006257356756510128 |
| After tuning - 100trials - warm start | MAE | 0.04785179154681675 | 0.0005795839605605526 |
| After tuning 50trials - std - warm start | MAE | 0.0480147138609328 | 0.0008230908433814974 |
| After tuning 50trials W.S - RobustScaler | MAE | 0.04783683731205745 | 0.0007485320718195094 |

In [67]:
# Put the target column 'wp' last, then discard the non-feature columns in `to_drop`.
wp5_feature_cols = [col for col in train_wp5 if col != "wp"]
wp5_X = train_wp5[wp5_feature_cols + ["wp"]].drop(columns=to_drop)
X5 = wp5_X.drop(columns='wp')
y5 = wp5_X['wp']


def objective_wp5(trial, data=X5, target=y5):
    """Optuna objective for WP5.

    Holds out 15% of ``data``/``target`` with a fixed seed (so every trial is
    scored on the same split) and returns whatever ``hyperparametrization``
    computes for this trial.
    """
    split = train_test_split(data, target, test_size=0.15, random_state=42)
    train_x, test_x, train_y, test_y = split
    return hyperparametrization(trial, train_x, test_x, train_y, test_y)

In [68]:
# Baseline for WP5: cross-validation with None passed as the parameter argument
# (presumably the LightGBM defaults -- confirm in Functions.helper_functions).
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X5, y5, None)
lgbm_scaled_cross_validation(X5, y5, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.12160449868539627
MAE score: 0.08798263988907014
None
-------------------FOLD 1-----------------
RMSE score: 0.12242553521341798
MAE score: 0.0889302913568743
None
-------------------FOLD 2-----------------
RMSE score: 0.1229999294538851
MAE score: 0.08888647888357365
None
-------------------FOLD 3-----------------
RMSE score: 0.12045816733281568
MAE score: 0.08782639945492014
None
-------------------FOLD 4-----------------
RMSE score: 0.1204457747415774
MAE score: 0.0870096976234127
None
-------------------FOLD 5-----------------
RMSE score: 0.11799889097696373
MAE score: 0.08542675864744775
None
-------------------FOLD 6-----------------
RMSE score: 0.12409313595923488
MAE score: 0.08930081757228697
None
-------------------FOLD 7-----------------
RMSE score: 0.11941588632662829
MAE score: 0.08651072866434162
None
-------------------FOLD 8-----------------
RMSE score: 0.12246390650104498
MAE score: 0.08776810002970215

In [101]:
# Warm-start configurations for the WP5 study. They match the parameter sets
# archived further down under the labels "100", "50" and "warm start 50 std",
# in that order.
try_these_first = [
    dict(
        reg_alpha=0.04781362061382749,
        reg_lambda=9.716980953182604,
        colsample_bytree=0.9,
        subsample=0.7,
        learning_rate=0.14614317149730652,
        max_depth=57,
        num_leaves=532,
        min_child_samples=7,
    ),
    dict(
        reg_alpha=0.0025641515787025067,
        reg_lambda=0.024580995322705475,
        colsample_bytree=0.8,
        subsample=0.4,
        learning_rate=0.11844862032615265,
        max_depth=69,
        num_leaves=328,
        min_child_samples=62,
    ),
    dict(
        reg_alpha=0.1420112281892889,
        reg_lambda=0.14745955581286027,
        colsample_bytree=0.9,
        subsample=0.7,
        learning_rate=0.15576554024588912,
        max_depth=61,
        num_leaves=483,
        min_child_samples=10,
    ),
]

# Fresh in-memory study; the enqueued configurations are evaluated first, in list order.
study = optuna.create_study(direction='minimize')
for warm_start_params in try_these_first:
    study.enqueue_trial(warm_start_params)

[32m[I 2021-09-07 11:37:16,029][0m A new study created in memory with name: no-name-dd7e39e8-9cfc-40fe-bb98-65457d0a0a25[0m
  study.enqueue_trial(try_these_first[0])
  study.enqueue_trial(try_these_first[1])
  study.enqueue_trial(try_these_first[2])


In [103]:
# Re-use the study from the previous cell so the enqueued warm-start trials run first.
# (Uncomment to start from an empty study instead.)
# study = optuna.create_study(direction='minimize')
study.optimize(objective_wp5, n_trials=50)

# Optional export of every trial:
# write_results('Data/Hyperparametrization/scaled_lgbm_50trials.xlsx', 'wp5', study.trials_dataframe())

# Despite its name, `best_trial` holds the parameter dict of the best trial.
best_trial = study.best_params
best_trial

[32m[I 2021-09-07 11:37:32,529][0m Trial 0 finished with value: 0.07328114869595546 and parameters: {'reg_alpha': 0.04781362061382749, 'reg_lambda': 9.716980953182604, 'colsample_bytree': 0.9, 'subsample': 0.7, 'learning_rate': 0.14614317149730652, 'max_depth': 57, 'num_leaves': 532, 'min_child_samples': 7}. Best is trial 0 with value: 0.07328114869595546.[0m
[32m[I 2021-09-07 11:37:35,041][0m Trial 1 finished with value: 0.07834956112406605 and parameters: {'reg_alpha': 0.0025641515787025067, 'reg_lambda': 0.024580995322705475, 'colsample_bytree': 0.8, 'subsample': 0.4, 'learning_rate': 0.11844862032615265, 'max_depth': 69, 'num_leaves': 328, 'min_child_samples': 62}. Best is trial 0 with value: 0.07328114869595546.[0m
[32m[I 2021-09-07 11:37:39,304][0m Trial 2 finished with value: 0.07527507779945834 and parameters: {'reg_alpha': 0.1420112281892889, 'reg_lambda': 0.14745955581286027, 'colsample_bytree': 0.9, 'subsample': 0.7, 'learning_rate': 0.15576554024588912, 'max_depth':

{'reg_alpha': 0.04781362061382749,
 'reg_lambda': 9.716980953182604,
 'colsample_bytree': 0.9,
 'subsample': 0.7,
 'learning_rate': 0.14614317149730652,
 'max_depth': 57,
 'num_leaves': 532,
 'min_child_samples': 7}

In [118]:
# warm start 50 std
# params_5 = {
#     'reg_alpha': 0.1420112281892889,
#     'reg_lambda': 0.14745955581286027,
#     'colsample_bytree': 0.9,
#     'subsample': 0.7,
#     'learning_rate': 0.15576554024588912,
#     'max_depth': 61,
#     'num_leaves': 483,
#     'min_child_samples': 10
# }


# # 100
# params_5 = {
#     'reg_alpha': 0.04781362061382749,
#     'reg_lambda': 9.716980953182604,
#     'colsample_bytree': 0.9,
#     'subsample': 0.7,
#     'learning_rate': 0.14614317149730652,
#     'max_depth': 57,
#     'num_leaves': 532,
#     'min_child_samples': 7,
# }

# # 50
# params_5 = {
#     'reg_alpha': 0.0025641515787025067,
#     'reg_lambda': 0.024580995322705475,
#     'colsample_bytree': 0.8,
#     'subsample': 0.4,
#     'learning_rate': 0.11844862032615265,
#     'max_depth': 69,
#     'num_leaves': 328,
#     'min_child_samples': 62,
# }

# Active WP5 configuration (labelled "robust scaler with warm start").
# The values equal the best trial reported by the WP5 study above.
params_5 = dict(
    reg_alpha=0.04781362061382749,
    reg_lambda=9.716980953182604,
    colsample_bytree=0.9,
    subsample=0.7,
    learning_rate=0.14614317149730652,
    max_depth=57,
    num_leaves=532,
    min_child_samples=7,
)

In [105]:
# Cross-validate WP5 with the selected hyper-parameters in params_5.
# NOTE(review): this cell's execution count (105) predates the params_5 cell (118),
# so the stored output may come from an earlier params_5 -- re-run to confirm.
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X5, y5, params_5)
lgbm_scaled_cross_validation(X5, y5, params_5)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.07537217049347301
MAE score: 0.048555942836035475
None
-------------------FOLD 1-----------------
RMSE score: 0.07590944351197824
MAE score: 0.0492342085223887
None
-------------------FOLD 2-----------------
RMSE score: 0.07481956883284945
MAE score: 0.0475218989674185
None
-------------------FOLD 3-----------------
RMSE score: 0.0732349855552006
MAE score: 0.04847627456606321
None
-------------------FOLD 4-----------------
RMSE score: 0.07121162666363555
MAE score: 0.047344262257087856
None
-------------------FOLD 5-----------------
RMSE score: 0.07336281614315454
MAE score: 0.04806400625485194
None
-------------------FOLD 6-----------------
RMSE score: 0.07029425824800647
MAE score: 0.046300188760616035
None
-------------------FOLD 7-----------------
RMSE score: 0.0725241158364099
MAE score: 0.04682132935919499
None
-------------------FOLD 8-----------------
RMSE score: 0.07326264859947337
MAE score: 0.04772755678246

## WP6

| |  | Mean | Std |
| --- | --- | --- | --- |
| No params | RMSE | 0.0940394026188472 | 0.0010749562915831372 |
| No params - std | RMSE | 0.09409110695713666 | 0.0013476052174559326 |
| After tuning - 50trials| RMSE | 0.05404362835213171 | 0.0008595325139047733 |
| After tuning 100trials| RMSE | 0.054861488499908594 | 0.0007335378238383901 |
| After tuning 50trials std - W.S. | RMSE |  0.05446642457662869 | 0.0009850877593637966 |
| After tuning with W.S. - 50trials - RobustScaler | RMSE | 0.053430744425113176 | 0.0010865698281516414 |
| --- | --- | --- | --- |
| No params | MAE | 0.070455643271004 | 0.0006641538274191148 |
| After tuning - 50trials| MAE | 0.03657758274248596 | 0.0005325521314198646 |
| After tuning 100trials| MAE | 0.03783933495157941 | 0.00045956939815828987 |
| After tuning 50trials std - warm start| MAE | 0.0367847898055025 | 0.00039001216012464674 |
| After tuning with W.S. - 50trials - RobustScaler | MAE | 0.03652663713189234 | 0.0006251342721346248 |

In [106]:
# Put the target column 'wp' last, then discard the non-feature columns in `to_drop`.
wp6_feature_cols = [col for col in train_wp6 if col != "wp"]
wp6_X = train_wp6[wp6_feature_cols + ["wp"]].drop(columns=to_drop)
X6 = wp6_X.drop(columns='wp')
y6 = wp6_X['wp']


def objective_wp6(trial, data=X6, target=y6):
    """Optuna objective for WP6.

    Holds out 15% of ``data``/``target`` with a fixed seed (so every trial is
    scored on the same split) and returns whatever ``hyperparametrization``
    computes for this trial.
    """
    split = train_test_split(data, target, test_size=0.15, random_state=42)
    train_x, test_x, train_y, test_y = split
    return hyperparametrization(trial, train_x, test_x, train_y, test_y)

In [74]:
# Baseline for WP6: cross-validation with None passed as the parameter argument
# (presumably the LightGBM defaults -- confirm in Functions.helper_functions).
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X6, y6, None)
lgbm_scaled_cross_validation(X6, y6, None)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.09859424794094925
MAE score: 0.07315841746939594
None
-------------------FOLD 1-----------------
RMSE score: 0.10022450839793628
MAE score: 0.07475606849683573
None
-------------------FOLD 2-----------------
RMSE score: 0.09597798461053336
MAE score: 0.07169925212215536
None
-------------------FOLD 3-----------------
RMSE score: 0.09788397193388976
MAE score: 0.07418759492601489
None
-------------------FOLD 4-----------------
RMSE score: 0.09769395690138527
MAE score: 0.07323600907984107
None
-------------------FOLD 5-----------------
RMSE score: 0.09952243539425228
MAE score: 0.07479873656793729
None
-------------------FOLD 6-----------------
RMSE score: 0.09711192615006495
MAE score: 0.07311671003773038
None
-------------------FOLD 7-----------------
RMSE score: 0.09947074306722367
MAE score: 0.07425401048117601
None
-------------------FOLD 8-----------------
RMSE score: 0.09663323833248737
MAE score: 0.0727290621284

In [108]:
# Warm-start configurations for the WP6 study. They match the parameter sets
# archived further down under the labels "100", "50" and "warm start",
# in that order.
try_these_first = [
    dict(
        reg_alpha=0.23451110075396234,
        reg_lambda=0.796705483623135,
        colsample_bytree=0.9,
        subsample=0.4,
        learning_rate=0.1561492653707781,
        max_depth=67,
        num_leaves=998,
        min_child_samples=45,
    ),
    dict(
        reg_alpha=0.11420484028619322,
        reg_lambda=2.6106462927544216,
        colsample_bytree=0.5,
        subsample=0.4,
        learning_rate=0.13579539259861131,
        max_depth=35,
        num_leaves=765,
        min_child_samples=3,
    ),
    dict(
        reg_alpha=0.19099691249064502,
        reg_lambda=0.3893771552082417,
        colsample_bytree=0.6,
        subsample=0.7,
        learning_rate=0.10214699989265669,
        max_depth=70,
        num_leaves=903,
        min_child_samples=1,
    ),
]

# Fresh in-memory study; the enqueued configurations are evaluated first, in list order.
study = optuna.create_study(direction='minimize')
for warm_start_params in try_these_first:
    study.enqueue_trial(warm_start_params)

[32m[I 2021-09-07 11:43:45,114][0m A new study created in memory with name: no-name-4bf8d3f7-841b-44c9-8763-2cc0d4be1945[0m
  study.enqueue_trial(try_these_first[0])
  study.enqueue_trial(try_these_first[1])
  study.enqueue_trial(try_these_first[2])


In [109]:
# Re-use the study from the previous cell so the enqueued warm-start trials run first.
# (Uncomment to start from an empty study instead.)
# study = optuna.create_study(direction='minimize')
study.optimize(objective_wp6, n_trials=50)

# Optional export of every trial:
# write_results('Data/Hyperparametrization/scaled_lgbm_50trials.xlsx', 'wp6', study.trials_dataframe())

# Despite its name, `best_trial` holds the parameter dict of the best trial.
best_trial = study.best_params
best_trial

[32m[I 2021-09-07 11:43:51,514][0m Trial 0 finished with value: 0.05714406123297331 and parameters: {'reg_alpha': 0.23451110075396234, 'reg_lambda': 0.796705483623135, 'colsample_bytree': 0.9, 'subsample': 0.4, 'learning_rate': 0.1561492653707781, 'max_depth': 67, 'num_leaves': 998, 'min_child_samples': 45}. Best is trial 0 with value: 0.05714406123297331.[0m
[32m[I 2021-09-07 11:43:56,559][0m Trial 1 finished with value: 0.05509693896422073 and parameters: {'reg_alpha': 0.11420484028619322, 'reg_lambda': 2.6106462927544216, 'colsample_bytree': 0.5, 'subsample': 0.4, 'learning_rate': 0.13579539259861131, 'max_depth': 35, 'num_leaves': 765, 'min_child_samples': 3}. Best is trial 1 with value: 0.05509693896422073.[0m
[32m[I 2021-09-07 11:44:03,428][0m Trial 2 finished with value: 0.054681706617884254 and parameters: {'reg_alpha': 0.19099691249064502, 'reg_lambda': 0.3893771552082417, 'colsample_bytree': 0.6, 'subsample': 0.7, 'learning_rate': 0.10214699989265669, 'max_depth': 70,

{'reg_alpha': 0.19099691249064502,
 'reg_lambda': 0.3893771552082417,
 'colsample_bytree': 0.6,
 'subsample': 0.7,
 'learning_rate': 0.10214699989265669,
 'max_depth': 70,
 'num_leaves': 903,
 'min_child_samples': 1}

In [119]:
# warm start
# params_6 = {
#     'reg_alpha': 0.19099691249064502,
#     'reg_lambda': 0.3893771552082417,
#     'colsample_bytree': 0.6,
#     'subsample': 0.7,
#     'learning_rate': 0.10214699989265669,
#     'max_depth': 70,
#     'num_leaves': 903,
#     'min_child_samples': 1
# } 

# # 100
# params_6 = {
#     'reg_alpha': 0.23451110075396234,
#     'reg_lambda': 0.796705483623135,
#     'colsample_bytree': 0.9,
#     'subsample': 0.4,
#     'learning_rate': 0.1561492653707781,
#     'max_depth': 67,
#     'num_leaves': 998,
#     'min_child_samples': 45,
# }

# # 50
# params_6 = {
#     'reg_alpha': 0.11420484028619322,
#     'reg_lambda': 2.6106462927544216,
#     'colsample_bytree': 0.5,
#     'subsample': 0.4,
#     'learning_rate': 0.13579539259861131,
#     'max_depth': 35,
#     'num_leaves': 765,
#     'min_child_samples': 3,
# }


# Active WP6 configuration. The values equal the best trial reported by the
# WP6 study above (identical to the archived "warm start" set).
params_6 = dict(
    reg_alpha=0.19099691249064502,
    reg_lambda=0.3893771552082417,
    colsample_bytree=0.6,
    subsample=0.7,
    learning_rate=0.10214699989265669,
    max_depth=70,
    num_leaves=903,
    min_child_samples=1,
)

In [111]:
# Cross-validate WP6 with the selected hyper-parameters in params_6.
# NOTE(review): this cell's execution count (111) predates the params_6 cell (119),
# so the stored output may come from an earlier params_6 -- re-run to confirm.
# Unscaled variant kept for comparison:
# lgbm_cross_validation(X6, y6, params_6)
lgbm_scaled_cross_validation(X6, y6, params_6)

-----------LGBM CROSS VALIDATION BEGINNING-----------
RMSE score: 0.0537528384482343
MAE score: 0.03657699049921024
None
-------------------FOLD 1-----------------
RMSE score: 0.0537478893963279
MAE score: 0.03631704530369509
None
-------------------FOLD 2-----------------
RMSE score: 0.05434696871806744
MAE score: 0.037521997421402234
None
-------------------FOLD 3-----------------
RMSE score: 0.05328186001666546
MAE score: 0.03656512301544506
None
-------------------FOLD 4-----------------
RMSE score: 0.051757643318816805
MAE score: 0.035761813873787296
None
-------------------FOLD 5-----------------
RMSE score: 0.05284533455633231
MAE score: 0.035975112340831554
None
-------------------FOLD 6-----------------
RMSE score: 0.05322163973750589
MAE score: 0.03654121789965823
None
-------------------FOLD 7-----------------
RMSE score: 0.055621951061471823
MAE score: 0.03746817525946214
None
-------------------FOLD 8-----------------
RMSE score: 0.05183739503088155
MAE score: 0.0355762279

# Predictions

## Functions

In [123]:
# Columns removed from a test frame before prediction: identifiers, the target
# 'wp', and the features listed in `feature_corr`.
to_drop_test = ['date','wd','forecast_time', 'forecast', "forecast_dist", 'wp']+feature_corr
def make_prediction_dataset(test, to_drop=to_drop_test):
    """Reduce a preprocessed test frame to one feature row per date to predict.

    Steps, in order:
      1. drop rows missing any of 'ws', 'u' or 'v';
      2. keep only rows whose 'wp' is missing (the ones still to be predicted);
      3. for each 'date', keep the row with the largest 'forecast_time';
      4. drop the columns listed in ``to_drop``.

    Returns the resulting DataFrame, ordered by 'date'.
    """
    with_weather = test.dropna(subset=['ws', 'u', 'v'], how='any')
    missing_target = with_weather.loc[with_weather['wp'].isna()]
    one_row_per_date = (
        missing_target
        .sort_values(by=['date', 'forecast_time'], ascending=[True, False])
        .drop_duplicates(subset='date')
    )
    return one_row_per_date.drop(to_drop, axis=1)

In [148]:
def make_submission_file(lst_X_trains, lst_y_trains, lst_tests, lst_models, dates):
    """Fit one model per target, predict its test set and build the submission.

    The four `lst_*` sequences are zipped together, so entry k of each one
    belongs to the same model.

    Parameters
    ----------
    lst_X_trains, lst_y_trains : sequence
        Training features and targets, one entry per model. Each target must
        support `min`, `max` and `.mean()`.
    lst_tests : sequence
        Feature matrices to predict on, one per model.
    lst_models : sequence
        Estimators exposing `fit(X, y)` and `predict(X)`; they are fitted in
        place.
    dates : array-like
        Values of the `date` column; must contain one entry per predicted row.

    Returns
    -------
    tuple (pandas.DataFrame, list)
        The frame has a `date` column followed by `wp1` .. `wpN` (one column
        per model) holding predictions clipped to [0, 1]; the list holds the
        fitted models in input order.
    """
    # `dates` is used for the date column instead of a notebook-level global,
    # so the function only depends on its arguments.
    submission_columns = {'date': dates}
    lst_models_trained = []
    per_model_inputs = zip(lst_X_trains, lst_y_trains, lst_tests, lst_models)
    for model_nb, (X, y, test, model) in enumerate(per_model_inputs, start=1):
        print(f'--------------Model {model_nb}--------------')
        model.fit(X, y)
        print(f'True:\n\tMin:{min(y)}\n\tMax:{max(y)}\n\tMean:{y.mean()}')
        predictions = model.predict(test)
        print(f'Prediction:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        # Clip to the fixed [0, 1] range rather than to the observed
        # min(y) / max(y) of the training target.
        predictions = np.clip(predictions, 0, 1)
        print(f'Prediction corrected:\n\tMin:{min(predictions)}\n\tMax:{max(predictions)}\n\tMean:{np.mean(predictions)}')
        submission_columns[f'wp{model_nb}'] = predictions
        lst_models_trained.append(model)

    df_predictions = pd.DataFrame(submission_columns)
    return df_predictions, lst_models_trained

## Submission 

In [149]:
def _build_scaled_lgbm(params):
    """Return a MaxAbsScaler -> LGBMRegressor pipeline configured with `params`."""
    # The regressor step is named 'ridge' although it holds an LGBMRegressor;
    # the name is kept so `named_steps['ridge']` lookups stay valid.
    return Pipeline([('scaler', MaxAbsScaler()), ('ridge', LGBMRegressor(**params))])


# One independent pipeline per WP target, each with its own tuned parameters.
model_1, model_2, model_3, model_4, model_5, model_6 = (
    _build_scaled_lgbm(params)
    for params in (params_1, params_2, params_3, params_4, params_5, params_6)
)

In [150]:
# Inputs for the submission run. The three lists are zipped together later,
# so position k in each of them must refer to the same WP target.
# To train without feature scaling, build each model as
# LGBMRegressor(**params_k) instead of the scaler + regressor Pipeline.
lst_models = [
    model_1, model_2, model_3,
    model_4, model_5, model_6,
]
lst_X_trains = [
    X1, X2, X3,
    X4, X5, X6,
]
lst_y_trains = [
    y1, y2, y3,
    y4, y5, y6,
]

In [151]:
# Build the prediction feature matrix for every WP test frame, in the same
# order as the models. A comprehension keeps the loop variable local, so no
# module-level `test` name is rebound on each pass.
lst_tests = [
    make_prediction_dataset(test_frame)
    for test_frame in (test_wp1, test_wp2, test_wp3, test_wp4, test_wp5, test_wp6)
]

In [152]:
# Fit every model on its training set and collect the test predictions
# together with the fitted models.
df_predictions, lst_models_trained = make_submission_file(
    lst_X_trains=lst_X_trains,
    lst_y_trains=lst_y_trains,
    lst_tests=lst_tests,
    lst_models=lst_models,
    dates=test_dates,
)

--------------Model 1--------------
True:
	Min:0.0
	Max:0.96
	Mean:0.2845981952075702
Prediction:
	Min:-0.013845727204492645
	Max:0.962581150337638
	Mean:0.29682551865581636
Prediction corrected:
	Min:0
	Max:0.962581150337638
	Mean:0.29685571274116673
--------------Model 2--------------
True:
	Min:0.0
	Max:0.966
	Mean:0.25890153769841273
Prediction:
	Min:-0.014861700692300516
	Max:0.9848387095488478
	Mean:0.25566332951768944
Prediction corrected:
	Min:0
	Max:0.9848387095488478
	Mean:0.25567592292927865
--------------Model 3--------------
True:
	Min:0.0
	Max:0.989
	Mean:0.2625247252747253
Prediction:
	Min:-0.004031606720203462
	Max:0.9379342912891017
	Mean:0.28904000448245387
Prediction corrected:
	Min:0
	Max:0.9379342912891017
	Mean:0.28904188290485683
--------------Model 4--------------
True:
	Min:0.0
	Max:0.992
	Mean:0.2763637820512821
Prediction:
	Min:-0.017744098520285082
	Max:0.9256871227201043
	Mean:0.2803732040461147
Prediction corrected:
	Min:0
	Max:0.9256871227201043
	Mean:0.2

## Saving models

In [153]:
# Metadata describing this submission: it names the output files and is
# written into the companion .txt description.
nb_sub = 30
# These labels must name the scaler actually used in the model pipelines,
# which is MaxAbsScaler; otherwise the saved description is misleading.
model = "maxabs-lgbm"
prepro = 'MaxAbsScaler'
postpro = "Prediction limited by 0-1"

In [154]:
# Write the submission as a ';'-separated CSV whose name carries the
# submission number and the model label.
submission_csv_path = f'Predictions/submission_nb_{nb_sub}_{model}.csv'
df_predictions.to_csv(submission_csv_path, sep=';', index=False)

In [155]:
# Record the hyper-parameters and processing steps of this submission.
# Mode "x" is kept on purpose: it raises FileExistsError instead of silently
# overwriting the description of an existing submission number.
# The context manager closes the file even if a write fails.
with open(f"Predictions/submission-{nb_sub}_{model}.txt", "x") as f:
    # One line per model, labelled with its own index (1..6).
    f.writelines(
        f"params_lgbm_{model_nb} = {params}\n"
        for model_nb, params in enumerate(
            (params_1, params_2, params_3, params_4, params_5, params_6),
            start=1,
        )
    )
    f.write(f"Preprocessing: {prepro}\n")
    f.write(f"Postprocessing: {postpro}\n")

In [132]:
# pkl_model = "Models/LGBM/LGBM-maxabs-wp1-100trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[0], file)
    
    
# pkl_model = "Models/LGBM/LGBM-maxabs-wp2-50trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[1], file)
    

# pkl_model = "Models/LGBM/LGBM-maxabs-wp3-100trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[2], file)


# pkl_model = "Models/LGBM/LGBM-maxabs-wp4-50trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[3], file)


# pkl_model = "Models/LGBM/LGBM-maxabs-wp5-50trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[4], file)


# pkl_model = "Models/LGBM/LGBM-maxabs-wp6-50trials_best_warm_start-featselect.pkl"
# with open(pkl_model, 'wb') as file:
#     pickle.dump(lst_models_trained[5], file)