In [None]:
import random
import warnings

import lightgbm as lgb
import optuna
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split

In [None]:
class CONFIG:
    """Tunable settings shared by the split, cross-validation and Optuna cells."""

    # Reproducibility: one seed reused for the hold-out split, the fold
    # assignment and the LightGBM models.
    seed = 42

    # Data partitioning.
    test_size_optuna = 0.1  # fraction of rows held out by train_test_split
    num_folds = 5           # number of cross-validation folds

    # Model training.
    num_estimators = 2_000           # n_estimators given to each regressor
    num_trials_early_stopping = 150  # early-stopping patience used when fitting

    # Hyperparameter search.
    use_optuna_tuning = True  # run the Optuna study when True
    num_trials_optuna = 100   # number of trials passed to study.optimize

In [None]:
# Seed Python's `random` module with the shared CONFIG seed. This does not seed
# NumPy; the split, fold assignment and models receive CONFIG.seed explicitly.
random.seed(CONFIG.seed)

In [None]:
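# TODO: load the data here. Later cells expect `train_feats` (a DataFrame that
# includes a 'target' column) and `feature_names` (the model input columns).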

In [None]:
# Hold out CONFIG.test_size_optuna of the rows; only df_train is used for the
# cross-validated hyperparameter search below.
df_train, df_test = train_test_split(
    train_feats,
    test_size=CONFIG.test_size_optuna,
    random_state=CONFIG.seed,
)
# Work on an explicit copy: columns are added to df_train below, and writing
# into a frame derived from train_feats can trigger SettingWithCopyWarning.
df_train = df_train.copy()

# Assign every training row to exactly one validation fold.
kfold = KFold(n_splits=CONFIG.num_folds, shuffle=True, random_state=CONFIG.seed)
# Fill fold ids by position (val_idx holds positional indices) so the result is
# correct even if the index has duplicate labels, and the column stays integer
# instead of becoming float through NaN-initialised partial assignment.
fold_ids = pd.Series(-1, index=df_train.index)
for fold, (_, val_idx) in enumerate(kfold.split(df_train)):
    fold_ids.iloc[val_idx] = fold
df_train["fold"] = fold_ids.to_numpy()

In [None]:
def objective(trial,df_train=df_train):
    """Optuna objective: out-of-fold RMSE of a LightGBM regressor.

    Samples one hyperparameter set from ``trial``, fits one model per fold id
    (0 .. CONFIG.num_folds - 1) with early stopping on that fold, and scores
    the combined out-of-fold predictions against the ``target`` column.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        df_train: DataFrame containing the columns listed in the global
            ``feature_names`` plus ``target`` and ``fold`` columns. Defaults
            to the module-level ``df_train`` bound when this cell is run.

    Returns:
        Root mean squared error over all out-of-fold predictions.

    Side effects:
        Overwrites ``df_train["pred_score"]`` with this trial's out-of-fold
        predictions and records each fold's best iteration in the trial's
        ``best_iterations`` user attribute.
    """
    param = {
        'random_state': CONFIG.seed,
        'n_estimators': CONFIG.num_estimators,
        # suggest_loguniform is deprecated; suggest_float(..., log=True)
        # samples from the same log-uniform range.
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 1),
        # NOTE(review): LightGBM applies `subsample` only when `subsample_freq`
        # is > 0; as written this parameter is probably a no-op. Left unchanged
        # because the final model built from study.best_params would need the
        # same setting.
        'subsample': trial.suggest_float('subsample', 0.5, 1),
        'learning_rate': trial.suggest_categorical('learning_rate',
                                                   [0.005, 0.01, 0.02]),
        'num_leaves': trial.suggest_int('num_leaves', 8, 64),
        'max_depth': trial.suggest_int('max_depth', 6, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 50),
    }

    # Collect predictions locally so the score never depends on values a
    # previous trial left behind in df_train["pred_score"].
    oof_pred = pd.Series(float('nan'), index=df_train.index)
    best_iterations = []

    for fold in range(CONFIG.num_folds):
        is_val = (df_train['fold'] == fold).to_numpy()
        fold_df_train = df_train.loc[~is_val]
        fold_df_val = df_train.loc[is_val]

        model = lgb.LGBMRegressor(**param)
        # Early stopping and log silencing go through callbacks: the
        # `early_stopping_rounds=` / `verbose=` fit keywords were removed in
        # LightGBM 4. Callbacks are rebuilt per fold so no state is shared.
        # NOTE: early stopping monitors the same fold that is scored, so the
        # returned RMSE is slightly optimistic.
        model.fit(
            fold_df_train[feature_names], fold_df_train['target'],
            eval_set=[(fold_df_val[feature_names], fold_df_val['target'])],
            callbacks=[
                lgb.early_stopping(
                    stopping_rounds=CONFIG.num_trials_early_stopping,
                    verbose=False,
                ),
                lgb.log_evaluation(period=0),
            ],
        )
        best_iterations.append(model.best_iteration_)
        oof_pred.loc[is_val] = model.predict(fold_df_val[feature_names])

    # Keep the per-fold best iterations with the trial instead of printing them.
    trial.set_user_attr('best_iterations', best_iterations)
    df_train['pred_score'] = oof_pred.to_numpy()

    # sqrt(MSE) works across scikit-learn versions; the `squared=False`
    # keyword was deprecated and later removed.
    rmse = mean_squared_error(df_train['target'], oof_pred) ** 0.5
    return float(rmse)

In [None]:
# Silence all warnings so repeated per-trial library warnings do not flood the
# cell output. Note that this filter stays active for the rest of the session.
warnings.filterwarnings('ignore')
if CONFIG.use_optuna_tuning:
    study = optuna.create_study(
        direction='minimize',
        study_name='Optimize boosting hyperparameters',
        # Seed the sampler so the search is repeatable, in line with the
        # seeded split, folds and models.
        sampler=optuna.samplers.TPESampler(seed=CONFIG.seed),
    )
    study.optimize(objective, n_trials=CONFIG.num_trials_optuna)

In [None]:
if CONFIG.use_optuna_tuning:
    best_params = study.best_params
    print(best_params)
else:
    best_params = 