# TODO:
* #### Tune XGBoost wihtout any features

In [1]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import xgboost as xgb
import lightgbm as lgbm
import catboost
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score
from IPython.display import display
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import optuna
from optuna.samplers import TPESampler
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [2]:
import warnings
warnings.filterwarnings('ignore')

# Utils

In [3]:
def plot_feature_importances(cols, feat_imps):
    fig = plt.figure(figsize = (15, 0.35*len(feat_imps)))
    feature_imp_df = pd.DataFrame(data=zip(cols, feat_imps), columns=["feature", "importance"]).sort_values(by="importance", ascending=False)
    plt.title('Feature importances', size=25, y=1.05)
    sns.barplot(data=feature_imp_df, x='importance', y='feature')
    plt.show()

# Loading Data

In [4]:
BASE_DIR = Path("/kaggle/input/playground-series-s3e2/")

train = pd.read_csv(BASE_DIR / "train.csv").drop(columns="id")
test = pd.read_csv(BASE_DIR / "test.csv").drop(columns="id")

# Preprocessing

In [5]:
df = pd.concat([train.drop(columns=["stroke"]), test], axis=0).reset_index(drop=True)
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status
0,Male,28.0,0,0,Yes,Private,Urban,79.53,31.1,never smoked
1,Male,33.0,0,0,Yes,Private,Rural,78.44,23.9,formerly smoked
2,Female,42.0,0,0,Yes,Private,Rural,103.0,40.3,Unknown
3,Male,56.0,0,0,Yes,Private,Urban,64.87,28.8,never smoked
4,Female,24.0,0,0,No,Private,Rural,73.36,28.8,never smoked


# Feat Engineering

In [6]:
df['morbid'] = np.where(df.bmi>40,1,0)
df['obese'] = np.where(df.bmi>30,1,0)
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,morbid,obese
0,Male,28.0,0,0,Yes,Private,Urban,79.53,31.1,never smoked,0,1
1,Male,33.0,0,0,Yes,Private,Rural,78.44,23.9,formerly smoked,0,0
2,Female,42.0,0,0,Yes,Private,Rural,103.0,40.3,Unknown,1,1
3,Male,56.0,0,0,Yes,Private,Urban,64.87,28.8,never smoked,0,0
4,Female,24.0,0,0,No,Private,Rural,73.36,28.8,never smoked,0,0


In [7]:
def feature_risk_factors(df):
    df["risk_factors"] = df[[
        "avg_glucose_level", "age", "bmi", 
        "hypertension", "heart_disease", 
        "smoking_status"
    ]].apply(
        lambda x: \
        0 + (1 if x.avg_glucose_level > 99 else 0) + \
        (1 if x.age > 45 else 0) + (1 if x.bmi > 24.99 else 0) + \
        (1 if x.hypertension == 1 else 0) + \
        (1 if x.heart_disease == 1 else 0) + \
        (1 if x.smoking_status in ["formerly smoked", "smokes"] else 0),
        axis=1
    )
    return df

In [8]:
feature_risk_factors(df)

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,morbid,obese,risk_factors
0,Male,28.0,0,0,Yes,Private,Urban,79.53,31.1,never smoked,0,1,1
1,Male,33.0,0,0,Yes,Private,Rural,78.44,23.9,formerly smoked,0,0,1
2,Female,42.0,0,0,Yes,Private,Rural,103.00,40.3,Unknown,1,1,2
3,Male,56.0,0,0,Yes,Private,Urban,64.87,28.8,never smoked,0,0,2
4,Female,24.0,0,0,No,Private,Rural,73.36,28.8,never smoked,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25503,Female,27.0,0,0,No,Private,Urban,75.77,17.6,never smoked,0,0,0
25504,Male,49.0,0,0,Yes,Private,Urban,102.91,26.7,Unknown,0,0,3
25505,Female,3.0,0,0,No,children,Rural,104.04,18.3,Unknown,0,0,1
25506,Male,31.0,0,0,Yes,Private,Urban,82.41,28.7,never smoked,0,0,1


In [9]:
df = pd.get_dummies(df)
df.head()

Unnamed: 0,age,hypertension,heart_disease,avg_glucose_level,bmi,morbid,obese,risk_factors,gender_Female,gender_Male,...,work_type_Never_worked,work_type_Private,work_type_Self-employed,work_type_children,Residence_type_Rural,Residence_type_Urban,smoking_status_Unknown,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes
0,28.0,0,0,79.53,31.1,0,1,1,0,1,...,0,1,0,0,0,1,0,0,1,0
1,33.0,0,0,78.44,23.9,0,0,1,0,1,...,0,1,0,0,1,0,0,1,0,0
2,42.0,0,0,103.0,40.3,1,1,2,1,0,...,0,1,0,0,1,0,1,0,0,0
3,56.0,0,0,64.87,28.8,0,0,2,0,1,...,0,1,0,0,0,1,0,0,1,0
4,24.0,0,0,73.36,28.8,0,0,1,1,0,...,0,1,0,0,1,0,0,0,1,0


In [10]:
sc = StandardScaler()
feats_to_scale = ["age", "avg_glucose_level", "bmi"]
df[feats_to_scale] = sc.fit_transform(df[feats_to_scale])

In [13]:
X = df.iloc[:-len(test), :]
test_new = df.iloc[-len(test):, :]
y = train.stroke

# Tuning XGBoost - with feature engineering

In [14]:
def objective(trial, X, y):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_loguniform('gamma', 0.00001, 0.5),
        'subsample': trial.suggest_loguniform('subsample', 0.2, 1.0),
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.2, 1.0),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.00001, 1.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.00001, 1.0)
    }
    

    cv = StratifiedKFold(n_splits=8, shuffle=True, random_state=1337)

    cv_scores = np.empty(8)
    for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = xgb.XGBClassifier(**params)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="auc",
            early_stopping_rounds=50,
            verbose=0,
        )
        y_preds = model.predict_proba(X_test)[:, 1]
        cv_scores[fold] = roc_auc_score(y_test, y_preds)
    
    auc = np.mean(cv_scores)
    print(f"AVG CV AUC: \t {auc}")

    return auc

In [15]:
study = optuna.create_study(study_name="xgboost_tuning", direction="maximize")
func = lambda trial: objective(trial, X, y)
study.optimize(func, n_trials=100)

[32m[I 2023-01-16 08:39:27,543][0m A new study created in memory with name: xgboost_tuning[0m
[32m[I 2023-01-16 08:39:34,965][0m Trial 0 finished with value: 0.8900998026034261 and parameters: {'n_estimators': 87, 'max_depth': 10, 'learning_rate': 0.028523547398637612, 'min_child_weight': 2, 'gamma': 0.08595428660869475, 'subsample': 0.693315689604206, 'colsample_bytree': 0.25352514809448096, 'reg_alpha': 0.00012805964720578848, 'reg_lambda': 0.01495725742459704}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.8900998026034261


[32m[I 2023-01-16 08:39:40,578][0m Trial 1 finished with value: 0.8881137584031583 and parameters: {'n_estimators': 464, 'max_depth': 7, 'learning_rate': 0.04092859536692872, 'min_child_weight': 8, 'gamma': 5.591033745535546e-05, 'subsample': 0.8956860909232143, 'colsample_bytree': 0.3140240610871242, 'reg_alpha': 0.03558386055319189, 'reg_lambda': 0.0029222443437971783}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.8881137584031583


[32m[I 2023-01-16 08:39:50,228][0m Trial 2 finished with value: 0.8889256035779854 and parameters: {'n_estimators': 332, 'max_depth': 5, 'learning_rate': 0.0632354806807259, 'min_child_weight': 3, 'gamma': 0.059611858244196445, 'subsample': 0.6918593401468862, 'colsample_bytree': 0.9107253124138736, 'reg_alpha': 0.0002045529704007026, 'reg_lambda': 0.0003352659271131514}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.8889256035779854


[32m[I 2023-01-16 08:40:01,634][0m Trial 3 finished with value: 0.8873653251521886 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.012894936967035118, 'min_child_weight': 3, 'gamma': 0.0037260254673483918, 'subsample': 0.4336244255025771, 'colsample_bytree': 0.38685239574063673, 'reg_alpha': 5.2103563742656166e-05, 'reg_lambda': 0.0012443684555340706}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.8873653251521886


[32m[I 2023-01-16 08:40:06,537][0m Trial 4 finished with value: 0.88686838272849 and parameters: {'n_estimators': 378, 'max_depth': 3, 'learning_rate': 0.018787062542579794, 'min_child_weight': 3, 'gamma': 0.003033844033940843, 'subsample': 0.25693246706359385, 'colsample_bytree': 0.4100673773092707, 'reg_alpha': 0.1571165035820976, 'reg_lambda': 3.212095367298236e-05}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.88686838272849


[32m[I 2023-01-16 08:40:13,021][0m Trial 5 finished with value: 0.8817673895338404 and parameters: {'n_estimators': 313, 'max_depth': 10, 'learning_rate': 0.28435400783271464, 'min_child_weight': 6, 'gamma': 0.016660035331654957, 'subsample': 0.4037162477284573, 'colsample_bytree': 0.6073773960425339, 'reg_alpha': 0.005077417198765005, 'reg_lambda': 0.00014204930390372946}. Best is trial 0 with value: 0.8900998026034261.[0m


AVG CV AUC: 	 0.8817673895338404


[32m[I 2023-01-16 08:40:18,733][0m Trial 6 finished with value: 0.8902559598580952 and parameters: {'n_estimators': 143, 'max_depth': 3, 'learning_rate': 0.23566657762272047, 'min_child_weight': 5, 'gamma': 0.00036512547690086515, 'subsample': 0.5686578502928974, 'colsample_bytree': 0.8851026087626026, 'reg_alpha': 0.09229799911764441, 'reg_lambda': 0.18532843269242974}. Best is trial 6 with value: 0.8902559598580952.[0m


AVG CV AUC: 	 0.8902559598580952


[32m[I 2023-01-16 08:40:24,560][0m Trial 7 finished with value: 0.8901968616705548 and parameters: {'n_estimators': 385, 'max_depth': 5, 'learning_rate': 0.04153790062520359, 'min_child_weight': 4, 'gamma': 3.90060239373841e-05, 'subsample': 0.40039763332866185, 'colsample_bytree': 0.7175438617956618, 'reg_alpha': 0.010859558097531695, 'reg_lambda': 0.27353464602329197}. Best is trial 6 with value: 0.8902559598580952.[0m


AVG CV AUC: 	 0.8901968616705548


[32m[I 2023-01-16 08:40:34,619][0m Trial 8 finished with value: 0.8889497605013597 and parameters: {'n_estimators': 240, 'max_depth': 7, 'learning_rate': 0.06278622703247551, 'min_child_weight': 10, 'gamma': 2.760507522410726e-05, 'subsample': 0.21129742135760443, 'colsample_bytree': 0.8367579387405412, 'reg_alpha': 0.005550283111709085, 'reg_lambda': 0.016997862475977805}. Best is trial 6 with value: 0.8902559598580952.[0m


AVG CV AUC: 	 0.8889497605013597


[32m[I 2023-01-16 08:40:47,958][0m Trial 9 finished with value: 0.8832750403765719 and parameters: {'n_estimators': 428, 'max_depth': 10, 'learning_rate': 0.042289190934440106, 'min_child_weight': 1, 'gamma': 0.018672602607429, 'subsample': 0.37045571318354004, 'colsample_bytree': 0.9345363401196565, 'reg_alpha': 0.00028940418198060295, 'reg_lambda': 0.0034942412787105376}. Best is trial 6 with value: 0.8902559598580952.[0m


AVG CV AUC: 	 0.8832750403765719


[32m[I 2023-01-16 08:40:51,367][0m Trial 10 finished with value: 0.8905458429385862 and parameters: {'n_estimators': 54, 'max_depth': 3, 'learning_rate': 0.23874978506357772, 'min_child_weight': 6, 'gamma': 0.00031197174187481627, 'subsample': 0.5921807819908562, 'colsample_bytree': 0.5564034766647046, 'reg_alpha': 0.4528187517380507, 'reg_lambda': 0.7698121536820096}. Best is trial 10 with value: 0.8905458429385862.[0m


AVG CV AUC: 	 0.8905458429385862


[32m[I 2023-01-16 08:40:54,498][0m Trial 11 finished with value: 0.8899207825462778 and parameters: {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.2787636126803436, 'min_child_weight': 6, 'gamma': 0.00040722363185495254, 'subsample': 0.6144224198352045, 'colsample_bytree': 0.5508964310967478, 'reg_alpha': 0.8464632023731716, 'reg_lambda': 0.9880293480931285}. Best is trial 10 with value: 0.8905458429385862.[0m


AVG CV AUC: 	 0.8899207825462778


[32m[I 2023-01-16 08:41:00,336][0m Trial 12 finished with value: 0.8900458808994658 and parameters: {'n_estimators': 151, 'max_depth': 4, 'learning_rate': 0.13687100174440597, 'min_child_weight': 8, 'gamma': 0.00038106585916669166, 'subsample': 0.5591063481356113, 'colsample_bytree': 0.5410077878369948, 'reg_alpha': 0.7570796302886506, 'reg_lambda': 0.08632443828713074}. Best is trial 10 with value: 0.8905458429385862.[0m


AVG CV AUC: 	 0.8900458808994658


[32m[I 2023-01-16 08:41:07,451][0m Trial 13 finished with value: 0.8884101120881245 and parameters: {'n_estimators': 135, 'max_depth': 5, 'learning_rate': 0.136333092641664, 'min_child_weight': 5, 'gamma': 0.0003341244321185358, 'subsample': 0.9791060871664456, 'colsample_bytree': 0.6956075180297232, 'reg_alpha': 0.09817312246275925, 'reg_lambda': 0.1248611069964087}. Best is trial 10 with value: 0.8905458429385862.[0m


AVG CV AUC: 	 0.8884101120881245


[32m[I 2023-01-16 08:41:14,165][0m Trial 14 finished with value: 0.8913978058611598 and parameters: {'n_estimators': 229, 'max_depth': 3, 'learning_rate': 0.14323835461421203, 'min_child_weight': 7, 'gamma': 0.00018754505289875784, 'subsample': 0.5424563896181578, 'colsample_bytree': 0.47257350694429434, 'reg_alpha': 0.16433591568096, 'reg_lambda': 0.7797110060749756}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8913978058611598


[32m[I 2023-01-16 08:41:20,284][0m Trial 15 finished with value: 0.8888147405546429 and parameters: {'n_estimators': 236, 'max_depth': 8, 'learning_rate': 0.13377255618537628, 'min_child_weight': 8, 'gamma': 1.2889170826112522e-05, 'subsample': 0.5059359517769837, 'colsample_bytree': 0.4944557073392764, 'reg_alpha': 0.0011024768985407616, 'reg_lambda': 0.5650213159364638}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8888147405546429


[32m[I 2023-01-16 08:41:26,006][0m Trial 16 finished with value: 0.8909556478886849 and parameters: {'n_estimators': 218, 'max_depth': 4, 'learning_rate': 0.10122249735709232, 'min_child_weight': 7, 'gamma': 9.0271341511563e-05, 'subsample': 0.31122243007945505, 'colsample_bytree': 0.3347182023012386, 'reg_alpha': 0.2603989383194213, 'reg_lambda': 0.030049050641426364}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8909556478886849


[32m[I 2023-01-16 08:41:30,926][0m Trial 17 finished with value: 0.8839497087365238 and parameters: {'n_estimators': 221, 'max_depth': 4, 'learning_rate': 0.09514621068089478, 'min_child_weight': 10, 'gamma': 0.4422552821093667, 'subsample': 0.3000046203580182, 'colsample_bytree': 0.20922207799972067, 'reg_alpha': 0.028040201490641105, 'reg_lambda': 0.02080872018772668}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8839497087365238


[32m[I 2023-01-16 08:41:38,215][0m Trial 18 finished with value: 0.8897715272697155 and parameters: {'n_estimators': 293, 'max_depth': 6, 'learning_rate': 0.09953156451318378, 'min_child_weight': 7, 'gamma': 7.749056255004087e-05, 'subsample': 0.3321229418590307, 'colsample_bytree': 0.32187629902344583, 'reg_alpha': 1.2857444851649668e-05, 'reg_lambda': 0.056817596990390594}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8897715272697155


[32m[I 2023-01-16 08:41:43,273][0m Trial 19 finished with value: 0.8882630136797207 and parameters: {'n_estimators': 199, 'max_depth': 4, 'learning_rate': 0.08857125115008613, 'min_child_weight': 9, 'gamma': 0.00010317871151242222, 'subsample': 0.24253490436088274, 'colsample_bytree': 0.3389854660126782, 'reg_alpha': 0.2924362592287869, 'reg_lambda': 0.03673157971609023}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8882630136797207


[32m[I 2023-01-16 08:41:48,275][0m Trial 20 finished with value: 0.8894367813315296 and parameters: {'n_estimators': 270, 'max_depth': 6, 'learning_rate': 0.17744078737649344, 'min_child_weight': 7, 'gamma': 0.0011996542982229737, 'subsample': 0.29896848352106903, 'colsample_bytree': 0.27970063109367815, 'reg_alpha': 0.0011275919594917663, 'reg_lambda': 0.0027778174265809252}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8894367813315296


[32m[I 2023-01-16 08:41:53,265][0m Trial 21 finished with value: 0.8911342365722016 and parameters: {'n_estimators': 112, 'max_depth': 3, 'learning_rate': 0.17707344845581202, 'min_child_weight': 7, 'gamma': 0.00013365185102683062, 'subsample': 0.7573431089478752, 'colsample_bytree': 0.45214981021725464, 'reg_alpha': 0.3342714284323996, 'reg_lambda': 0.5297841983684463}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8911342365722016


[32m[I 2023-01-16 08:41:58,717][0m Trial 22 finished with value: 0.8907809415678534 and parameters: {'n_estimators': 106, 'max_depth': 4, 'learning_rate': 0.1851429527675517, 'min_child_weight': 7, 'gamma': 1.067256851398991e-05, 'subsample': 0.8007770845487947, 'colsample_bytree': 0.4510300634336801, 'reg_alpha': 0.03267474280579804, 'reg_lambda': 0.2877335270393423}. Best is trial 14 with value: 0.8913978058611598.[0m


AVG CV AUC: 	 0.8907809415678534


[32m[I 2023-01-16 08:42:05,770][0m Trial 23 finished with value: 0.8913982372347915 and parameters: {'n_estimators': 189, 'max_depth': 3, 'learning_rate': 0.17436017452525826, 'min_child_weight': 9, 'gamma': 0.001526472401847248, 'subsample': 0.4741539250421119, 'colsample_bytree': 0.3853912955554281, 'reg_alpha': 0.16371519704400073, 'reg_lambda': 0.33207786272003015}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8913982372347915


[32m[I 2023-01-16 08:42:11,047][0m Trial 24 finished with value: 0.8900036062835608 and parameters: {'n_estimators': 173, 'max_depth': 3, 'learning_rate': 0.18517796968229835, 'min_child_weight': 9, 'gamma': 0.0007612240631918093, 'subsample': 0.48879977604816727, 'colsample_bytree': 0.38330531090713266, 'reg_alpha': 0.07104499566407184, 'reg_lambda': 0.35885048392208696}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8900036062835608


[32m[I 2023-01-16 08:42:17,090][0m Trial 25 finished with value: 0.889210310174896 and parameters: {'n_estimators': 93, 'max_depth': 5, 'learning_rate': 0.15389611302571066, 'min_child_weight': 9, 'gamma': 0.006821244245186767, 'subsample': 0.7292619063474938, 'colsample_bytree': 0.48569599022138954, 'reg_alpha': 0.02198059409926247, 'reg_lambda': 0.17405352314803585}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.889210310174896


[32m[I 2023-01-16 08:42:25,869][0m Trial 26 finished with value: 0.8910699619010808 and parameters: {'n_estimators': 265, 'max_depth': 3, 'learning_rate': 0.0757624036945751, 'min_child_weight': 8, 'gamma': 0.0001650620656969676, 'subsample': 0.4906566993723628, 'colsample_bytree': 0.42169753942971294, 'reg_alpha': 0.18460331133340727, 'reg_lambda': 0.8756023475324138}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8910699619010808


[32m[I 2023-01-16 08:42:33,056][0m Trial 27 finished with value: 0.887696620101321 and parameters: {'n_estimators': 190, 'max_depth': 8, 'learning_rate': 0.12504081641727582, 'min_child_weight': 9, 'gamma': 0.001942263358253981, 'subsample': 0.7965019106652662, 'colsample_bytree': 0.6315195519963119, 'reg_alpha': 0.4760519823919224, 'reg_lambda': 0.09423984064025481}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.887696620101321


[32m[I 2023-01-16 08:42:39,041][0m Trial 28 finished with value: 0.8891568198445675 and parameters: {'n_estimators': 123, 'max_depth': 4, 'learning_rate': 0.21605787369159743, 'min_child_weight': 5, 'gamma': 0.0009083268524306617, 'subsample': 0.6472599322908774, 'colsample_bytree': 0.46164902887208836, 'reg_alpha': 0.011761422344410067, 'reg_lambda': 0.41142495157217945}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8891568198445675


[32m[I 2023-01-16 08:42:42,372][0m Trial 29 finished with value: 0.8819257036566679 and parameters: {'n_estimators': 82, 'max_depth': 3, 'learning_rate': 0.022999173662793755, 'min_child_weight': 10, 'gamma': 0.007906234189817157, 'subsample': 0.7589109145954849, 'colsample_bytree': 0.24581925975842814, 'reg_alpha': 0.05370570241417156, 'reg_lambda': 0.009247833083059234}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8819257036566679


[32m[I 2023-01-16 08:42:49,307][0m Trial 30 finished with value: 0.8894216832544207 and parameters: {'n_estimators': 164, 'max_depth': 5, 'learning_rate': 0.16295528847806975, 'min_child_weight': 7, 'gamma': 0.00017842020323057762, 'subsample': 0.5245154981494186, 'colsample_bytree': 0.3620432919047752, 'reg_alpha': 0.001797724339636795, 'reg_lambda': 0.008855096823860733}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8894216832544207


[32m[I 2023-01-16 08:42:54,967][0m Trial 31 finished with value: 0.8885063084079897 and parameters: {'n_estimators': 262, 'max_depth': 3, 'learning_rate': 0.08231389696995317, 'min_child_weight': 8, 'gamma': 0.00014651912892541563, 'subsample': 0.4518648586883747, 'colsample_bytree': 0.28262672282102036, 'reg_alpha': 0.17432258124873878, 'reg_lambda': 0.5235188975555688}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8885063084079897


[32m[I 2023-01-16 08:43:04,025][0m Trial 32 finished with value: 0.8902947834849468 and parameters: {'n_estimators': 330, 'max_depth': 3, 'learning_rate': 0.07299449680433588, 'min_child_weight': 8, 'gamma': 2.5969826802791408e-05, 'subsample': 0.470735326134948, 'colsample_bytree': 0.45279397202444754, 'reg_alpha': 0.14834678394054593, 'reg_lambda': 0.9490134390948772}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8902947834849468


[32m[I 2023-01-16 08:43:11,128][0m Trial 33 finished with value: 0.889706821224963 and parameters: {'n_estimators': 256, 'max_depth': 4, 'learning_rate': 0.11327029137974966, 'min_child_weight': 8, 'gamma': 0.0007012557475768772, 'subsample': 0.648360659228178, 'colsample_bytree': 0.406462000304397, 'reg_alpha': 0.48833954767950577, 'reg_lambda': 0.22600345164883318}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.889706821224963


[32m[I 2023-01-16 08:43:17,451][0m Trial 34 finished with value: 0.8883423864279502 and parameters: {'n_estimators': 208, 'max_depth': 3, 'learning_rate': 0.05573158621374433, 'min_child_weight': 9, 'gamma': 5.622772131877818e-05, 'subsample': 0.844767042225811, 'colsample_bytree': 0.420875335393252, 'reg_alpha': 0.2890757756420092, 'reg_lambda': 0.057399324174498105}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8883423864279502


[32m[I 2023-01-16 08:43:25,287][0m Trial 35 finished with value: 0.8885149358806234 and parameters: {'n_estimators': 186, 'max_depth': 4, 'learning_rate': 0.032842209945296186, 'min_child_weight': 6, 'gamma': 0.0001516666796586548, 'subsample': 0.5307630302805004, 'colsample_bytree': 0.3752526566203358, 'reg_alpha': 0.04818595863663014, 'reg_lambda': 0.5189585681477227}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8885149358806234


[32m[I 2023-01-16 08:43:31,517][0m Trial 36 finished with value: 0.8870944225114918 and parameters: {'n_estimators': 285, 'max_depth': 9, 'learning_rate': 0.21373744149894205, 'min_child_weight': 7, 'gamma': 0.0017796277888285136, 'subsample': 0.43048441532970644, 'colsample_bytree': 0.4985938759981293, 'reg_alpha': 0.14850842369303077, 'reg_lambda': 0.00036750164408503623}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8870944225114918


[32m[I 2023-01-16 08:43:35,607][0m Trial 37 finished with value: 0.8891046236351339 and parameters: {'n_estimators': 365, 'max_depth': 6, 'learning_rate': 0.011270471316484542, 'min_child_weight': 8, 'gamma': 0.00019910836091079903, 'subsample': 0.357683763716263, 'colsample_bytree': 0.2876568682920981, 'reg_alpha': 0.9387803505795909, 'reg_lambda': 0.1454698912823484}. Best is trial 23 with value: 0.8913982372347915.[0m


AVG CV AUC: 	 0.8891046236351339


[32m[I 2023-01-16 08:43:40,886][0m Trial 38 finished with value: 0.8916238456441616 and parameters: {'n_estimators': 499, 'max_depth': 3, 'learning_rate': 0.29565754388035825, 'min_child_weight': 6, 'gamma': 4.19807437038387e-05, 'subsample': 0.6993067219836925, 'colsample_bytree': 0.41510979792041725, 'reg_alpha': 0.01937331588784124, 'reg_lambda': 0.3093388356531279}. Best is trial 38 with value: 0.8916238456441616.[0m


AVG CV AUC: 	 0.8916238456441616


[32m[I 2023-01-16 08:43:46,097][0m Trial 39 finished with value: 0.8868506964095909 and parameters: {'n_estimators': 462, 'max_depth': 5, 'learning_rate': 0.29794522115894084, 'min_child_weight': 4, 'gamma': 1.774681547361398e-05, 'subsample': 0.6818478482158482, 'colsample_bytree': 0.6210489910589359, 'reg_alpha': 0.010021623885460315, 'reg_lambda': 2.7502684376763528e-05}. Best is trial 38 with value: 0.8916238456441616.[0m


AVG CV AUC: 	 0.8868506964095909


[32m[I 2023-01-16 08:43:51,361][0m Trial 40 finished with value: 0.8910061186035919 and parameters: {'n_estimators': 420, 'max_depth': 3, 'learning_rate': 0.23565157395356418, 'min_child_weight': 4, 'gamma': 4.694152572443858e-05, 'subsample': 0.9589817068071649, 'colsample_bytree': 0.34568186842764254, 'reg_alpha': 0.01848677985005356, 'reg_lambda': 9.717858006989221e-05}. Best is trial 38 with value: 0.8916238456441616.[0m


AVG CV AUC: 	 0.8910061186035919


[32m[I 2023-01-16 08:43:56,733][0m Trial 41 finished with value: 0.8921483959802881 and parameters: {'n_estimators': 309, 'max_depth': 3, 'learning_rate': 0.19866350519311063, 'min_child_weight': 6, 'gamma': 6.660855075279272e-05, 'subsample': 0.8897265056586251, 'colsample_bytree': 0.4156670508327957, 'reg_alpha': 0.09286190589038691, 'reg_lambda': 0.29571860017140655}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8921483959802881


[32m[I 2023-01-16 08:44:01,900][0m Trial 42 finished with value: 0.892096631144486 and parameters: {'n_estimators': 358, 'max_depth': 3, 'learning_rate': 0.2568924659688136, 'min_child_weight': 6, 'gamma': 2.5348315329923257e-05, 'subsample': 0.8866306545406502, 'colsample_bytree': 0.41181609777560174, 'reg_alpha': 0.08494674945812301, 'reg_lambda': 0.25103576147628304}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.892096631144486


[32m[I 2023-01-16 08:44:07,334][0m Trial 43 finished with value: 0.891103609044352 and parameters: {'n_estimators': 486, 'max_depth': 4, 'learning_rate': 0.2596021449208666, 'min_child_weight': 5, 'gamma': 2.576687712125856e-05, 'subsample': 0.8159217889618446, 'colsample_bytree': 0.4115997027860369, 'reg_alpha': 0.07382773138120408, 'reg_lambda': 0.24339111779678596}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.891103609044352


[32m[I 2023-01-16 08:44:14,609][0m Trial 44 finished with value: 0.8908918045911958 and parameters: {'n_estimators': 353, 'max_depth': 3, 'learning_rate': 0.2168294627959077, 'min_child_weight': 6, 'gamma': 3.762990047149615e-05, 'subsample': 0.8861012648665165, 'colsample_bytree': 0.5201496697487613, 'reg_alpha': 0.04992794186011578, 'reg_lambda': 0.06222236575454007}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8908918045911958


[32m[I 2023-01-16 08:44:19,677][0m Trial 45 finished with value: 0.890764980743481 and parameters: {'n_estimators': 402, 'max_depth': 3, 'learning_rate': 0.2667275511801509, 'min_child_weight': 6, 'gamma': 6.142349445545217e-05, 'subsample': 0.9002476260868578, 'colsample_bytree': 0.3576665475721127, 'reg_alpha': 0.004930023028491021, 'reg_lambda': 0.11809840939034259}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.890764980743481


[32m[I 2023-01-16 08:44:25,308][0m Trial 46 finished with value: 0.8920388270778405 and parameters: {'n_estimators': 319, 'max_depth': 4, 'learning_rate': 0.20497253510553637, 'min_child_weight': 4, 'gamma': 1.7296935863564e-05, 'subsample': 0.7014060619540406, 'colsample_bytree': 0.3069189799384965, 'reg_alpha': 0.10958288803090425, 'reg_lambda': 0.21319811739506728}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8920388270778405


[32m[I 2023-01-16 08:44:30,898][0m Trial 47 finished with value: 0.891242511353754 and parameters: {'n_estimators': 317, 'max_depth': 4, 'learning_rate': 0.20345336452439003, 'min_child_weight': 2, 'gamma': 2.030429331538775e-05, 'subsample': 0.707569074791519, 'colsample_bytree': 0.2974843604608057, 'reg_alpha': 0.014390090390774816, 'reg_lambda': 0.19512797545152885}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.891242511353754


[32m[I 2023-01-16 08:44:36,305][0m Trial 48 finished with value: 0.8887931718730588 and parameters: {'n_estimators': 302, 'max_depth': 4, 'learning_rate': 0.24907081246470072, 'min_child_weight': 4, 'gamma': 0.06303839190485017, 'subsample': 0.8951861278735292, 'colsample_bytree': 0.24481520875008292, 'reg_alpha': 0.0926571559504283, 'reg_lambda': 0.03159613345361102}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8887931718730588


[32m[I 2023-01-16 08:44:42,188][0m Trial 49 finished with value: 0.8909953342627996 and parameters: {'n_estimators': 346, 'max_depth': 3, 'learning_rate': 0.29439903804818013, 'min_child_weight': 3, 'gamma': 1.41000678802519e-05, 'subsample': 0.5933413796954967, 'colsample_bytree': 0.30652785754807615, 'reg_alpha': 0.007671013809144619, 'reg_lambda': 0.07751862594818}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8909953342627996


[32m[I 2023-01-16 08:44:49,454][0m Trial 50 finished with value: 0.8874196782297806 and parameters: {'n_estimators': 450, 'max_depth': 4, 'learning_rate': 0.016888135954058157, 'min_child_weight': 3, 'gamma': 0.13349991376588358, 'subsample': 0.9813905016566963, 'colsample_bytree': 0.4006859683658372, 'reg_alpha': 0.03228580322723014, 'reg_lambda': 0.2839194332866484}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8874196782297806


[32m[I 2023-01-16 08:44:55,257][0m Trial 51 finished with value: 0.8915656102038845 and parameters: {'n_estimators': 239, 'max_depth': 3, 'learning_rate': 0.15334040979140112, 'min_child_weight': 5, 'gamma': 3.5682333088717234e-05, 'subsample': 0.5575910400534866, 'colsample_bytree': 0.320690900337499, 'reg_alpha': 0.09374830382925689, 'reg_lambda': 1.2144385390517058e-05}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8915656102038845


[32m[I 2023-01-16 08:45:01,382][0m Trial 52 finished with value: 0.8899345865024917 and parameters: {'n_estimators': 321, 'max_depth': 3, 'learning_rate': 0.11917707819859909, 'min_child_weight': 5, 'gamma': 3.2411236151483614e-05, 'subsample': 0.6342731165212916, 'colsample_bytree': 0.31940348677098845, 'reg_alpha': 0.10483897763133875, 'reg_lambda': 0.0013891456420005565}. Best is trial 41 with value: 0.8921483959802881.[0m


AVG CV AUC: 	 0.8899345865024917


[32m[I 2023-01-16 08:45:08,420][0m Trial 53 finished with value: 0.8923839259831867 and parameters: {'n_estimators': 385, 'max_depth': 3, 'learning_rate': 0.15546901031888585, 'min_child_weight': 5, 'gamma': 1.1029503359703103e-05, 'subsample': 0.5621882579914776, 'colsample_bytree': 0.37273248027710504, 'reg_alpha': 0.04503762236337675, 'reg_lambda': 0.000298371682098627}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8923839259831867


[32m[I 2023-01-16 08:45:15,261][0m Trial 54 finished with value: 0.8906558432146653 and parameters: {'n_estimators': 380, 'max_depth': 3, 'learning_rate': 0.20242567769676895, 'min_child_weight': 5, 'gamma': 1.0127624345813632e-05, 'subsample': 0.6791492688363509, 'colsample_bytree': 0.2654242893675517, 'reg_alpha': 0.04775517119993539, 'reg_lambda': 4.295974288349293e-05}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8906558432146653


[32m[I 2023-01-16 08:45:21,223][0m Trial 55 finished with value: 0.890327136507323 and parameters: {'n_estimators': 394, 'max_depth': 4, 'learning_rate': 0.15156009201138462, 'min_child_weight': 6, 'gamma': 1.6075370145919718e-05, 'subsample': 0.5679557032293256, 'colsample_bytree': 0.3367406573876525, 'reg_alpha': 0.024471215272212164, 'reg_lambda': 0.00026168767665066175}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.890327136507323


[32m[I 2023-01-16 08:45:26,017][0m Trial 56 finished with value: 0.8884743867592453 and parameters: {'n_estimators': 489, 'max_depth': 5, 'learning_rate': 0.2443660511249527, 'min_child_weight': 5, 'gamma': 7.240568368191174e-05, 'subsample': 0.5958846486810895, 'colsample_bytree': 0.35979085352918827, 'reg_alpha': 0.0002845479324336964, 'reg_lambda': 1.3835321417599254e-05}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8884743867592453


[32m[I 2023-01-16 08:45:32,050][0m Trial 57 finished with value: 0.8912640800353382 and parameters: {'n_estimators': 345, 'max_depth': 3, 'learning_rate': 0.19431526602942265, 'min_child_weight': 4, 'gamma': 2.2122629513021527e-05, 'subsample': 0.7588307067421545, 'colsample_bytree': 0.2626283083385762, 'reg_alpha': 0.0032526136445813134, 'reg_lambda': 0.0008064020027834769}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8912640800353382


[32m[I 2023-01-16 08:45:38,825][0m Trial 58 finished with value: 0.8903750189804398 and parameters: {'n_estimators': 300, 'max_depth': 4, 'learning_rate': 0.23503286486837685, 'min_child_weight': 6, 'gamma': 4.62901651031787e-05, 'subsample': 0.8606202906570136, 'colsample_bytree': 0.3194171803584196, 'reg_alpha': 0.017022481456044816, 'reg_lambda': 1.5753868227283098e-05}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8903750189804398


[32m[I 2023-01-16 08:45:45,266][0m Trial 59 finished with value: 0.8869067749817097 and parameters: {'n_estimators': 428, 'max_depth': 7, 'learning_rate': 0.15312203739124866, 'min_child_weight': 2, 'gamma': 3.8667180316570026e-05, 'subsample': 0.7197783576077662, 'colsample_bytree': 0.23140345284953273, 'reg_alpha': 0.10931488488734666, 'reg_lambda': 7.45317865777493e-05}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8869067749817097


[32m[I 2023-01-16 08:45:51,442][0m Trial 60 finished with value: 0.8903711366177547 and parameters: {'n_estimators': 369, 'max_depth': 3, 'learning_rate': 0.1046795241831898, 'min_child_weight': 5, 'gamma': 1.0035874806860133e-05, 'subsample': 0.6655464265674496, 'colsample_bytree': 0.5811615662969362, 'reg_alpha': 0.03885959780584542, 'reg_lambda': 0.00020270573126020537}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8903711366177547


[32m[I 2023-01-16 08:45:56,599][0m Trial 61 finished with value: 0.8915323944342448 and parameters: {'n_estimators': 250, 'max_depth': 3, 'learning_rate': 0.18273282338839344, 'min_child_weight': 4, 'gamma': 0.01925143991902538, 'subsample': 0.40973352646774913, 'colsample_bytree': 0.3858911538884373, 'reg_alpha': 0.07063274628043315, 'reg_lambda': 0.0006056717652420623}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8915323944342448


[32m[I 2023-01-16 08:46:01,585][0m Trial 62 finished with value: 0.8922316510912027 and parameters: {'n_estimators': 242, 'max_depth': 3, 'learning_rate': 0.16741892745063622, 'min_child_weight': 4, 'gamma': 0.028921750048014387, 'subsample': 0.37860839772016164, 'colsample_bytree': 0.42288994533129826, 'reg_alpha': 0.06279483548945522, 'reg_lambda': 0.0006442410836742429}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8922316510912027


[32m[I 2023-01-16 08:46:07,779][0m Trial 63 finished with value: 0.8909509027787363 and parameters: {'n_estimators': 280, 'max_depth': 3, 'learning_rate': 0.1657304393018881, 'min_child_weight': 4, 'gamma': 0.027385509062493332, 'subsample': 0.38365070614020524, 'colsample_bytree': 0.4341327824897483, 'reg_alpha': 0.22501914351134192, 'reg_lambda': 0.0025891021694016605}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8909509027787363


[32m[I 2023-01-16 08:46:13,868][0m Trial 64 finished with value: 0.8904608623331447 and parameters: {'n_estimators': 234, 'max_depth': 3, 'learning_rate': 0.13423233554228797, 'min_child_weight': 5, 'gamma': 0.003410692737434707, 'subsample': 0.35104387684839605, 'colsample_bytree': 0.42865033201562286, 'reg_alpha': 0.06448710899992137, 'reg_lambda': 0.0004609077280429929}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8904608623331447


[32m[I 2023-01-16 08:46:19,187][0m Trial 65 finished with value: 0.8895157227061276 and parameters: {'n_estimators': 410, 'max_depth': 4, 'learning_rate': 0.28055979685396754, 'min_child_weight': 3, 'gamma': 0.16431344944748058, 'subsample': 0.6185334076920593, 'colsample_bytree': 0.4008616566045928, 'reg_alpha': 0.12395600299653312, 'reg_lambda': 0.0013598871134192884}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8895157227061276


[32m[I 2023-01-16 08:46:25,307][0m Trial 66 finished with value: 0.8895472129812404 and parameters: {'n_estimators': 334, 'max_depth': 4, 'learning_rate': 0.22525147317189106, 'min_child_weight': 6, 'gamma': 1.5613331140366153e-05, 'subsample': 0.9532174530296582, 'colsample_bytree': 0.9919542029167971, 'reg_alpha': 4.826127814951946e-05, 'reg_lambda': 0.0001465935921295935}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8895472129812404


[32m[I 2023-01-16 08:46:31,445][0m Trial 67 finished with value: 0.8916398064685338 and parameters: {'n_estimators': 446, 'max_depth': 3, 'learning_rate': 0.13966576472411918, 'min_child_weight': 4, 'gamma': 0.00010832195845744951, 'subsample': 0.7855594443280444, 'colsample_bytree': 0.3638830761501, 'reg_alpha': 0.029167518231511258, 'reg_lambda': 0.006879023124591547}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8916398064685338


[32m[I 2023-01-16 08:46:37,855][0m Trial 68 finished with value: 0.8906851766216197 and parameters: {'n_estimators': 436, 'max_depth': 3, 'learning_rate': 0.13009032232285647, 'min_child_weight': 3, 'gamma': 0.00010658158293742217, 'subsample': 0.825977854614023, 'colsample_bytree': 0.47795324540011896, 'reg_alpha': 0.03261475250167883, 'reg_lambda': 0.0009587979258647849}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8906851766216197


[32m[I 2023-01-16 08:46:43,313][0m Trial 69 finished with value: 0.8906825883798296 and parameters: {'n_estimators': 498, 'max_depth': 4, 'learning_rate': 0.19230162768177197, 'min_child_weight': 4, 'gamma': 0.4616001667143639, 'subsample': 0.7761807631452579, 'colsample_bytree': 0.3681804735892306, 'reg_alpha': 0.02473222776794735, 'reg_lambda': 0.005977221359578908}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8906825883798296


[32m[I 2023-01-16 08:46:52,727][0m Trial 70 finished with value: 0.8904185877172398 and parameters: {'n_estimators': 472, 'max_depth': 3, 'learning_rate': 0.10990579892301008, 'min_child_weight': 6, 'gamma': 0.00023063325382273017, 'subsample': 0.9362459285844086, 'colsample_bytree': 0.5088750468522051, 'reg_alpha': 0.42786088555594154, 'reg_lambda': 0.017201762968399998}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8904185877172398


[32m[I 2023-01-16 08:46:59,384][0m Trial 71 finished with value: 0.8918921600430684 and parameters: {'n_estimators': 455, 'max_depth': 3, 'learning_rate': 0.1419624798924591, 'min_child_weight': 5, 'gamma': 3.189175838050556e-05, 'subsample': 0.7176954413569163, 'colsample_bytree': 0.3454739109762008, 'reg_alpha': 0.08321120018701933, 'reg_lambda': 5.9686908244324385e-05}. Best is trial 53 with value: 0.8923839259831867.[0m


AVG CV AUC: 	 0.8918921600430684


[32m[I 2023-01-16 08:47:06,730][0m Trial 72 finished with value: 0.8931340847286833 and parameters: {'n_estimators': 458, 'max_depth': 3, 'learning_rate': 0.14092293077535503, 'min_child_weight': 4, 'gamma': 2.636321047769501e-05, 'subsample': 0.7372974813889643, 'colsample_bytree': 0.3507737760180819, 'reg_alpha': 0.04170974929388676, 'reg_lambda': 9.32627670367783e-05}. Best is trial 72 with value: 0.8931340847286833.[0m


AVG CV AUC: 	 0.8931340847286833


[32m[I 2023-01-16 08:47:12,789][0m Trial 73 finished with value: 0.8931405553331584 and parameters: {'n_estimators': 442, 'max_depth': 3, 'learning_rate': 0.16894325955966064, 'min_child_weight': 4, 'gamma': 2.3973788914112574e-05, 'subsample': 0.7325220564292374, 'colsample_bytree': 0.3440318161832336, 'reg_alpha': 0.04070996761929491, 'reg_lambda': 7.607115094642777e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8931405553331584


[32m[I 2023-01-16 08:47:19,490][0m Trial 74 finished with value: 0.8927471425810638 and parameters: {'n_estimators': 411, 'max_depth': 3, 'learning_rate': 0.17118174811466924, 'min_child_weight': 4, 'gamma': 2.5872245116892264e-05, 'subsample': 0.7331206250591277, 'colsample_bytree': 0.338849124320865, 'reg_alpha': 0.21173185682873352, 'reg_lambda': 7.667330854038655e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8927471425810638


[32m[I 2023-01-16 08:47:26,096][0m Trial 75 finished with value: 0.891507806137239 and parameters: {'n_estimators': 390, 'max_depth': 4, 'learning_rate': 0.17306215550144646, 'min_child_weight': 3, 'gamma': 2.224223707811427e-05, 'subsample': 0.924073441276876, 'colsample_bytree': 0.2937148637293262, 'reg_alpha': 0.6256905237489178, 'reg_lambda': 0.0001824445238652637}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.891507806137239


[32m[I 2023-01-16 08:47:31,177][0m Trial 76 finished with value: 0.8870728538299075 and parameters: {'n_estimators': 410, 'max_depth': 3, 'learning_rate': 0.04500372357616333, 'min_child_weight': 4, 'gamma': 0.0052153077961897675, 'subsample': 0.7403270067415817, 'colsample_bytree': 0.3341107115066523, 'reg_alpha': 0.24094234970912443, 'reg_lambda': 2.839289624574374e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8870728538299075


[32m[I 2023-01-16 08:47:38,092][0m Trial 77 finished with value: 0.8920207093853098 and parameters: {'n_estimators': 364, 'max_depth': 3, 'learning_rate': 0.1180643660533939, 'min_child_weight': 3, 'gamma': 1.2712060373153884e-05, 'subsample': 0.8188644488350937, 'colsample_bytree': 0.3079406092653897, 'reg_alpha': 0.34412103668349053, 'reg_lambda': 0.00012447461880280727}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8920207093853098


[32m[I 2023-01-16 08:47:44,297][0m Trial 78 finished with value: 0.8917886303714645 and parameters: {'n_estimators': 417, 'max_depth': 4, 'learning_rate': 0.1660915768423867, 'min_child_weight': 4, 'gamma': 7.575172906563152e-05, 'subsample': 0.8654621085793621, 'colsample_bytree': 0.39036516418276496, 'reg_alpha': 0.20961154440055405, 'reg_lambda': 9.236209173546067e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8917886303714645


[32m[I 2023-01-16 08:47:50,444][0m Trial 79 finished with value: 0.8874576391093688 and parameters: {'n_estimators': 440, 'max_depth': 7, 'learning_rate': 0.2038941173435179, 'min_child_weight': 4, 'gamma': 1.9129068304129265e-05, 'subsample': 0.9954557921644793, 'colsample_bytree': 0.43418451657334073, 'reg_alpha': 0.04287397161900942, 'reg_lambda': 4.401256638438162e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8874576391093688


[32m[I 2023-01-16 08:47:58,960][0m Trial 80 finished with value: 0.8892780358350703 and parameters: {'n_estimators': 376, 'max_depth': 8, 'learning_rate': 0.09076019680444332, 'min_child_weight': 5, 'gamma': 2.8706226963063736e-05, 'subsample': 0.3144868689755714, 'colsample_bytree': 0.7850560998567132, 'reg_alpha': 0.13672310486872788, 'reg_lambda': 0.00028666049371811776}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8892780358350703


[32m[I 2023-01-16 08:48:05,678][0m Trial 81 finished with value: 0.8916889830625456 and parameters: {'n_estimators': 356, 'max_depth': 3, 'learning_rate': 0.11923967012418987, 'min_child_weight': 3, 'gamma': 1.5743220146644263e-05, 'subsample': 0.8443865454023257, 'colsample_bytree': 0.29873637134189623, 'reg_alpha': 0.3053708296017695, 'reg_lambda': 0.00011001227135148093}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8916889830625456


[32m[I 2023-01-16 08:48:11,608][0m Trial 82 finished with value: 0.8925202400507986 and parameters: {'n_estimators': 331, 'max_depth': 3, 'learning_rate': 0.16422379951468774, 'min_child_weight': 2, 'gamma': 1.3113706631688945e-05, 'subsample': 0.8085155073201696, 'colsample_bytree': 0.30718399349026, 'reg_alpha': 0.3897636338752741, 'reg_lambda': 0.00013313799583567764}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8925202400507986


[32m[I 2023-01-16 08:48:18,277][0m Trial 83 finished with value: 0.8912576094308629 and parameters: {'n_estimators': 308, 'max_depth': 3, 'learning_rate': 0.18511893490116849, 'min_child_weight': 2, 'gamma': 1.2706160731357397e-05, 'subsample': 0.7500638622795032, 'colsample_bytree': 0.34752740781361047, 'reg_alpha': 0.6823895072330917, 'reg_lambda': 7.051647884505207e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8912576094308629


[32m[I 2023-01-16 08:48:24,768][0m Trial 84 finished with value: 0.8915332571815082 and parameters: {'n_estimators': 334, 'max_depth': 3, 'learning_rate': 0.1574347917154898, 'min_child_weight': 1, 'gamma': 2.315514475227678e-05, 'subsample': 0.7842652342525773, 'colsample_bytree': 0.33298245899741363, 'reg_alpha': 0.058420634220458484, 'reg_lambda': 0.00018612276968320678}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8915332571815082


[32m[I 2023-01-16 08:48:31,022][0m Trial 85 finished with value: 0.8909526282732632 and parameters: {'n_estimators': 288, 'max_depth': 3, 'learning_rate': 0.17158109529050308, 'min_child_weight': 3, 'gamma': 5.782768844903316e-05, 'subsample': 0.6913905524102113, 'colsample_bytree': 0.27760927084955406, 'reg_alpha': 0.17498758809500817, 'reg_lambda': 2.051733935020155e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8909526282732632


[32m[I 2023-01-16 08:48:36,429][0m Trial 86 finished with value: 0.892540083237856 and parameters: {'n_estimators': 327, 'max_depth': 3, 'learning_rate': 0.2172891759557804, 'min_child_weight': 1, 'gamma': 2.7831923413767527e-05, 'subsample': 0.9143144382068366, 'colsample_bytree': 0.37272126019054935, 'reg_alpha': 0.37354243680365473, 'reg_lambda': 4.917008731281827e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.892540083237856


[32m[I 2023-01-16 08:48:41,176][0m Trial 87 finished with value: 0.8918472971853733 and parameters: {'n_estimators': 469, 'max_depth': 3, 'learning_rate': 0.2238132508446539, 'min_child_weight': 1, 'gamma': 2.8009630049236262e-05, 'subsample': 0.9223025098821772, 'colsample_bytree': 0.3888807949836042, 'reg_alpha': 0.45812604551024017, 'reg_lambda': 4.3793067689578564e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8918472971853733


[32m[I 2023-01-16 08:48:46,366][0m Trial 88 finished with value: 0.891420668663639 and parameters: {'n_estimators': 396, 'max_depth': 3, 'learning_rate': 0.2588197412683835, 'min_child_weight': 1, 'gamma': 4.711123736889273e-05, 'subsample': 0.8777911795224771, 'colsample_bytree': 0.4684701578682333, 'reg_alpha': 0.6347757250175337, 'reg_lambda': 0.0004079733346826068}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.891420668663639


[32m[I 2023-01-16 08:48:54,143][0m Trial 89 finished with value: 0.8914124725646371 and parameters: {'n_estimators': 340, 'max_depth': 3, 'learning_rate': 0.14469766948139745, 'min_child_weight': 2, 'gamma': 0.0004501275311349422, 'subsample': 0.8319600393110264, 'colsample_bytree': 0.35363057474652415, 'reg_alpha': 0.3545064978777599, 'reg_lambda': 6.263479812398369e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8914124725646371


[32m[I 2023-01-16 08:48:59,680][0m Trial 90 finished with value: 0.8848547306157944 and parameters: {'n_estimators': 271, 'max_depth': 10, 'learning_rate': 0.12620336931290943, 'min_child_weight': 2, 'gamma': 1.2840859322563074e-05, 'subsample': 0.2711652389522496, 'colsample_bytree': 0.37149803209842147, 'reg_alpha': 0.2403413019334055, 'reg_lambda': 0.00014808786585810823}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8848547306157944


[32m[I 2023-01-16 08:49:05,893][0m Trial 91 finished with value: 0.8928506722526677 and parameters: {'n_estimators': 356, 'max_depth': 4, 'learning_rate': 0.20031179592841059, 'min_child_weight': 4, 'gamma': 1.7442827476941266e-05, 'subsample': 0.790198749150779, 'colsample_bytree': 0.3285580561490234, 'reg_alpha': 0.1324694444988872, 'reg_lambda': 8.499691534858881e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8928506722526677


[32m[I 2023-01-16 08:49:11,889][0m Trial 92 finished with value: 0.8912515702000194 and parameters: {'n_estimators': 355, 'max_depth': 3, 'learning_rate': 0.1902471927982446, 'min_child_weight': 1, 'gamma': 1.9089598194206253e-05, 'subsample': 0.9096015741647502, 'colsample_bytree': 0.3298602319481855, 'reg_alpha': 0.9924666075371635, 'reg_lambda': 0.0002462877777937481}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8912515702000194


[32m[I 2023-01-16 08:49:17,828][0m Trial 93 finished with value: 0.8921980039479315 and parameters: {'n_estimators': 324, 'max_depth': 3, 'learning_rate': 0.2258628237517934, 'min_child_weight': 5, 'gamma': 0.0026172628086096545, 'subsample': 0.7931482915769151, 'colsample_bytree': 0.3800326314937328, 'reg_alpha': 0.13624832905245973, 'reg_lambda': 8.18452491277058e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8921980039479315


[32m[I 2023-01-16 08:49:23,791][0m Trial 94 finished with value: 0.8901688223844955 and parameters: {'n_estimators': 327, 'max_depth': 4, 'learning_rate': 0.2220920838381273, 'min_child_weight': 5, 'gamma': 0.012331505981904954, 'subsample': 0.7943075219956074, 'colsample_bytree': 0.39619936101719294, 'reg_alpha': 0.1461212082522444, 'reg_lambda': 5.391562362929858e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8901688223844955


[32m[I 2023-01-16 08:49:29,795][0m Trial 95 finished with value: 0.8927790642298082 and parameters: {'n_estimators': 295, 'max_depth': 3, 'learning_rate': 0.16141609326132372, 'min_child_weight': 4, 'gamma': 0.04045461294691363, 'subsample': 0.6580649716615431, 'colsample_bytree': 0.36973841986976774, 'reg_alpha': 0.060356081716044514, 'reg_lambda': 3.621531105596374e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8927790642298082


[32m[I 2023-01-16 08:49:34,346][0m Trial 96 finished with value: 0.8910794521209779 and parameters: {'n_estimators': 298, 'max_depth': 3, 'learning_rate': 0.16394874789928834, 'min_child_weight': 4, 'gamma': 0.04621185936334014, 'subsample': 0.660618730212621, 'colsample_bytree': 0.3731665366490687, 'reg_alpha': 0.0006746947357690026, 'reg_lambda': 3.386801799331445e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8910794521209779


[32m[I 2023-01-16 08:49:38,863][0m Trial 97 finished with value: 0.8902382735391964 and parameters: {'n_estimators': 380, 'max_depth': 3, 'learning_rate': 0.1474107457367675, 'min_child_weight': 3, 'gamma': 0.03569066312389592, 'subsample': 0.2010341167193813, 'colsample_bytree': 0.35051645778155066, 'reg_alpha': 0.06187212811162237, 'reg_lambda': 2.3009431469419485e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8902382735391964


[32m[I 2023-01-16 08:49:45,754][0m Trial 98 finished with value: 0.8895605855638226 and parameters: {'n_estimators': 403, 'max_depth': 6, 'learning_rate': 0.17520049416366207, 'min_child_weight': 4, 'gamma': 0.31856517436672177, 'subsample': 0.7387806578622172, 'colsample_bytree': 0.37959233344024723, 'reg_alpha': 0.19187861661908057, 'reg_lambda': 8.456165736284847e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8895605855638226


[32m[I 2023-01-16 08:49:51,530][0m Trial 99 finished with value: 0.8890265450077992 and parameters: {'n_estimators': 429, 'max_depth': 4, 'learning_rate': 0.23840717723863875, 'min_child_weight': 4, 'gamma': 0.0026655103736747936, 'subsample': 0.6096577802784005, 'colsample_bytree': 0.32527864593592254, 'reg_alpha': 0.043192850011273286, 'reg_lambda': 3.533664249179831e-05}. Best is trial 73 with value: 0.8931405553331584.[0m


AVG CV AUC: 	 0.8890265450077992


In [16]:
study.best_value

0.8931405553331584

In [17]:
study.best_params

{'n_estimators': 442,
 'max_depth': 3,
 'learning_rate': 0.16894325955966064,
 'min_child_weight': 4,
 'gamma': 2.3973788914112574e-05,
 'subsample': 0.7325220564292374,
 'colsample_bytree': 0.3440318161832336,
 'reg_alpha': 0.04070996761929491,
 'reg_lambda': 7.607115094642777e-05}

# Tuning LightGBM

In [38]:
from optuna.integration import LightGBMPruningCallback

def objective_lgbm(trial, X, y):
    param_grid = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 2000),
        "num_rounds": trial.suggest_int("num_rounds", 100, 500),
        "learning_rate": trial.suggest_float("learning_rate", 0.0001, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 300),
        "max_depth": trial.suggest_int("max_depth", 2, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 20, 1000),
        "lambda_l1": trial.suggest_loguniform('lambda_l1', 0.00001, 1.0),
        "lambda_l2": trial.suggest_loguniform('lambda_l2', 0.00001, 1.0),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction":  trial.suggest_loguniform('bagging_fraction', 0.2, 1.0),
        "feature_fraction": trial.suggest_loguniform('feature_fraction', 0.2, 1.0),
    }

    cv = StratifiedKFold(n_splits=8, shuffle=True, random_state=1337)

    cv_scores = np.empty(8)
    for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = lgbm.LGBMClassifier(objective="binary", is_unbalance=True, **param_grid)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            eval_metric="auc",
            early_stopping_rounds=100,
            verbose=0,
            callbacks=[
                LightGBMPruningCallback(trial, "auc")
            ],  # Add a pruning callback
        )
        y_preds = model.predict_proba(X_test)[:, 1]
        cv_scores[fold] = roc_auc_score(y_test, y_preds)
    
    auc = np.mean(cv_scores)
    print(f"AVG CV AUC: \t {auc}")
    return auc

In [43]:
study_lgbm = optuna.create_study(direction="maximize", study_name="LGBM Tuning")
func = lambda trial: objective_lgbm(trial, X, y)
study_lgbm.optimize(func, n_trials=100, show_progress_bar=True)

[32m[I 2023-01-15 14:53:59,322][0m A new study created in memory with name: LGBM Tuning[0m


  0%|          | 0/100 [00:00<?, ?it/s]

AVG CV AUC: 	 0.8729958381072016
[32m[I 2023-01-15 14:54:00,427][0m Trial 0 finished with value: 0.8729958381072016 and parameters: {'n_estimators': 1305, 'num_rounds': 353, 'learning_rate': 0.11771950798040598, 'num_leaves': 271, 'max_depth': 5, 'min_data_in_leaf': 591, 'lambda_l1': 0.06125275318636127, 'lambda_l2': 0.0010430465970611143, 'min_gain_to_split': 8.878183575866514, 'bagging_fraction': 0.9076821854605938, 'feature_fraction': 0.941217376909169}. Best is trial 0 with value: 0.8729958381072016.[0m
AVG CV AUC: 	 0.8780735371257402
[32m[I 2023-01-15 14:54:01,949][0m Trial 1 finished with value: 0.8780735371257402 and parameters: {'n_estimators': 1580, 'num_rounds': 391, 'learning_rate': 0.2907929538916645, 'num_leaves': 202, 'max_depth': 10, 'min_data_in_leaf': 811, 'lambda_l1': 6.719891433308238e-05, 'lambda_l2': 0.1143808516670079, 'min_gain_to_split': 5.141876950602242, 'bagging_fraction': 0.24723772673381383, 'feature_fraction': 0.4999613423952636}. Best is trial 1 wit

In [44]:
study_lgbm.best_value

0.8850717115525308

In [45]:
study_lgbm.best_params

{'n_estimators': 1942,
 'num_rounds': 477,
 'learning_rate': 0.2870761124159734,
 'num_leaves': 37,
 'max_depth': 6,
 'min_data_in_leaf': 368,
 'lambda_l1': 0.00017451291663562305,
 'lambda_l2': 0.5090553595978456,
 'min_gain_to_split': 0.02547126174774228,
 'bagging_fraction': 0.20531850278394478,
 'feature_fraction': 0.37437811030015083}

# Tuning RandomForestClassifier

In [51]:
def objective_rf(trial, X, y):
    param_grid = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 500),
        "max_features": trial.suggest_int("max_features", 5, 15),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 5),
        "max_depth": trial.suggest_int("max_depth", 2, 20),
    }

    cv = StratifiedKFold(n_splits=8, shuffle=True, random_state=1337)

    cv_scores = np.empty(8)
    for fold, (train_idx, test_idx) in enumerate(cv.split(X, y)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model = RandomForestClassifier(**param_grid)
        model.fit(
            X_train,
            y_train
        )
        y_preds = model.predict_proba(X_test)[:, 1]
        cv_scores[fold] = roc_auc_score(y_test, y_preds)
    
    auc = np.mean(cv_scores)
    print(f"AVG CV AUC: \t {auc}")
    return auc

In [52]:
study_rf = optuna.create_study(direction="maximize", study_name="Random Forest Tuning")
func = lambda trial: objective_rf(trial, X, y)
study_rf.optimize(func, n_trials=100, show_progress_bar=True)

[32m[I 2023-01-15 15:07:22,574][0m A new study created in memory with name: Random Forest Tuning[0m


  0%|          | 0/100 [00:00<?, ?it/s]

AVG CV AUC: 	 0.8856109285921345
[32m[I 2023-01-15 15:07:37,554][0m Trial 0 finished with value: 0.8856109285921345 and parameters: {'n_estimators': 107, 'max_features': 11, 'learning_rate': 16, 'min_samples_leaf': 2, 'max_depth': 8}. Best is trial 0 with value: 0.8856109285921345.[0m
AVG CV AUC: 	 0.8771840446972102
[32m[I 2023-01-15 15:08:09,276][0m Trial 1 finished with value: 0.8771840446972102 and parameters: {'n_estimators': 216, 'max_features': 9, 'learning_rate': 16, 'min_samples_leaf': 5, 'max_depth': 20}. Best is trial 0 with value: 0.8856109285921345.[0m
AVG CV AUC: 	 0.8816742128293971
[32m[I 2023-01-15 15:08:36,639][0m Trial 2 finished with value: 0.8816742128293971 and parameters: {'n_estimators': 230, 'max_features': 7, 'learning_rate': 16, 'min_samples_leaf': 2, 'max_depth': 13}. Best is trial 0 with value: 0.8856109285921345.[0m
AVG CV AUC: 	 0.8780614586640532
[32m[I 2023-01-15 15:09:35,632][0m Trial 3 finished with value: 0.8780614586640532 and parameters:

In [53]:
study_rf.best_value

0.8883320334607898

In [54]:
study_rf.best_params

{'n_estimators': 390,
 'max_features': 14,
 'learning_rate': 19,
 'min_samples_leaf': 2,
 'max_depth': 5}