# XGBoost Hyperparameter Sweep 20211031
For optimizing lv2 predictions off of a lv1 with 5 random seeds

# Setup

In [1]:
# two manual flags (ex-config)
colab = False
gpu_available = True

In [2]:
# basic imports
from pathlib import Path
import os
import math
from datetime import datetime
import random
import multiprocessing
import pickle

In [3]:
%matplotlib inline
%config Completer.use_jedi = False
os.environ['WANDB_NOTEBOOK_NAME'] = f"sweep_xgboost_lv2_{datetime.now().strftime('%Y%m%d')}_X-orig+Kmeans8+synth_dataset.ipynb"

In [4]:
# handle Google Colab-specific library installation/updating
if colab:
    # much of the below inspired by or cribbed from the May 2021 Kaggle Tabular Playground winner, at 
    # https://colab.research.google.com/gist/academicsuspect/0aac7bd6e506f5f70295bfc9a3dc2250/tabular-may-baseline.ipynb?authuser=1#scrollTo=LJoVKJb5wN0L
    
    # Kaggle API for downloading the datasets
    !pip install --upgrade -q kaggle

    # weights and biases
    !pip install -qqqU wandb
    
    # Optuna for parameter search
    !pip install -q optuna

    # !pip install --upgrade xgboost

    # upgrade sklearn
    !pip install --upgrade scikit-learn

    # !pip install category_encoders
    # !pip install catboost
#     !pip install --upgrade -q lightgbm

    # lighgbm gpu compatible
    # !git clone --recursive https://github.com/Microsoft/LightGBM
    # ! cd LightGBM && rm -rf build && mkdir build && cd build && cmake -DUSE_GPU=1 ../../LightGBM && make -j4 && cd ../python-package && python3 setup.py install --precompile --gpu;
    
    # # this part is from https://github.com/rapidsai/gputreeshap/issues/24
    # !pip install cmake --upgrade
    # # !pip install sklearn --upgrade
    # !git clone --recursive https://github.com/dmlc/xgboost
    # %cd /content/xgboost
    # !mkdir build
    # %cd build
    # !cmake .. -DUSE_CUDA=ON
    # !make -j4
    # %cd /content/xgboost/python-package
    # !python setup.py install --use-cuda --use-nccl
    # !/opt/bin/nvidia-smi
    # !pip install shap
    

Now, non-stdlib imports

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# general ML tooling
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.metrics import log_loss, roc_auc_score
import wandb
from optuna.integration.wandb import WeightsAndBiasesCallback
# from wandb.xgboost import wandb_callback
# from wandb.lightgbm import wandb_callback
# from sklearn.impute import KNNImputer, StandardImputer
# import timm

import seaborn as sns

# from catboost import CatBoostClassifier
from xgboost import XGBClassifier
# from lightgbm import LGBMClassifier
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.preprocessing import MaxAbsScaler, StandardScaler, MinMaxScaler, RobustScaler, PolynomialFeatures
# from sklearn.feature_selection import SelectKBest, f_regression
from joblib import dump, load
# feature engineering tools
# from sklearn.feature_selection import mutual_info_regression
# import featuretools as ft


from optuna.samplers import TPESampler
import optuna
# import catboost
from sklearn.utils import resample
import sklearn.metrics

Now, datapath setup

In [6]:
# # This is the code for reading the train.csv and converting it to a .feather file
# df = pd.read_csv(datapath/'train.csv', index_col='id', low_memory=False)
# df.index.name = None
# df.to_feather(path='./dataset_df.feather')

In [7]:
if colab:
    # mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    
    # handling datapath
#     datapath = Path('/content/drive/MyDrive/kaggle/tabular_playgrounds/sep2021/')
    
else:
    # if on local machine
#     datapath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/sep2021/')  
    root = Path('/home/sf/code/kaggle/tabular_playgrounds/oct2021/')
    datapath = root/'datasets'
    edapath = root/'EDA'
    modelpath = root/'models'
    predpath = root/'preds'
    subpath = root/'submissions'
    studypath = root/'optuna_studies'
    altdatapath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/oct2021/alt_datasets/')
    
    for pth in [root, datapath, edapath, modelpath, predpath, subpath, altdatapath]:
        pth.mkdir(exist_ok=True)

In [8]:

# n_trials = int(1000)
SEED = 42

In [9]:
# Function to seed everything
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
seed_everything(SEED)

## Ex-Model Config

In [10]:
# meta-config for preprocessing and cross-validation, but NOT for model parameters
# in the sweep version, this includes both ex-model parameters and defaults for model parameters
exmodel_config = {
    # model config
    "library": 'xgboost',
#     "model": XGBClassifier,
#     "n_estimators": 100, 
#     "max_depth": 3,
#     "learning_rate": 0.1,
#     "test_size": 0.2,
#     "reg_lambda": None, 
#     "scaler": "sklearn.preprocessing.StandardScaler()", # TODO: experiment with others (but imputation may be slow)
#     "scale_b4_impute": False,
#     "imputer": "sklearn.impute.SimpleImputer(strategy='median', add_indicator=True)",
#     "knn_imputer_n_neighbors": None, # None if a different imputer is used
#     "feature_selector": SelectKBest,
#     "k_best": 80,
#     "feature_selection_scoring": f_regression,
    'random_state': SEED,
    'optuna': True,
#     'optuna_trials': 20,
#     'subsample': 1,
#     'cross_val_strategy': None, # None for holdout, or the relevant sklearn class
#     'kfolds': 1, # if 1, that means just doing holdout
#     'test_size': 0.2,
    # these are XGBoost default (my choice) params 
#     "tree_method": "auto", # set to 'gpu_hist' to try GPU if available
#     "booster": 'gbtree', # dart may be marginally better, but will opt for this quicker approach as a default
#     "n_estimators": 200, 
#     "max_depth": 3,
#     "learning_rate": 0.1,
#     "n_jobs": -1,
#     "verbosity": 1,
#     "subsample": 1,
#     'features_created': False,
#     'feature_creator': None,
}

wandb_kwargs = {
    # wandb config
    'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
    'project': '202110_Kaggle_tabular_playground',
    'tags': ['sweep'],
    'notes': "Sweep for XGBoost on GPU with Optuna, For lv2 model based on 5rs / architecture preds from original dataset",
    'config': exmodel_config,
}

## Data Setup

**TODO** Write some conditional logic here to automate it -- possibly as part of a sklearn.*pipeline

In [11]:
# train_source = datapath/'train.feather'
# df = pd.read_feather(path=datapath/'train.feather')
# y = np.array(df.target)
# dump(y, filename=datapath/'y.joblib')
# del df

# y = load(datapath/'y.joblib')
y = load(predpath/'5folds_rs42_oof_y.joblib')

# df.index.name = 'id'
# y_train = df.target
# features = [x for x in df.columns if x != 'target']
# X_train = df[features]
# # X.index.name = 'id'
# # y.index.name = 'id'
# X = np.array(X_train)
# y = np.array(y_train)

# del df, X_train, y_train

# load the Boruta-filtered green-zone 98 features (based on 200 iterations of the algo)
# train_source = '/media/sf/easystore/kaggle_data/tabular_playgrounds/oct2021/alt_datasets/X_boruta_200iter_filtered_green.joblib'
# X = load(train_source)

# train_source = altdatapath/'train-WITH-KMeans_8cluster_ninit50_maxiter1000_rs42-AND-synthetic.feather' #'X_boruta_shap_200trials.feather'
# train_source = predpath/'stacking_manual_20211020_104938_X_orig+KMeans8+synth_oof_lv1.feather'
train_source = altdatapath/'oof_lv1_5rs.feather'
exmodel_config['train_source'] = str(train_source)
X = pd.read_feather(path=train_source)
if 'target' in X.columns:
    X = X.drop(['target'], axis=1)

# exmodel_config['feature_count'] = len(X.columns)
exmodel_config['feature_count'] = X.shape[1]
exmodel_config['instance_count'] = X.shape[0]

# exmodel_config['feature_generator'] = None
# exmodel_config['feature_generator'] = "Summary statistics"

# exmodel_config['train_source'] = str(train_source)
# test_source = datapath/'test.feather'
# exmodel_config['test_source'] = str(test_source)
# X_test = pd.read_feather(path=test_source)
# X_test = X_test.iloc[:, 1:]
# X_test = np.array(X_test)

In [12]:
y = pd.Series(y)

In [13]:
X.shape, y.shape

((1000000, 20), (1000000,))

In [14]:
type(X), type(y)

(pandas.core.frame.DataFrame, pandas.core.series.Series)

# Experiment setup

In [15]:
# wandb_kwargs = {
#     # wandb config:
#     'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
#     'project': '202109_Kaggle_tabular_playground',
#     'tags': ['sweep'],
#     'notes': "Sweep for CatBoost using Optuna",
#     'config': exmodel_config,
# }

The best parameters up to present have been:

```python
best_xgboost_params = {
        'n_estimators': 3878,
        'max_depth': 4,
        'learning_rate': 0.024785857161974977,
        'reg_alpha': 26.867682044658245,
        'reg_lambda': 10.839759074147148,
        'subsample': 0.8208581489835881,
        'min_child_weight': 8.829122644339664,
        'colsample_bytree': 0.906420714280384,
        'gamma': 1.472322916021486
    }
```

These params get the following ROC_AUC scores on a 20% holdout for these dataset versions:

| Version | Feature Count | Valid ROC-AUC |
| ----- | ----- | ----- |
| original | 285 | 0.8572984856383443 |
| Boruta green-only | 98 | 0.8553163413048461 |
| Boruta green-and-blue | 109 | 0.8558487581638441 |
| Boruta with SHAP | 136 | 0.8566790062778752 |
| Original plus KMeans 8 plus synth | 301 | 0.8570855909847465 |

In [16]:
# X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=int(SEED), shuffle=True)
# # create wrappers for the training and validation partitions
# # train_pool = catboost.Pool(X_train, y_train)
# # valid_pool = catboost.Pool(X_valid, y_valid)

# # experimental parameters -- based off prev. best
# params = {
#     'n_estimators': 3878,
#     'max_depth': 4,
#     'learning_rate': 0.024785857161974977,
#     'reg_alpha': 26.867682044658245,
#     'reg_lambda': 10.839759074147148,
#     'subsample': 0.8208581489835881,
#     'min_child_weight': 8.829122644339664,
#     'colsample_bytree': 0.906420714280384,
#     'gamma': 1.472322916021486
# }

# # instantiate the model, with some parameters locked in, and experimnental ones passed via splat 
# model = XGBClassifier(
#     objective='binary:logistic',
#     verbosity=1,
#     tree_method='gpu_hist',
#     booster='gbtree', # not bothering with dart for time reasons
#     random_state=SEED,
# #         n_jobs=-1,
#     **params
# )    

# model.fit(X_train, y_train)
# # generate predictions
# preds = model.predict_proba(X_valid)[:,1]
# # rounds to the nearest integer, and the nearest even in case of _.5s

# # Evaluation
# valid_auc = roc_auc_score(y_valid, preds)
# print('ROC AUC Score of XGBoost =', valid_auc)

In [17]:
# originally from https://www.kaggle.com/satorushibata/optimize-catboost-hyperparameter-with-optuna-gpu
def objective(trial):
    # split the (original Kaggle training) data into partitions
    # if study.best_trial:
    #     print("Dumping best params, which are:")
    #     print(str(study.best_trial.params))
    #     dump(study.best_trial.params, filename=datapath/'optuna_catboost_best_20210920.joblib')
       
    # else:
    #     print("No best study yet")
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=int(SEED), shuffle=True)
    # create wrappers for the training and validation partitions
    # train_pool = catboost.Pool(X_train, y_train)
    # valid_pool = catboost.Pool(X_valid, y_valid)
    
    # experimental parameters -- based off prev. best
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 350, 10000), # was 900-4500 for CPU
        'max_depth' : trial.suggest_int('depth', 3, 8),                                       
        'learning_rate' : trial.suggest_loguniform('learning_rate', 0.001, 0.3),               
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.001, 50),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.001, 30),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1),
#         'booster': trial.suggest_categorical('boosting_type', ['gbtree', 'dart']),
        'min_child_weight': trial.suggest_uniform('min_child_weight', 0.001, 12),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1),
        'gamma': trial.suggest_uniform('gamma', 0.1, 10)
    }  

    # instantiate the model, with some parameters locked in, and experimnental ones passed via splat 
    model = XGBClassifier(
        objective='binary:logistic',
        verbosity=1,
        tree_method='gpu_hist',
        booster='gbtree', # not bothering with dart for time reasons
        random_state=SEED,
#         n_jobs=-1,
        **params
    )    

    model.fit(X_train, y_train)
    # generate predictions
    preds = model.predict_proba(X_valid)[:,1]
    # rounds to the nearest integer, and the nearest even in case of _.5s

    # Evaluation
    valid_auc = roc_auc_score(y_valid, preds)
    print('ROC AUC Score of XGBoost =', valid_auc)
    wandb.log({'valid_auc': valid_auc,
              })

    return valid_auc

In [18]:
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)

  wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs)
[34m[1mwandb[0m: Currently logged in as: [33mhushifang[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [19]:
study = optuna.create_study(direction = "maximize", 
                            sampler = TPESampler(seed=int(SEED)), 
                            study_name=f"xgboost-lv2-X_orig_5rs_{datetime.now().strftime('%Y%m%d')}")

# study = load(studypath/f"optuna_xgboost_study_106trials_20211004.joblib")


[32m[I 2021-10-31 16:20:22,667][0m A new study created in memory with name: xgboost-lv2-X_orig_5rs_20211031[0m


In [20]:
# import torch

In [21]:
import xgboost

In [22]:
# xgboost.core.XGBoostError?

In [23]:
# study.optimize?

In [24]:
for x in range(1, 500):
    study.optimize(objective, n_trials = 1, callbacks = [wandbc], show_progress_bar=False, catch=(xgboost.core.XGBoostError,)) 
    dump(study, filename=studypath/f"optuna_xgboost_lv2_study_{x}trials_{datetime.now().strftime('%Y%m%d')}.joblib")
#     dump(study.best_trial.params, filename=datapath/f'optuna_lightgbm_study_best-thru-{x*5}trials_20210927.joblib')





[32m[I 2021-10-31 16:20:39,820][0m Trial 0 finished with value: 0.8567745571085623 and parameters: {'n_estimators': 3964, 'depth': 8, 'learning_rate': 0.06504856968981275, 'reg_alpha': 0.6502468545951017, 'reg_lambda': 0.004994757081068292, 'subsample': 0.5779972601681014, 'min_child_weight': 0.6979452624062253, 'colsample_bytree': 0.9330880728874675, 'gamma': 6.051038616257767}. Best is trial 0 with value: 0.8567745571085623.[0m


ROC AUC Score of XGBoost = 0.8567745571085623






[32m[I 2021-10-31 16:21:03,489][0m Trial 1 finished with value: 0.8566107431017014 and parameters: {'n_estimators': 7183, 'depth': 3, 'learning_rate': 0.2526878207508456, 'reg_alpha': 8.158738235092015, 'reg_lambda': 0.00892622738184373, 'subsample': 0.5909124836035503, 'min_child_weight': 2.2016707137313523, 'colsample_bytree': 0.6521211214797689, 'gamma': 5.295088673159155}. Best is trial 0 with value: 0.8567745571085623.[0m


ROC AUC Score of XGBoost = 0.8566107431017014






[32m[I 2021-10-31 16:21:27,519][0m Trial 2 finished with value: 0.8565019690483414 and parameters: {'n_estimators': 4518, 'depth': 4, 'learning_rate': 0.032781876533976156, 'reg_alpha': 0.004523529917658778, 'reg_lambda': 0.02032202659636255, 'subsample': 0.6831809216468459, 'min_child_weight': 5.473383740620215, 'colsample_bytree': 0.8925879806965068, 'gamma': 2.0767704433677614}. Best is trial 0 with value: 0.8567745571085623.[0m


ROC AUC Score of XGBoost = 0.8565019690483414






[32m[I 2021-10-31 16:22:11,086][0m Trial 3 finished with value: 0.856907606229623 and parameters: {'n_estimators': 5312, 'depth': 6, 'learning_rate': 0.0013033567475147442, 'reg_alpha': 0.7158714383119805, 'reg_lambda': 0.005800389779115683, 'subsample': 0.5325257964926398, 'min_child_weight': 11.386677561502745, 'colsample_bytree': 0.9828160165372797, 'gamma': 8.103133746352965}. Best is trial 3 with value: 0.856907606229623.[0m


ROC AUC Score of XGBoost = 0.856907606229623






[32m[I 2021-10-31 16:22:25,135][0m Trial 4 finished with value: 0.8566910863257707 and parameters: {'n_estimators': 3289, 'depth': 3, 'learning_rate': 0.04953682563497157, 'reg_alpha': 0.11702088154220885, 'reg_lambda': 0.0035186816415472676, 'subsample': 0.7475884550556351, 'min_child_weight': 0.41362786486150555, 'colsample_bytree': 0.954660201039391, 'gamma': 2.6619218178401676}. Best is trial 3 with value: 0.856907606229623.[0m


ROC AUC Score of XGBoost = 0.8566910863257707






[32m[I 2021-10-31 16:22:46,896][0m Trial 5 finished with value: 0.8568889733276577 and parameters: {'n_estimators': 6744, 'depth': 4, 'learning_rate': 0.01942099825171803, 'reg_alpha': 0.37065955814875856, 'reg_lambda': 0.0067238158696505896, 'subsample': 0.9847923138822793, 'min_child_weight': 9.301818747510014, 'colsample_bytree': 0.9697494707820946, 'gamma': 8.958790769233723}. Best is trial 3 with value: 0.856907606229623.[0m


ROC AUC Score of XGBoost = 0.8568889733276577






[32m[I 2021-10-31 16:23:31,769][0m Trial 6 finished with value: 0.8569070366280344 and parameters: {'n_estimators': 6120, 'depth': 8, 'learning_rate': 0.0016565580440884786, 'reg_alpha': 0.008335230071817131, 'reg_lambda': 0.001593999043568401, 'subsample': 0.6626651653816322, 'min_child_weight': 4.664738798984096, 'colsample_bytree': 0.6356745158869479, 'gamma': 8.3045013406041}. Best is trial 3 with value: 0.856907606229623.[0m


ROC AUC Score of XGBoost = 0.8569070366280344






[32m[I 2021-10-31 16:23:53,430][0m Trial 7 finished with value: 0.8566717296217867 and parameters: {'n_estimators': 3793, 'depth': 4, 'learning_rate': 0.022096526145513846, 'reg_alpha': 0.0045940816125026864, 'reg_lambda': 3.9042098517777197, 'subsample': 0.5372753218398854, 'min_child_weight': 11.842656352269607, 'colsample_bytree': 0.8861223846483287, 'gamma': 2.0672852471883068}. Best is trial 3 with value: 0.856907606229623.[0m


ROC AUC Score of XGBoost = 0.8566717296217867






[32m[I 2021-10-31 16:23:56,069][0m Trial 8 finished with value: 0.8569089264833052 and parameters: {'n_estimators': 403, 'depth': 7, 'learning_rate': 0.0563600475052774, 'reg_alpha': 2.6642981030636883, 'reg_lambda': 2.838382119353614, 'subsample': 0.5370223258670452, 'min_child_weight': 4.302230276802727, 'colsample_bytree': 0.5579345297625649, 'gamma': 8.644723916168376}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8569089264833052






[32m[I 2021-10-31 16:24:34,130][0m Trial 9 finished with value: 0.8568953331953946 and parameters: {'n_estimators': 6365, 'depth': 4, 'learning_rate': 0.0014369502768990666, 'reg_alpha': 0.028926547478415564, 'reg_lambda': 0.028568350317608886, 'subsample': 0.864803089169032, 'min_child_weight': 7.651052098791203, 'colsample_bytree': 0.9436063712881633, 'gamma': 4.7749277591032975}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8568953331953946






[32m[I 2021-10-31 16:24:40,400][0m Trial 10 finished with value: 0.8568799710525512 and parameters: {'n_estimators': 548, 'depth': 6, 'learning_rate': 0.0057835683328601424, 'reg_alpha': 25.193261592370117, 'reg_lambda': 24.215992044481425, 'subsample': 0.8451235367845727, 'min_child_weight': 3.095865504265305, 'colsample_bytree': 0.5055571924113557, 'gamma': 9.696199420265538}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8568799710525512






[32m[I 2021-10-31 16:25:15,875][0m Trial 11 finished with value: 0.8568998150078941 and parameters: {'n_estimators': 9712, 'depth': 6, 'learning_rate': 0.006265045297405034, 'reg_alpha': 2.0678704761423647, 'reg_lambda': 0.35538772799202606, 'subsample': 0.5154800030768701, 'min_child_weight': 11.28268770042188, 'colsample_bytree': 0.7866454888256145, 'gamma': 7.433049102858982}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8568998150078941






[32m[I 2021-10-31 16:25:18,500][0m Trial 12 finished with value: 0.8568848430661389 and parameters: {'n_estimators': 503, 'depth': 7, 'learning_rate': 0.16017451645115668, 'reg_alpha': 2.627574281927009, 'reg_lambda': 0.5332090840296356, 'subsample': 0.6372424866412405, 'min_child_weight': 7.532087700126141, 'colsample_bytree': 0.5167929424672362, 'gamma': 6.9631014192624}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8568848430661389






[32m[I 2021-10-31 16:25:29,310][0m Trial 13 finished with value: 0.8569022324646361 and parameters: {'n_estimators': 1858, 'depth': 7, 'learning_rate': 0.006095386499338796, 'reg_alpha': 49.65627329623703, 'reg_lambda': 1.7655362188643002, 'subsample': 0.5127747474052715, 'min_child_weight': 3.9676254361559256, 'colsample_bytree': 0.7648198488403328, 'gamma': 9.983182168887833}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8569022324646361






[32m[I 2021-10-31 16:25:58,241][0m Trial 14 finished with value: 0.8562452920824952 and parameters: {'n_estimators': 8667, 'depth': 7, 'learning_rate': 0.10038701694957171, 'reg_alpha': 0.09014037986173169, 'reg_lambda': 0.0709422654078536, 'subsample': 0.7655620151757302, 'min_child_weight': 6.706024197936731, 'colsample_bytree': 0.6471208002214497, 'gamma': 3.7652044058182774}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8562452920824952






[32m[I 2021-10-31 16:26:13,947][0m Trial 15 finished with value: 0.8569032692675276 and parameters: {'n_estimators': 1874, 'depth': 5, 'learning_rate': 0.002734259025483008, 'reg_alpha': 1.2976213221595907, 'reg_lambda': 16.281489111694658, 'subsample': 0.6067816387994542, 'min_child_weight': 9.054538400000514, 'colsample_bytree': 0.8352725713024433, 'gamma': 7.880321003921752}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8569032692675276






[32m[I 2021-10-31 16:27:01,993][0m Trial 16 finished with value: 0.8564367975165845 and parameters: {'n_estimators': 5258, 'depth': 6, 'learning_rate': 0.011218855608069756, 'reg_alpha': 8.485571126247757, 'reg_lambda': 0.13781096594280337, 'subsample': 0.7580143193422603, 'min_child_weight': 9.932330562095588, 'colsample_bytree': 0.6943845973931932, 'gamma': 0.23047422237114557}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8564367975165845






[32m[I 2021-10-31 16:27:12,719][0m Trial 17 finished with value: 0.8568969481998989 and parameters: {'n_estimators': 2408, 'depth': 5, 'learning_rate': 0.011648821855852205, 'reg_alpha': 6.361572953808567, 'reg_lambda': 4.412721138165254, 'subsample': 0.6900913631930554, 'min_child_weight': 2.2648833769944483, 'colsample_bytree': 0.7107053414739195, 'gamma': 6.394821350110929}. Best is trial 8 with value: 0.8569089264833052.[0m


ROC AUC Score of XGBoost = 0.8568969481998989






[32m[I 2021-10-31 16:27:50,441][0m Trial 18 finished with value: 0.8569145201489052 and parameters: {'n_estimators': 7943, 'depth': 7, 'learning_rate': 0.0028449169511802976, 'reg_alpha': 0.001079627815782365, 'reg_lambda': 1.3244077888823096, 'subsample': 0.5490851573245075, 'min_child_weight': 5.108638302486011, 'colsample_bytree': 0.5897670850320078, 'gamma': 8.738156944298614}. Best is trial 18 with value: 0.8569145201489052.[0m


ROC AUC Score of XGBoost = 0.8569145201489052






[32m[I 2021-10-31 16:28:27,327][0m Trial 19 finished with value: 0.8569141821479627 and parameters: {'n_estimators': 8046, 'depth': 8, 'learning_rate': 0.0031535673119604496, 'reg_alpha': 0.001595877295784664, 'reg_lambda': 0.9276823351587395, 'subsample': 0.5697705541247967, 'min_child_weight': 5.752758775505524, 'colsample_bytree': 0.5774774629590134, 'gamma': 8.982494180659982}. Best is trial 18 with value: 0.8569145201489052.[0m


ROC AUC Score of XGBoost = 0.8569141821479627






[32m[I 2021-10-31 16:29:03,771][0m Trial 20 finished with value: 0.8569142704482089 and parameters: {'n_estimators': 8017, 'depth': 8, 'learning_rate': 0.0032629462621328923, 'reg_alpha': 0.0010426657777225991, 'reg_lambda': 0.927881845085354, 'subsample': 0.6200392464368687, 'min_child_weight': 5.610469756138408, 'colsample_bytree': 0.5754574090669539, 'gamma': 8.917894734741536}. Best is trial 18 with value: 0.8569145201489052.[0m


ROC AUC Score of XGBoost = 0.8569142704482089






[32m[I 2021-10-31 16:29:40,869][0m Trial 21 finished with value: 0.8569157614523671 and parameters: {'n_estimators': 7896, 'depth': 8, 'learning_rate': 0.00300451738623939, 'reg_alpha': 0.001148390168156581, 'reg_lambda': 0.8632943448292806, 'subsample': 0.5880221408055133, 'min_child_weight': 5.837682214042644, 'colsample_bytree': 0.5854822546561775, 'gamma': 8.917716054956825}. Best is trial 21 with value: 0.8569157614523671.[0m


ROC AUC Score of XGBoost = 0.8569157614523671






[32m[I 2021-10-31 16:30:19,843][0m Trial 22 finished with value: 0.8569133300955863 and parameters: {'n_estimators': 8050, 'depth': 8, 'learning_rate': 0.0026825690403786797, 'reg_alpha': 0.0016063783860783352, 'reg_lambda': 0.20052981253424565, 'subsample': 0.6247169050806954, 'min_child_weight': 6.501997107454347, 'colsample_bytree': 0.5826099599707831, 'gamma': 9.386749050829426}. Best is trial 21 with value: 0.8569157614523671.[0m


ROC AUC Score of XGBoost = 0.8569133300955863






[32m[I 2021-10-31 16:31:01,235][0m Trial 23 finished with value: 0.8569127196938839 and parameters: {'n_estimators': 9746, 'depth': 8, 'learning_rate': 0.003688054380058554, 'reg_alpha': 0.0012191256521460938, 'reg_lambda': 8.709318243025091, 'subsample': 0.6435544847082245, 'min_child_weight': 4.975686487382656, 'colsample_bytree': 0.6011092721851449, 'gamma': 7.094756361823698}. Best is trial 21 with value: 0.8569157614523671.[0m


ROC AUC Score of XGBoost = 0.8569127196938839






[32m[I 2021-10-31 16:32:12,142][0m Trial 24 finished with value: 0.8569058414247012 and parameters: {'n_estimators': 8802, 'depth': 7, 'learning_rate': 0.0010791285619214564, 'reg_alpha': 0.01653999121424828, 'reg_lambda': 1.1246285644950107, 'subsample': 0.7144563043864602, 'min_child_weight': 7.411141779520299, 'colsample_bytree': 0.6975115409747275, 'gamma': 6.149119270638638}. Best is trial 21 with value: 0.8569157614523671.[0m


ROC AUC Score of XGBoost = 0.8569058414247012






[32m[I 2021-10-31 16:32:54,348][0m Trial 25 finished with value: 0.8569164899543988 and parameters: {'n_estimators': 7194, 'depth': 8, 'learning_rate': 0.002025764200289733, 'reg_alpha': 0.003374199381134067, 'reg_lambda': 0.5424935537649992, 'subsample': 0.5683162795967847, 'min_child_weight': 3.51327029569875, 'colsample_bytree': 0.5414403310742756, 'gamma': 7.870852281537746}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.8569164899543988






[32m[I 2021-10-31 16:33:20,732][0m Trial 26 finished with value: 0.856907126478285 and parameters: {'n_estimators': 7064, 'depth': 7, 'learning_rate': 0.00912792192333895, 'reg_alpha': 0.0036460165830851324, 'reg_lambda': 0.3615479561671184, 'subsample': 0.5639871677301805, 'min_child_weight': 3.35986117127564, 'colsample_bytree': 0.540240281713268, 'gamma': 7.33310929636887}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.856907126478285






[32m[I 2021-10-31 16:34:01,108][0m Trial 27 finished with value: 0.8569016166629186 and parameters: {'n_estimators': 5742, 'depth': 8, 'learning_rate': 0.002100080328741079, 'reg_alpha': 0.01975816090645588, 'reg_lambda': 0.11283899008178311, 'subsample': 0.8214553849136108, 'min_child_weight': 1.7787066751575766, 'colsample_bytree': 0.621755689219156, 'gamma': 7.955214485933839}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.8569016166629186






[32m[I 2021-10-31 16:35:11,981][0m Trial 28 finished with value: 0.8568975106514674 and parameters: {'n_estimators': 7662, 'depth': 7, 'learning_rate': 0.0010083871852237618, 'reg_alpha': 0.002483369897957641, 'reg_lambda': 7.517381111385791, 'subsample': 0.9217051453210541, 'min_child_weight': 3.185725469204564, 'colsample_bytree': 0.531155817343147, 'gamma': 5.594478605700383}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.8568975106514674






[32m[I 2021-10-31 16:36:07,869][0m Trial 29 finished with value: 0.8568197122844956 and parameters: {'n_estimators': 8838, 'depth': 8, 'learning_rate': 0.00474541703870553, 'reg_alpha': 0.00827532438053137, 'reg_lambda': 0.045595015201187104, 'subsample': 0.5007180429400027, 'min_child_weight': 1.3144162658774237, 'colsample_bytree': 0.6796102022110047, 'gamma': 4.59127813724702}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.8568197122844956






[32m[I 2021-10-31 16:36:54,595][0m Trial 30 finished with value: 0.856911049589226 and parameters: {'n_estimators': 8913, 'depth': 8, 'learning_rate': 0.0019417644688080472, 'reg_alpha': 0.03756232256284382, 'reg_lambda': 1.9626224850638216, 'subsample': 0.5788964062629987, 'min_child_weight': 3.7559614477217957, 'colsample_bytree': 0.7334567004168044, 'gamma': 9.460384603620344}. Best is trial 25 with value: 0.8569164899543988.[0m


ROC AUC Score of XGBoost = 0.856911049589226






[32m[I 2021-10-31 16:37:29,029][0m Trial 31 finished with value: 0.8569165048544404 and parameters: {'n_estimators': 7651, 'depth': 8, 'learning_rate': 0.003947544472693994, 'reg_alpha': 0.0011070124064676246, 'reg_lambda': 0.8108289711001738, 'subsample': 0.6080110794163263, 'min_child_weight': 5.147083508731836, 'colsample_bytree': 0.5984193538905249, 'gamma': 8.676722721686408}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569165048544404






[32m[I 2021-10-31 16:38:06,358][0m Trial 32 finished with value: 0.8569074509791901 and parameters: {'n_estimators': 7589, 'depth': 8, 'learning_rate': 0.00410627336404385, 'reg_alpha': 0.00823801891592462, 'reg_lambda': 0.4885150715023978, 'subsample': 0.5582552237059151, 'min_child_weight': 6.510438868090795, 'colsample_bytree': 0.6045551900068308, 'gamma': 6.462106096712658}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569074509791901






[32m[I 2021-10-31 16:38:32,259][0m Trial 33 finished with value: 0.8569145533489979 and parameters: {'n_estimators': 6759, 'depth': 7, 'learning_rate': 0.008530993382421507, 'reg_alpha': 0.0029535255061163695, 'reg_lambda': 0.22153391584715273, 'subsample': 0.5936539702037942, 'min_child_weight': 4.8519584955124175, 'colsample_bytree': 0.5389434085392616, 'gamma': 8.342400490362532}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569145533489979






[32m[I 2021-10-31 16:38:59,481][0m Trial 34 finished with value: 0.85691558325187 and parameters: {'n_estimators': 6894, 'depth': 8, 'learning_rate': 0.00774594674275648, 'reg_alpha': 0.002925190472502194, 'reg_lambda': 0.2538322121045323, 'subsample': 0.5932075668051952, 'min_child_weight': 2.3078218403464676, 'colsample_bytree': 0.5460190002574195, 'gamma': 7.91507327562635}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.85691558325187






[32m[I 2021-10-31 16:39:20,489][0m Trial 35 finished with value: 0.856907964080621 and parameters: {'n_estimators': 4692, 'depth': 8, 'learning_rate': 0.007936309933334183, 'reg_alpha': 0.005978971335883916, 'reg_lambda': 0.6595339698212679, 'subsample': 0.6678245216424936, 'min_child_weight': 2.553809024397921, 'colsample_bytree': 0.6654927785389436, 'gamma': 7.683649959400888}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.856907964080621






[32m[I 2021-10-31 16:39:45,456][0m Trial 36 finished with value: 0.8568908413328673 and parameters: {'n_estimators': 7358, 'depth': 8, 'learning_rate': 0.030084852759592336, 'reg_alpha': 0.002317777878560706, 'reg_lambda': 0.08134827925405787, 'subsample': 0.7139603031627981, 'min_child_weight': 0.05222603038885687, 'colsample_bytree': 0.5545027526689944, 'gamma': 6.879472747450057}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8568908413328673






[32m[I 2021-10-31 16:40:09,878][0m Trial 37 finished with value: 0.8568864060704979 and parameters: {'n_estimators': 5831, 'depth': 8, 'learning_rate': 0.013878428298327835, 'reg_alpha': 0.05272988320889208, 'reg_lambda': 0.013378598152516429, 'subsample': 0.5986571074522543, 'min_child_weight': 0.9497733558064256, 'colsample_bytree': 0.6230341406761042, 'gamma': 5.631900439288703}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8568864060704979






[32m[I 2021-10-31 16:40:38,278][0m Trial 38 finished with value: 0.8569086363324958 and parameters: {'n_estimators': 6588, 'depth': 3, 'learning_rate': 0.0021441204323839652, 'reg_alpha': 0.01446673767634864, 'reg_lambda': 0.20497350718019117, 'subsample': 0.6556529863513626, 'min_child_weight': 2.6636099902218446, 'colsample_bytree': 0.5211646027411042, 'gamma': 8.253623844455348}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569086363324958






[32m[I 2021-10-31 16:41:10,826][0m Trial 39 finished with value: 0.8569110576392487 and parameters: {'n_estimators': 7108, 'depth': 8, 'learning_rate': 0.004421869370624678, 'reg_alpha': 0.15960883831209602, 'reg_lambda': 0.36238723312150584, 'subsample': 0.7015192765029804, 'min_child_weight': 4.218490430648032, 'colsample_bytree': 0.5037657506628094, 'gamma': 9.288221846194357}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569110576392487






[32m[I 2021-10-31 16:41:38,561][0m Trial 40 finished with value: 0.8567318874895611 and parameters: {'n_estimators': 9272, 'depth': 6, 'learning_rate': 0.2956614597706401, 'reg_alpha': 0.004941760815546605, 'reg_lambda': 2.645876677055612, 'subsample': 0.5887663591658137, 'min_child_weight': 6.002229784098234, 'colsample_bytree': 0.5619507815615987, 'gamma': 8.628662244315043}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8567318874895611






[32m[I 2021-10-31 16:42:05,640][0m Trial 41 finished with value: 0.8569154894516086 and parameters: {'n_estimators': 6983, 'depth': 7, 'learning_rate': 0.00689666976542476, 'reg_alpha': 0.0027249521748128, 'reg_lambda': 0.24769351426219993, 'subsample': 0.5974905433921965, 'min_child_weight': 4.686795335693375, 'colsample_bytree': 0.5342456518862059, 'gamma': 8.041880345471583}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569154894516086






[32m[I 2021-10-31 16:42:26,311][0m Trial 42 finished with value: 0.8569106157880164 and parameters: {'n_estimators': 6121, 'depth': 8, 'learning_rate': 0.017259718643401183, 'reg_alpha': 0.001989885057975289, 'reg_lambda': 0.26735854981695023, 'subsample': 0.6203300184367349, 'min_child_weight': 4.487912672728222, 'colsample_bytree': 0.5564040818648989, 'gamma': 9.957463989346724}. Best is trial 31 with value: 0.8569165048544404.[0m


ROC AUC Score of XGBoost = 0.8569106157880164






[32m[I 2021-10-31 16:43:00,279][0m Trial 43 finished with value: 0.8569198162136754 and parameters: {'n_estimators': 8398, 'depth': 7, 'learning_rate': 0.005152495267590912, 'reg_alpha': 0.003501877866082655, 'reg_lambda': 0.5892650659577978, 'subsample': 0.5321195352481216, 'min_child_weight': 1.7950489828988663, 'colsample_bytree': 0.6100580666401978, 'gamma': 7.836182280294878}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569198162136754






[32m[I 2021-10-31 16:43:36,745][0m Trial 44 finished with value: 0.8569086894826441 and parameters: {'n_estimators': 8367, 'depth': 8, 'learning_rate': 0.004945324856536367, 'reg_alpha': 0.011721886473175399, 'reg_lambda': 0.6494801521781453, 'subsample': 0.5318179757314384, 'min_child_weight': 2.083166757591901, 'colsample_bytree': 0.6153316026166756, 'gamma': 6.69244318537931}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569086894826441






[32m[I 2021-10-31 16:44:18,400][0m Trial 45 finished with value: 0.8569099848862569 and parameters: {'n_estimators': 7519, 'depth': 7, 'learning_rate': 0.0021278292763556067, 'reg_alpha': 0.3158434640666459, 'reg_lambda': 0.7248642227664047, 'subsample': 0.5372944691108633, 'min_child_weight': 1.5279715319775722, 'colsample_bytree': 0.6631361354560831, 'gamma': 7.7119808784993245}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569099848862569






[32m[I 2021-10-31 16:45:15,488][0m Trial 46 finished with value: 0.8569123289427942 and parameters: {'n_estimators': 9403, 'depth': 8, 'learning_rate': 0.001488748210746428, 'reg_alpha': 0.005761903175873231, 'reg_lambda': 1.4680709514285828, 'subsample': 0.5209686860699321, 'min_child_weight': 3.588282758285901, 'colsample_bytree': 0.6386443413238928, 'gamma': 7.378203052278044}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569123289427942






[32m[I 2021-10-31 16:45:36,849][0m Trial 47 finished with value: 0.8569142562981693 and parameters: {'n_estimators': 4630, 'depth': 7, 'learning_rate': 0.005557997996561788, 'reg_alpha': 0.0032820955889409685, 'reg_lambda': 0.0030341415509801983, 'subsample': 0.5719096847724411, 'min_child_weight': 2.7644662777419864, 'colsample_bytree': 0.917951518973829, 'gamma': 9.247322280543981}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569142562981693






[32m[I 2021-10-31 16:46:26,272][0m Trial 48 finished with value: 0.8569095747851132 and parameters: {'n_estimators': 8401, 'depth': 8, 'learning_rate': 0.0016467360336050545, 'reg_alpha': 0.0016559264310235745, 'reg_lambda': 4.3639854708378385, 'subsample': 0.6722603601285471, 'min_child_weight': 0.6780242006077817, 'colsample_bytree': 0.5949677386835913, 'gamma': 8.418302961894888}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569095747851132






[32m[I 2021-10-31 16:47:07,265][0m Trial 49 finished with value: 0.8568835153124359 and parameters: {'n_estimators': 6462, 'depth': 6, 'learning_rate': 0.003945468625362624, 'reg_alpha': 0.007760199394902381, 'reg_lambda': 0.11266106636485763, 'subsample': 0.5492515496099784, 'min_child_weight': 1.2645880998749695, 'colsample_bytree': 0.5749884330185173, 'gamma': 3.9719046677849033}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8568835153124359






[32m[I 2021-10-31 16:48:05,619][0m Trial 50 finished with value: 0.8568472584613192 and parameters: {'n_estimators': 5722, 'depth': 7, 'learning_rate': 0.0024523963341764603, 'reg_alpha': 0.00424473536057114, 'reg_lambda': 2.4945090919345496, 'subsample': 0.6426202064947102, 'min_child_weight': 8.41095864999208, 'colsample_bytree': 0.8562762917677651, 'gamma': 1.1587226167876357}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8568472584613192






[32m[I 2021-10-31 16:48:31,365][0m Trial 51 finished with value: 0.8569116085907852 and parameters: {'n_estimators': 6950, 'depth': 5, 'learning_rate': 0.006575340535293173, 'reg_alpha': 0.0024865930733404483, 'reg_lambda': 0.41617078002579694, 'subsample': 0.6000234180832185, 'min_child_weight': 5.176876915355959, 'colsample_bytree': 0.5429840155834513, 'gamma': 8.00234192865881}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569116085907852






[32m[I 2021-10-31 16:49:01,292][0m Trial 52 finished with value: 0.8569121621923292 and parameters: {'n_estimators': 8352, 'depth': 7, 'learning_rate': 0.0070776199527980855, 'reg_alpha': 0.001105290048491268, 'reg_lambda': 0.05053237983138507, 'subsample': 0.58347509552859, 'min_child_weight': 3.8822693160041344, 'colsample_bytree': 0.5130001651122527, 'gamma': 8.861456312391875}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569121621923292






[32m[I 2021-10-31 16:49:27,763][0m Trial 53 finished with value: 0.85691554735177 and parameters: {'n_estimators': 7271, 'depth': 7, 'learning_rate': 0.010324002978485205, 'reg_alpha': 0.0018369958263996355, 'reg_lambda': 0.2531769281825454, 'subsample': 0.5069565925827056, 'min_child_weight': 6.064351640277105, 'colsample_bytree': 0.5659537579223148, 'gamma': 7.547338343500332}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.85691554735177






[32m[I 2021-10-31 16:49:55,636][0m Trial 54 finished with value: 0.8569041222699066 and parameters: {'n_estimators': 7725, 'depth': 8, 'learning_rate': 0.010477326633654799, 'reg_alpha': 0.001574176722812077, 'reg_lambda': 0.13391349377815584, 'subsample': 0.5101721048709404, 'min_child_weight': 7.050723094153504, 'colsample_bytree': 0.6372957626914384, 'gamma': 7.60355304823981}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569041222699066






[32m[I 2021-10-31 16:50:19,730][0m Trial 55 finished with value: 0.8568920953363646 and parameters: {'n_estimators': 7351, 'depth': 6, 'learning_rate': 0.024370482961329962, 'reg_alpha': 0.004197795368362081, 'reg_lambda': 1.0149230721857982, 'subsample': 0.530055205044762, 'min_child_weight': 5.77255454551217, 'colsample_bytree': 0.6087382626169092, 'gamma': 7.164704810272058}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8568920953363646






[32m[I 2021-10-31 16:50:40,753][0m Trial 56 finished with value: 0.8569168926055217 and parameters: {'n_estimators': 6131, 'depth': 8, 'learning_rate': 0.015702650853240047, 'reg_alpha': 0.01104309966265359, 'reg_lambda': 0.6954270024091173, 'subsample': 0.5470242359968996, 'min_child_weight': 6.045592195444086, 'colsample_bytree': 0.569351780921409, 'gamma': 8.568472766305646}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569168926055217






[32m[I 2021-10-31 16:51:00,946][0m Trial 57 finished with value: 0.8569083443816816 and parameters: {'n_estimators': 6464, 'depth': 8, 'learning_rate': 0.04418804318525734, 'reg_alpha': 0.006101047300691368, 'reg_lambda': 0.5806999501147557, 'subsample': 0.5526016790817966, 'min_child_weight': 8.24413182368351, 'colsample_bytree': 0.5890069847681919, 'gamma': 9.766177827262702}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569083443816816






[32m[I 2021-10-31 16:51:18,935][0m Trial 58 finished with value: 0.8569062130257376 and parameters: {'n_estimators': 4975, 'depth': 8, 'learning_rate': 0.013821795311471681, 'reg_alpha': 0.024021482170833566, 'reg_lambda': 1.4443745881884387, 'subsample': 0.6226462290711287, 'min_child_weight': 5.304115509889047, 'colsample_bytree': 0.5023928449827151, 'gamma': 8.560288006262232}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569062130257376






[32m[I 2021-10-31 16:51:50,598][0m Trial 59 finished with value: 0.8569004184595771 and parameters: {'n_estimators': 6159, 'depth': 8, 'learning_rate': 0.0032925027892580644, 'reg_alpha': 0.010786698327584684, 'reg_lambda': 0.8804690528545144, 'subsample': 0.7938929612497877, 'min_child_weight': 6.945964553178131, 'colsample_bytree': 0.6564888123674865, 'gamma': 9.022010578681842}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569004184595771






[32m[I 2021-10-31 16:53:21,838][0m Trial 60 finished with value: 0.8568525504760781 and parameters: {'n_estimators': 7902, 'depth': 8, 'learning_rate': 0.0012514477303947895, 'reg_alpha': 0.06376677905874807, 'reg_lambda': 6.648021077057055, 'subsample': 0.5736987296052254, 'min_child_weight': 1.956912360578048, 'colsample_bytree': 0.7999072482814998, 'gamma': 2.673010945363621}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8568525504760781






[32m[I 2021-10-31 16:53:43,737][0m Trial 61 finished with value: 0.8569126404936631 and parameters: {'n_estimators': 6741, 'depth': 7, 'learning_rate': 0.01681468066890726, 'reg_alpha': 0.0015201649814034636, 'reg_lambda': 0.4116542165445966, 'subsample': 0.539884243375963, 'min_child_weight': 5.863466711163901, 'colsample_bytree': 0.5605268415451673, 'gamma': 8.152167490697154}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569126404936631






[32m[I 2021-10-31 16:54:07,388][0m Trial 62 finished with value: 0.8569073103287979 and parameters: {'n_estimators': 5575, 'depth': 8, 'learning_rate': 0.00533803873214827, 'reg_alpha': 0.0010438635300909136, 'reg_lambda': 0.15744353596298255, 'subsample': 0.5004542401341248, 'min_child_weight': 6.043925362080036, 'colsample_bytree': 0.5804971152677676, 'gamma': 8.616686127079127}. Best is trial 43 with value: 0.8569198162136754.[0m


ROC AUC Score of XGBoost = 0.8569073103287979




KeyboardInterrupt: 

In [None]:
# dump(study, filename=datapath/f"optuna_xgboost_100trials-complete_{datetime.now().strftime('%Y%m%d')}.joblib")
# dump(study.best_trial.params, filename=datapath/f"optuna_lightgbm_all-500trials-best_{datetime.now().strftime('%Y%m%d')}.joblib")
# pickle.dump(study.best_trial.params, open('CatBoost_Hyperparameter.pickle', 'wb'))
# print('CatBoost Hyperparameter:', study.best_trial.params)

Best on original dataset was:

```python
{'n_estimators': 3878,
 'depth': 4,
 'learning_rate': 0.024785857161974977,
 'reg_alpha': 26.867682044658245,
 'reg_lambda': 10.839759074147148,
 'subsample': 0.8208581489835881,
 'min_child_weight': 8.829122644339664,
 'colsample_bytree': 0.906420714280384,
 'gamma': 1.472322916021486}
```

In [25]:
study.best_trial.params

{'n_estimators': 8398,
 'depth': 7,
 'learning_rate': 0.005152495267590912,
 'reg_alpha': 0.003501877866082655,
 'reg_lambda': 0.5892650659577978,
 'subsample': 0.5321195352481216,
 'min_child_weight': 1.7950489828988663,
 'colsample_bytree': 0.6100580666401978,
 'gamma': 7.836182280294878}

In [26]:
wandb.log({'xgboost_params': study.best_trial.params})
wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
valid_auc,▇▅█▆█▅███▁██████████████████████▇█████▇█

0,1
valid_auc,0.85691


In [None]:
optuna.visualization.plot_parallel_coordinate(study)