# Searching for best hyperparameters set

In [188]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_score
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances, plot_slice, plot_parallel_coordinate
from itertools import chain, combinations
import plotly.express as px

In [189]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [190]:
train_df = pd.read_feather('./data/train_processed.ftr')
val_df = pd.read_feather('./data/val_processed.ftr')
cols = list(train_df.columns)

In [191]:
cols

['is_g734s',
 'CryoSleep',
 'VIP',
 'Europa',
 'Mars',
 'PSO J318.5-22',
 'TRAPPIST-1e',
 'Age',
 'RoomService',
 'FoodCourt',
 'ShoppingMall',
 'Spa',
 'VRDeck',
 'segment',
 'Transported']

## Prepare AdaBoost model

In [192]:
train_x = train_df[cols[:-1]].to_numpy()
train_y = train_df[cols[-1]].to_numpy()
val_x = val_df[cols[:-1]].to_numpy()
val_y = val_df[cols[-1]].to_numpy()

In [193]:
class ModelTester:
    
    def __init__(
        self,
        params: dict,
        train_x: np.array,
        train_y: np.array,
        val_x: np.array,
        val_y: np.array
    ):
        self.params = params
        self.train_x = train_x
        self.train_y = train_y
        self.val_x = val_x
        self.val_y = val_y
        
        self.model = self.train_model()
        self.y_pred_bin = self.get_bin_predictions()
            
    def train_model(self):
        # 1 - Run model
        tree = DecisionTreeClassifier(
            min_samples_split=params['min_samples_split'],
            max_depth=params['max_depth'],
            min_impurity_decrease=params['min_impurity_decrease'],
            criterion=params['criterion']
        )
        model = AdaBoostClassifier(
            base_estimator=tree,
            n_estimators=self.params['n_estimators'],
            learning_rate=self.params['learning_rate'],
            algorithm=self.params['algorithm'],
        )
        model.fit(self.train_x, self.train_y)
        return model
    
    def get_bin_predictions(self):
        # 2 - Get predictions
        y_pred = self.model.predict(val_x)
        y_pred_bin = [1 if y else 0 for y in y_pred]
        
        return y_pred_bin
    
    def get_accuracy(self):
        return round(accuracy_score(self.val_y, self.y_pred_bin), 3)
    
    def get_class_report(self):
        return classification_report(self.val_y, self.y_pred_bin)    

## Optuna research

In [194]:
def objective(trial):
    # 0 - Prepare params
    n_estimators = trial.suggest_int("n_estimators", 10, 200)
    algorithm = trial.suggest_categorical('algorithm', ['SAMME', 'SAMME.R'])
    learning_rate = trial.suggest_float('learning_rate', 0.001, 0.5)
    max_depth = trial.suggest_int('max_depth', 1, 100)
    min_samples_split = trial.suggest_int('min_samples_split', 1, 100)
    min_impurity_decrease = trial.suggest_float('min_impurity_decrease', 0, 0.0001)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy', 'logg_loss'])
    
    params = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate,
        'algorithm': algorithm,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_impurity_decrease': min_impurity_decrease,
        'criterion': criterion
    }
    
    # 1 - Model
    model_tester = ModelTester(params, train_x, train_y, val_x, val_y)
    acc = model_tester.get_accuracy()
    error = 1 - acc
    
    return error

In [50]:
study = optuna.create_study()
study.optimize(objective, n_trials=1000)

[32m[I 2022-07-01 14:07:06,516][0m A new study created in memory with name: no-name-2b4906b0-72d6-48c2-8cfd-3abae7f25359[0m
[32m[I 2022-07-01 14:07:06,810][0m Trial 0 finished with value: 0.20899999999999996 and parameters: {'n_estimators': 42, 'algorithm': 'SAMME.R', 'learning_rate': 0.3581669108603753, 'max_depth': 85, 'min_samples_split': 46, 'min_impurity_decrease': 9.809316965828954e-05, 'criterion': 'entropy'}. Best is trial 0 with value: 0.20899999999999996.[0m
[32m[I 2022-07-01 14:07:07,344][0m Trial 1 finished with value: 0.20099999999999996 and parameters: {'n_estimators': 82, 'algorithm': 'SAMME.R', 'learning_rate': 0.36698468988769967, 'max_depth': 7, 'min_samples_split': 50, 'min_impurity_decrease': 8.823835872333633e-05, 'criterion': 'entropy'}. Best is trial 1 with value: 0.20099999999999996.[0m
[32m[I 2022-07-01 14:07:07,576][0m Trial 2 finished with value: 0.21099999999999997 and parameters: {'n_estimators': 33, 'algorithm': 'SAMME.R', 'learning_rate': 0.410

[32m[I 2022-07-01 14:07:22,367][0m Trial 24 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 87, 'algorithm': 'SAMME.R', 'learning_rate': 0.43217092378723465, 'max_depth': 14, 'min_samples_split': 9, 'min_impurity_decrease': 7.198408196599186e-05, 'criterion': 'entropy'}. Best is trial 17 with value: 0.19699999999999995.[0m
[32m[I 2022-07-01 14:07:22,951][0m Trial 25 finished with value: 0.21199999999999997 and parameters: {'n_estimators': 96, 'algorithm': 'SAMME.R', 'learning_rate': 0.11399082886209569, 'max_depth': 45, 'min_samples_split': 31, 'min_impurity_decrease': 8.392579230814408e-05, 'criterion': 'logg_loss'}. Best is trial 17 with value: 0.19699999999999995.[0m
[32m[I 2022-07-01 14:07:23,421][0m Trial 26 finished with value: 0.20399999999999996 and parameters: {'n_estimators': 71, 'algorithm': 'SAMME.R', 'learning_rate': 0.3283708601848096, 'max_depth': 65, 'min_samples_split': 64, 'min_impurity_decrease': 7.616875634458311e-05, 'criterion': '

[32m[I 2022-07-01 14:07:43,439][0m Trial 48 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 181, 'algorithm': 'SAMME.R', 'learning_rate': 0.25820759308043173, 'max_depth': 94, 'min_samples_split': 6, 'min_impurity_decrease': 8.793869642780979e-05, 'criterion': 'gini'}. Best is trial 17 with value: 0.19699999999999995.[0m
[32m[I 2022-07-01 14:07:44,463][0m Trial 49 finished with value: 0.237 and parameters: {'n_estimators': 194, 'algorithm': 'SAMME', 'learning_rate': 0.2859607278647748, 'max_depth': 91, 'min_samples_split': 5, 'min_impurity_decrease': 9.109845587791916e-05, 'criterion': 'logg_loss'}. Best is trial 17 with value: 0.19699999999999995.[0m
[32m[I 2022-07-01 14:07:45,550][0m Trial 50 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 182, 'algorithm': 'SAMME.R', 'learning_rate': 0.26146525285240907, 'max_depth': 98, 'min_samples_split': 5, 'min_impurity_decrease': 8.671615753452972e-05, 'criterion': 'gini'}. Best is tr

[32m[I 2022-07-01 14:08:00,498][0m Trial 72 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 110, 'algorithm': 'SAMME.R', 'learning_rate': 0.4767276789201188, 'max_depth': 19, 'min_samples_split': 36, 'min_impurity_decrease': 1.1613464732863478e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:01,053][0m Trial 73 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 90, 'algorithm': 'SAMME.R', 'learning_rate': 0.4744104420160877, 'max_depth': 23, 'min_samples_split': 42, 'min_impurity_decrease': 1.8886790664043046e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:01,741][0m Trial 74 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 102, 'algorithm': 'SAMME.R', 'learning_rate': 0.4846278230838988, 'max_depth': 18, 'min_samples_split': 42, 'min_impurity_decrease': 1.0830608560292116e-05, 'criteri

[32m[I 2022-07-01 14:08:15,593][0m Trial 95 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 93, 'algorithm': 'SAMME.R', 'learning_rate': 0.4402731273858903, 'max_depth': 21, 'min_samples_split': 39, 'min_impurity_decrease': 2.7819990776468042e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:16,084][0m Trial 96 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 78, 'algorithm': 'SAMME.R', 'learning_rate': 0.4469332813955577, 'max_depth': 19, 'min_samples_split': 40, 'min_impurity_decrease': 4.524030868889235e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:16,758][0m Trial 97 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 102, 'algorithm': 'SAMME.R', 'learning_rate': 0.4148584009558838, 'max_depth': 14, 'min_samples_split': 23, 'min_impurity_decrease': 3.093432139409756e-05, 'criterion'

[32m[I 2022-07-01 14:08:30,515][0m Trial 118 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 78, 'algorithm': 'SAMME.R', 'learning_rate': 0.4794344901017529, 'max_depth': 9, 'min_samples_split': 58, 'min_impurity_decrease': 5.421080877846444e-08, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:31,102][0m Trial 119 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 92, 'algorithm': 'SAMME.R', 'learning_rate': 0.46884446288106824, 'max_depth': 14, 'min_samples_split': 54, 'min_impurity_decrease': 1.1256408663477572e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:31,575][0m Trial 120 finished with value: 0.238 and parameters: {'n_estimators': 87, 'algorithm': 'SAMME', 'learning_rate': 0.4400530513603742, 'max_depth': 6, 'min_samples_split': 71, 'min_impurity_decrease': 2.085894114835937e-06, 'criterion': 'logg_loss'}.

[32m[I 2022-07-01 14:08:45,964][0m Trial 142 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 109, 'algorithm': 'SAMME.R', 'learning_rate': 0.46942907300340037, 'max_depth': 7, 'min_samples_split': 74, 'min_impurity_decrease': 4.570829158396234e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:46,686][0m Trial 143 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 109, 'algorithm': 'SAMME.R', 'learning_rate': 0.4815348566665483, 'max_depth': 9, 'min_samples_split': 86, 'min_impurity_decrease': 3.6723863293666975e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:08:47,452][0m Trial 144 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 120, 'algorithm': 'SAMME.R', 'learning_rate': 0.49126556646770037, 'max_depth': 5, 'min_samples_split': 73, 'min_impurity_decrease': 4.356660996906438e-05, 'criter

[32m[I 2022-07-01 14:09:01,483][0m Trial 166 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.49146331820159095, 'max_depth': 8, 'min_samples_split': 71, 'min_impurity_decrease': 8.44970513815607e-06, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:02,162][0m Trial 167 finished with value: 0.19799999999999995 and parameters: {'n_estimators': 101, 'algorithm': 'SAMME.R', 'learning_rate': 0.43707620199180286, 'max_depth': 1, 'min_samples_split': 69, 'min_impurity_decrease': 7.934745546734785e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:02,330][0m Trial 168 finished with value: 0.21599999999999997 and parameters: {'n_estimators': 29, 'algorithm': 'SAMME.R', 'learning_rate': 0.3491132806575244, 'max_depth': 4, 'min_samples_split': 68, 'min_impurity_decrease': 1.3687286344007131e-05, 'criterion': '

[32m[I 2022-07-01 14:09:17,163][0m Trial 190 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 140, 'algorithm': 'SAMME.R', 'learning_rate': 0.4762218494456837, 'max_depth': 5, 'min_samples_split': 57, 'min_impurity_decrease': 9.611409793654221e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:17,963][0m Trial 191 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 127, 'algorithm': 'SAMME.R', 'learning_rate': 0.4823138016071563, 'max_depth': 3, 'min_samples_split': 64, 'min_impurity_decrease': 9.144923799189423e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:18,753][0m Trial 192 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 134, 'algorithm': 'SAMME.R', 'learning_rate': 0.4819293589516322, 'max_depth': 6, 'min_samples_split': 66, 'min_impurity_decrease': 9.538041594854854e-05, 'criterion': 'gini'}

[32m[I 2022-07-01 14:09:34,406][0m Trial 214 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 110, 'algorithm': 'SAMME.R', 'learning_rate': 0.4720308918388257, 'max_depth': 6, 'min_samples_split': 71, 'min_impurity_decrease': 7.18922589675664e-06, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:35,024][0m Trial 215 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 108, 'algorithm': 'SAMME.R', 'learning_rate': 0.31528726548233654, 'max_depth': 1, 'min_samples_split': 66, 'min_impurity_decrease': 3.070672565567378e-06, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:35,808][0m Trial 216 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 120, 'algorithm': 'SAMME.R', 'learning_rate': 0.48188113967529445, 'max_depth': 3, 'min_samples_split': 59, 'min_impurity_decrease': 6.193946806210437e-06, 'criterion': 'en

[32m[I 2022-07-01 14:09:52,004][0m Trial 238 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 110, 'algorithm': 'SAMME.R', 'learning_rate': 0.48334027180310535, 'max_depth': 1, 'min_samples_split': 71, 'min_impurity_decrease': 8.415675219598655e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:52,725][0m Trial 239 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 98, 'algorithm': 'SAMME.R', 'learning_rate': 0.4747954628292589, 'max_depth': 10, 'min_samples_split': 51, 'min_impurity_decrease': 7.739622840174048e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:09:53,451][0m Trial 240 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 112, 'algorithm': 'SAMME.R', 'learning_rate': 0.4872239812733546, 'max_depth': 1, 'min_samples_split': 71, 'min_impurity_decrease': 5.734829137660913e-06, 'criterion': 'en

[32m[I 2022-07-01 14:10:09,923][0m Trial 262 finished with value: 0.23199999999999998 and parameters: {'n_estimators': 106, 'algorithm': 'SAMME', 'learning_rate': 0.47808509169502084, 'max_depth': 8, 'min_samples_split': 1, 'min_impurity_decrease': 5.405780738143451e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:10,726][0m Trial 263 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 127, 'algorithm': 'SAMME.R', 'learning_rate': 0.2723211072523962, 'max_depth': 5, 'min_samples_split': 68, 'min_impurity_decrease': 8.472910565791083e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:11,502][0m Trial 264 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 123, 'algorithm': 'SAMME.R', 'learning_rate': 0.47456267027962457, 'max_depth': 5, 'min_samples_split': 73, 'min_impurity_decrease': 9.411164700398806e-05, 'criterion': 

[32m[I 2022-07-01 14:10:26,631][0m Trial 286 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 133, 'algorithm': 'SAMME.R', 'learning_rate': 0.4835615436051095, 'max_depth': 10, 'min_samples_split': 16, 'min_impurity_decrease': 4.7601279861424075e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:27,256][0m Trial 287 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 106, 'algorithm': 'SAMME.R', 'learning_rate': 0.48764036737956834, 'max_depth': 31, 'min_samples_split': 72, 'min_impurity_decrease': 3.005496672765728e-06, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:28,008][0m Trial 288 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 124, 'algorithm': 'SAMME.R', 'learning_rate': 0.490479177622127, 'max_depth': 80, 'min_samples_split': 69, 'min_impurity_decrease': 8.07221855959446e-05, 'criterion': '

[32m[I 2022-07-01 14:10:44,109][0m Trial 310 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 103, 'algorithm': 'SAMME.R', 'learning_rate': 0.3993484288499476, 'max_depth': 11, 'min_samples_split': 49, 'min_impurity_decrease': 8.04404297445021e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:44,872][0m Trial 311 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 109, 'algorithm': 'SAMME.R', 'learning_rate': 0.4999023437321838, 'max_depth': 4, 'min_samples_split': 56, 'min_impurity_decrease': 7.68293925723046e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:10:45,671][0m Trial 312 finished with value: 0.20099999999999996 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.3624840608506689, 'max_depth': 8, 'min_samples_split': 74, 'min_impurity_decrease': 7.861068130741906e-06, 'criterion': 'lo

[32m[I 2022-07-01 14:11:02,357][0m Trial 334 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 125, 'algorithm': 'SAMME.R', 'learning_rate': 0.48766837862856893, 'max_depth': 13, 'min_samples_split': 71, 'min_impurity_decrease': 3.951304883706756e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:03,020][0m Trial 335 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 106, 'algorithm': 'SAMME.R', 'learning_rate': 0.4912505512386625, 'max_depth': 3, 'min_samples_split': 60, 'min_impurity_decrease': 8.754093856641478e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:03,717][0m Trial 336 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 108, 'algorithm': 'SAMME.R', 'learning_rate': 0.4919460460850531, 'max_depth': 99, 'min_samples_split': 63, 'min_impurity_decrease': 9.076875704604762e-05, 'criterion': 'gin

[32m[I 2022-07-01 14:11:20,337][0m Trial 358 finished with value: 0.19799999999999995 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.2442016820565842, 'max_depth': 48, 'min_samples_split': 58, 'min_impurity_decrease': 9.078947053282707e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:21,305][0m Trial 359 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 127, 'algorithm': 'SAMME.R', 'learning_rate': 0.4438622455289446, 'max_depth': 43, 'min_samples_split': 73, 'min_impurity_decrease': 2.3950538341259354e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:22,068][0m Trial 360 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.49306127693550517, 'max_depth': 32, 'min_samples_split': 36, 'min_impurity_decrease': 7.909928317565106e-05, 'criterion': 'g

[32m[I 2022-07-01 14:11:39,054][0m Trial 382 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 135, 'algorithm': 'SAMME.R', 'learning_rate': 0.48087442191175467, 'max_depth': 53, 'min_samples_split': 57, 'min_impurity_decrease': 9.49334741084971e-06, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:39,705][0m Trial 383 finished with value: 0.23099999999999998 and parameters: {'n_estimators': 132, 'algorithm': 'SAMME', 'learning_rate': 0.4708977024337153, 'max_depth': 10, 'min_samples_split': 55, 'min_impurity_decrease': 1.4661223355658117e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:40,326][0m Trial 384 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 105, 'algorithm': 'SAMME.R', 'learning_rate': 0.48551311537549274, 'max_depth': 41, 'min_samples_split': 62, 'min_impurity_decrease': 4.5410732505789646e-06, 'criteri

[32m[I 2022-07-01 14:11:56,628][0m Trial 406 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 96, 'algorithm': 'SAMME.R', 'learning_rate': 0.4606880552757442, 'max_depth': 3, 'min_samples_split': 48, 'min_impurity_decrease': 5.237739571611744e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:56,996][0m Trial 407 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 61, 'algorithm': 'SAMME.R', 'learning_rate': 0.46870722067377096, 'max_depth': 8, 'min_samples_split': 66, 'min_impurity_decrease': 5.2025056167808805e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:11:57,744][0m Trial 408 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 124, 'algorithm': 'SAMME.R', 'learning_rate': 0.4794968750529267, 'max_depth': 51, 'min_samples_split': 62, 'min_impurity_decrease': 4.231583698420241e-06, 'criterion': '

[32m[I 2022-07-01 14:12:14,463][0m Trial 430 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 108, 'algorithm': 'SAMME.R', 'learning_rate': 0.49104263074958526, 'max_depth': 2, 'min_samples_split': 63, 'min_impurity_decrease': 7.922136900955935e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:15,264][0m Trial 431 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 124, 'algorithm': 'SAMME.R', 'learning_rate': 0.4900604581639328, 'max_depth': 7, 'min_samples_split': 65, 'min_impurity_decrease': 8.111978312536885e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:15,893][0m Trial 432 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 103, 'algorithm': 'SAMME.R', 'learning_rate': 0.45140536133892323, 'max_depth': 5, 'min_samples_split': 52, 'min_impurity_decrease': 1.1881535575539344e-07, 'criterion': 'log

[32m[I 2022-07-01 14:12:31,962][0m Trial 454 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 117, 'algorithm': 'SAMME.R', 'learning_rate': 0.47390392122781055, 'max_depth': 4, 'min_samples_split': 17, 'min_impurity_decrease': 3.796228424512077e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:32,818][0m Trial 455 finished with value: 0.20099999999999996 and parameters: {'n_estimators': 130, 'algorithm': 'SAMME.R', 'learning_rate': 0.22828022520950383, 'max_depth': 70, 'min_samples_split': 65, 'min_impurity_decrease': 4.319070151081555e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:33,547][0m Trial 456 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 112, 'algorithm': 'SAMME.R', 'learning_rate': 0.48949425073156794, 'max_depth': 7, 'min_samples_split': 5, 'min_impurity_decrease': 7.602389454930694e-05, 'criterion'

[32m[I 2022-07-01 14:12:51,362][0m Trial 478 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.4123917048529713, 'max_depth': 76, 'min_samples_split': 24, 'min_impurity_decrease': 6.86791688354961e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:52,122][0m Trial 479 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.4848352412918095, 'max_depth': 63, 'min_samples_split': 15, 'min_impurity_decrease': 6.507416445678763e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:12:52,886][0m Trial 480 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 131, 'algorithm': 'SAMME.R', 'learning_rate': 0.49994523140205094, 'max_depth': 80, 'min_samples_split': 19, 'min_impurity_decrease': 2.3560451745461403e-05, 'criterion': 'en

[32m[I 2022-07-01 14:13:10,704][0m Trial 502 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 124, 'algorithm': 'SAMME.R', 'learning_rate': 0.4665805719869687, 'max_depth': 58, 'min_samples_split': 73, 'min_impurity_decrease': 1.4261487152361698e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:11,465][0m Trial 503 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 127, 'algorithm': 'SAMME.R', 'learning_rate': 0.47646208685901936, 'max_depth': 80, 'min_samples_split': 55, 'min_impurity_decrease': 3.315233615618855e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:12,282][0m Trial 504 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 119, 'algorithm': 'SAMME.R', 'learning_rate': 0.49135986251917085, 'max_depth': 71, 'min_samples_split': 76, 'min_impurity_decrease': 7.611543161654489e-05, 'criterio

[32m[I 2022-07-01 14:13:30,646][0m Trial 526 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 119, 'algorithm': 'SAMME.R', 'learning_rate': 0.48015938461267554, 'max_depth': 10, 'min_samples_split': 30, 'min_impurity_decrease': 1.3812069915505478e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:31,412][0m Trial 527 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 114, 'algorithm': 'SAMME.R', 'learning_rate': 0.49980971770396854, 'max_depth': 3, 'min_samples_split': 71, 'min_impurity_decrease': 8.465861810433149e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:32,205][0m Trial 528 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 121, 'algorithm': 'SAMME.R', 'learning_rate': 0.4705958242782963, 'max_depth': 5, 'min_samples_split': 67, 'min_impurity_decrease': 8.274446152708443e-05, 'criterion'

[32m[I 2022-07-01 14:13:48,936][0m Trial 550 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 125, 'algorithm': 'SAMME.R', 'learning_rate': 0.4696603589945725, 'max_depth': 41, 'min_samples_split': 35, 'min_impurity_decrease': 5.007997557581611e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:49,707][0m Trial 551 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.45703626691388116, 'max_depth': 6, 'min_samples_split': 13, 'min_impurity_decrease': 8.536366669078434e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:13:50,444][0m Trial 552 finished with value: 0.20299999999999996 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.2014958311434572, 'max_depth': 3, 'min_samples_split': 67, 'min_impurity_decrease': 3.708273200949802e-05, 'criterion

[32m[I 2022-07-01 14:14:06,158][0m Trial 574 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 96, 'algorithm': 'SAMME.R', 'learning_rate': 0.4914864624922073, 'max_depth': 6, 'min_samples_split': 69, 'min_impurity_decrease': 1.286380079420973e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:07,010][0m Trial 575 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.4609540521863097, 'max_depth': 3, 'min_samples_split': 64, 'min_impurity_decrease': 3.108755169855548e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:07,898][0m Trial 576 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 144, 'algorithm': 'SAMME.R', 'learning_rate': 0.4796584258871818, 'max_depth': 5, 'min_samples_split': 60, 'min_impurity_decrease': 6.690541635619302e-05, 'criterion': 'entr

[32m[I 2022-07-01 14:14:24,134][0m Trial 598 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 120, 'algorithm': 'SAMME.R', 'learning_rate': 0.4799018498951189, 'max_depth': 13, 'min_samples_split': 78, 'min_impurity_decrease': 3.175883512194463e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:24,715][0m Trial 599 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 98, 'algorithm': 'SAMME.R', 'learning_rate': 0.4907081036748321, 'max_depth': 59, 'min_samples_split': 75, 'min_impurity_decrease': 1.5018942090246811e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:25,558][0m Trial 600 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 124, 'algorithm': 'SAMME.R', 'learning_rate': 0.4683976970778522, 'max_depth': 2, 'min_samples_split': 29, 'min_impurity_decrease': 7.105537613212403e-05, 'criterion

[32m[I 2022-07-01 14:14:42,137][0m Trial 622 finished with value: 0.20899999999999996 and parameters: {'n_estimators': 28, 'algorithm': 'SAMME.R', 'learning_rate': 0.4908315046058156, 'max_depth': 76, 'min_samples_split': 69, 'min_impurity_decrease': 9.18779346178683e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:43,007][0m Trial 623 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 133, 'algorithm': 'SAMME.R', 'learning_rate': 0.47902233299054064, 'max_depth': 52, 'min_samples_split': 41, 'min_impurity_decrease': 5.470935968179803e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:14:43,638][0m Trial 624 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 104, 'algorithm': 'SAMME.R', 'learning_rate': 0.49274847776680253, 'max_depth': 1, 'min_samples_split': 53, 'min_impurity_decrease': 8.649524714206012e-05, 'criterion': 

[32m[I 2022-07-01 14:15:00,657][0m Trial 646 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 74, 'algorithm': 'SAMME.R', 'learning_rate': 0.4923292588149514, 'max_depth': 77, 'min_samples_split': 67, 'min_impurity_decrease': 1.621089501591195e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:01,508][0m Trial 647 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 126, 'algorithm': 'SAMME.R', 'learning_rate': 0.47481260637109635, 'max_depth': 23, 'min_samples_split': 46, 'min_impurity_decrease': 8.878325142176407e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:02,275][0m Trial 648 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 121, 'algorithm': 'SAMME.R', 'learning_rate': 0.456528726147327, 'max_depth': 13, 'min_samples_split': 96, 'min_impurity_decrease': 8.262996266330767e-05, 'criterion': 

[32m[I 2022-07-01 14:15:18,642][0m Trial 670 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 110, 'algorithm': 'SAMME.R', 'learning_rate': 0.47221871931337195, 'max_depth': 5, 'min_samples_split': 60, 'min_impurity_decrease': 9.142699099502851e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:19,473][0m Trial 671 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.45289402997385647, 'max_depth': 3, 'min_samples_split': 50, 'min_impurity_decrease': 8.561615862015363e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:20,144][0m Trial 672 finished with value: 0.23299999999999998 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME', 'learning_rate': 0.4808972307701766, 'max_depth': 58, 'min_samples_split': 37, 'min_impurity_decrease': 7.545523541427695e-05, 'criterion': 

[32m[I 2022-07-01 14:15:37,923][0m Trial 694 finished with value: 0.19799999999999995 and parameters: {'n_estimators': 126, 'algorithm': 'SAMME.R', 'learning_rate': 0.4661261316043701, 'max_depth': 37, 'min_samples_split': 51, 'min_impurity_decrease': 1.99863295518379e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:38,598][0m Trial 695 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 104, 'algorithm': 'SAMME.R', 'learning_rate': 0.483590011902198, 'max_depth': 8, 'min_samples_split': 71, 'min_impurity_decrease': 1.7879552298072825e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:39,217][0m Trial 696 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 102, 'algorithm': 'SAMME.R', 'learning_rate': 0.4850391481703296, 'max_depth': 5, 'min_samples_split': 62, 'min_impurity_decrease': 7.180733382066499e-06, 'criterion': '

[32m[I 2022-07-01 14:15:56,576][0m Trial 718 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 121, 'algorithm': 'SAMME.R', 'learning_rate': 0.45680578889163054, 'max_depth': 26, 'min_samples_split': 45, 'min_impurity_decrease': 8.280084475570498e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:57,360][0m Trial 719 finished with value: 0.19499999999999995 and parameters: {'n_estimators': 104, 'algorithm': 'SAMME.R', 'learning_rate': 0.49990515213700587, 'max_depth': 9, 'min_samples_split': 73, 'min_impurity_decrease': 6.1840188935795495e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:15:58,199][0m Trial 720 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 116, 'algorithm': 'SAMME.R', 'learning_rate': 0.48306910288541005, 'max_depth': 8, 'min_samples_split': 76, 'min_impurity_decrease': 1.0449969718130142e-05, 'criterio

[32m[I 2022-07-01 14:16:15,416][0m Trial 742 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 111, 'algorithm': 'SAMME.R', 'learning_rate': 0.49985488096637504, 'max_depth': 1, 'min_samples_split': 36, 'min_impurity_decrease': 8.377388551599073e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:16,168][0m Trial 743 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 125, 'algorithm': 'SAMME.R', 'learning_rate': 0.41617469112198685, 'max_depth': 80, 'min_samples_split': 71, 'min_impurity_decrease': 7.074520334097531e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:17,024][0m Trial 744 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 128, 'algorithm': 'SAMME.R', 'learning_rate': 0.46310517439309273, 'max_depth': 3, 'min_samples_split': 53, 'min_impurity_decrease': 8.14902090239893e-05, 'criterion': 'gin

[32m[I 2022-07-01 14:16:32,647][0m Trial 766 finished with value: 0.23299999999999998 and parameters: {'n_estimators': 106, 'algorithm': 'SAMME', 'learning_rate': 0.49303057601382755, 'max_depth': 3, 'min_samples_split': 1, 'min_impurity_decrease': 8.436808609715378e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:33,538][0m Trial 767 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 135, 'algorithm': 'SAMME.R', 'learning_rate': 0.48171248554417107, 'max_depth': 6, 'min_samples_split': 2, 'min_impurity_decrease': 1.149054716430464e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:34,505][0m Trial 768 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 137, 'algorithm': 'SAMME.R', 'learning_rate': 0.3016528748373627, 'max_depth': 77, 'min_samples_split': 7, 'min_impurity_decrease': 2.8487474720823652e-05, 'criterion': 'e

[32m[I 2022-07-01 14:16:51,166][0m Trial 790 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 112, 'algorithm': 'SAMME.R', 'learning_rate': 0.4775896926251081, 'max_depth': 7, 'min_samples_split': 63, 'min_impurity_decrease': 7.210881847956419e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:51,906][0m Trial 791 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 120, 'algorithm': 'SAMME.R', 'learning_rate': 0.4701635480132834, 'max_depth': 3, 'min_samples_split': 50, 'min_impurity_decrease': 9.785793481299194e-06, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:16:52,585][0m Trial 792 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 117, 'algorithm': 'SAMME.R', 'learning_rate': 0.4859175617028433, 'max_depth': 61, 'min_samples_split': 28, 'min_impurity_decrease': 8.534278735736798e-05, 'criterion': 'gini'

[32m[I 2022-07-01 14:17:10,047][0m Trial 814 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 121, 'algorithm': 'SAMME.R', 'learning_rate': 0.4722828293386177, 'max_depth': 1, 'min_samples_split': 18, 'min_impurity_decrease': 8.521041725638673e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:10,713][0m Trial 815 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 102, 'algorithm': 'SAMME.R', 'learning_rate': 0.3142897645477155, 'max_depth': 4, 'min_samples_split': 3, 'min_impurity_decrease': 8.265204683156606e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:11,497][0m Trial 816 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 115, 'algorithm': 'SAMME.R', 'learning_rate': 0.48228671026579983, 'max_depth': 6, 'min_samples_split': 69, 'min_impurity_decrease': 9.391994603940487e-05, 'criterion': 'ent

[32m[I 2022-07-01 14:17:28,156][0m Trial 838 finished with value: 0.19799999999999995 and parameters: {'n_estimators': 125, 'algorithm': 'SAMME.R', 'learning_rate': 0.462097544841586, 'max_depth': 44, 'min_samples_split': 78, 'min_impurity_decrease': 6.687161816735287e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:28,959][0m Trial 839 finished with value: 0.21299999999999997 and parameters: {'n_estimators': 120, 'algorithm': 'SAMME.R', 'learning_rate': 0.08724209116475778, 'max_depth': 6, 'min_samples_split': 70, 'min_impurity_decrease': 6.370495592091102e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:29,689][0m Trial 840 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 118, 'algorithm': 'SAMME.R', 'learning_rate': 0.4767180369868313, 'max_depth': 4, 'min_samples_split': 55, 'min_impurity_decrease': 8.551842799881817e-05, 'criterion': '

[32m[I 2022-07-01 14:17:47,593][0m Trial 862 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 122, 'algorithm': 'SAMME.R', 'learning_rate': 0.4684235850038646, 'max_depth': 10, 'min_samples_split': 72, 'min_impurity_decrease': 7.46131704591138e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:48,453][0m Trial 863 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.45392059835603116, 'max_depth': 51, 'min_samples_split': 32, 'min_impurity_decrease': 5.032167544615209e-06, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:17:49,272][0m Trial 864 finished with value: 0.19899999999999995 and parameters: {'n_estimators': 137, 'algorithm': 'SAMME.R', 'learning_rate': 0.46255316223492604, 'max_depth': 57, 'min_samples_split': 27, 'min_impurity_decrease': 3.623550744519167e-05, 'crite

[32m[I 2022-07-01 14:18:04,463][0m Trial 886 finished with value: 0.19299999999999995 and parameters: {'n_estimators': 99, 'algorithm': 'SAMME.R', 'learning_rate': 0.4861634898990018, 'max_depth': 3, 'min_samples_split': 55, 'min_impurity_decrease': 2.5247818796856896e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:05,318][0m Trial 887 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 135, 'algorithm': 'SAMME.R', 'learning_rate': 0.4790543770317171, 'max_depth': 76, 'min_samples_split': 30, 'min_impurity_decrease': 2.117133667209157e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:05,891][0m Trial 888 finished with value: 0.238 and parameters: {'n_estimators': 94, 'algorithm': 'SAMME.R', 'learning_rate': 0.0703235402890802, 'max_depth': 3, 'min_samples_split': 68, 'min_impurity_decrease': 2.0220391948904366e-05, 'criterion': 'gini'}. Best is tri

[32m[I 2022-07-01 14:18:21,784][0m Trial 910 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 128, 'algorithm': 'SAMME.R', 'learning_rate': 0.4523638010530201, 'max_depth': 1, 'min_samples_split': 4, 'min_impurity_decrease': 9.273080307664667e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:22,488][0m Trial 911 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 109, 'algorithm': 'SAMME.R', 'learning_rate': 0.49954011986802654, 'max_depth': 12, 'min_samples_split': 67, 'min_impurity_decrease': 7.268230002912171e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:23,290][0m Trial 912 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 121, 'algorithm': 'SAMME.R', 'learning_rate': 0.48769418196087916, 'max_depth': 25, 'min_samples_split': 29, 'min_impurity_decrease': 6.77910982565568e-05, 'criteri

[32m[I 2022-07-01 14:18:39,610][0m Trial 934 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 112, 'algorithm': 'SAMME.R', 'learning_rate': 0.4716855759227958, 'max_depth': 1, 'min_samples_split': 50, 'min_impurity_decrease': 8.470246688516253e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:40,403][0m Trial 935 finished with value: 0.19599999999999995 and parameters: {'n_estimators': 111, 'algorithm': 'SAMME.R', 'learning_rate': 0.4995927646664601, 'max_depth': 4, 'min_samples_split': 63, 'min_impurity_decrease': 8.213464086851208e-05, 'criterion': 'entropy'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:41,263][0m Trial 936 finished with value: 0.19099999999999995 and parameters: {'n_estimators': 129, 'algorithm': 'SAMME.R', 'learning_rate': 0.48131923356527856, 'max_depth': 6, 'min_samples_split': 87, 'min_impurity_decrease': 9.305015883207253e-05, 'criterion': 'gi

[32m[I 2022-07-01 14:18:58,847][0m Trial 958 finished with value: 0.235 and parameters: {'n_estimators': 126, 'algorithm': 'SAMME', 'learning_rate': 0.4098195620164426, 'max_depth': 49, 'min_samples_split': 82, 'min_impurity_decrease': 2.3054043326608573e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:18:59,560][0m Trial 959 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 106, 'algorithm': 'SAMME.R', 'learning_rate': 0.4993430968951064, 'max_depth': 65, 'min_samples_split': 97, 'min_impurity_decrease': 9.077324722768247e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:19:00,316][0m Trial 960 finished with value: 0.19399999999999995 and parameters: {'n_estimators': 103, 'algorithm': 'SAMME.R', 'learning_rate': 0.4912557320813694, 'max_depth': 3, 'min_samples_split': 62, 'min_impurity_decrease': 1.7084737424692996e-05, 'criterion': 'entropy'}. Be

[32m[I 2022-07-01 14:19:17,332][0m Trial 982 finished with value: 0.19199999999999995 and parameters: {'n_estimators': 126, 'algorithm': 'SAMME.R', 'learning_rate': 0.4704946568276425, 'max_depth': 50, 'min_samples_split': 63, 'min_impurity_decrease': 5.799538796748643e-05, 'criterion': 'logg_loss'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:19:18,282][0m Trial 983 finished with value: 0.19699999999999995 and parameters: {'n_estimators': 134, 'algorithm': 'SAMME.R', 'learning_rate': 0.48647724575246387, 'max_depth': 67, 'min_samples_split': 84, 'min_impurity_decrease': 9.235101988081695e-05, 'criterion': 'gini'}. Best is trial 53 with value: 0.19099999999999995.[0m
[32m[I 2022-07-01 14:19:19,233][0m Trial 984 finished with value: 0.19999999999999996 and parameters: {'n_estimators': 132, 'algorithm': 'SAMME.R', 'learning_rate': 0.46534435240365885, 'max_depth': 64, 'min_samples_split': 90, 'min_impurity_decrease': 2.4817602316813385e-05, 'criterio

### Show results

In [55]:
plot_optimization_history(study)

In [56]:
plot_param_importances(study)

In [57]:
key_params = ['algorithm', 'learning_rate', 'n_estimators']
plot_slice(study, params = key_params).show()

In [195]:
study.best_params

{'n_estimators': 106,
 'algorithm': 'SAMME.R',
 'learning_rate': 0.4827054897488229,
 'max_depth': 11,
 'min_samples_split': 1,
 'min_impurity_decrease': 7.200517833654635e-05,
 'criterion': 'gini'}

In [196]:
best_params = {
    'n_estimators': 106,
    'algorithm': 'SAMME.R',
    'learning_rate': 0.4827054897488229,
    'max_depth': 11,
    'min_samples_split': 1,
    'min_impurity_decrease': 7.200517833654635e-05,
    'criterion': 'gini'
}

## Create model with best params

In [197]:
train_x = train_df[cols[:-1]].to_numpy()
train_y = train_df[cols[-1]].to_numpy()
val_x = val_df[cols[:-1]].to_numpy()
val_y = val_df[cols[-1]].to_numpy()

In [198]:
params = best_params
tree = DecisionTreeClassifier(
#     min_samples_split=1,
    max_depth=11,
    min_impurity_decrease=7.200517833654635e-05,
    criterion='gini'
)
model = AdaBoostClassifier(
    base_estimator=tree,
    n_estimators=106,
    learning_rate=0.4827054897488229,
    algorithm='SAMME.R'
)

In [199]:
X = pd.concat([train_df[cols[:-1]], val_df[cols[:-1]]])
y = pd.concat([train_df[cols[-1]], val_df[cols[-1]]])

In [200]:
cv_scores = cross_val_score(model, X, y, cv=10)
print(f'Mean cv (k=10) accuracy score = {round(np.mean(cv_scores), 3)}')

Mean cv (k=10) accuracy score = 0.751


## Feature selection

In [201]:
model.fit(train_x, train_y)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=11,
                                                         min_impurity_decrease=7.200517833654635e-05),
                   learning_rate=0.4827054897488229, n_estimators=106)

In [203]:
imp_df = pd.DataFrame({
    'feature': cols[:-1],
    'imp': model.feature_importances_
}).sort_values('imp', ascending=False)
best_features = imp_df['feature'].to_list()
imp_df

Unnamed: 0,feature,imp
7,Age,0.026601
9,FoodCourt,0.019832
10,ShoppingMall,0.017611
8,RoomService,0.015871
12,VRDeck,0.015086
11,Spa,0.014537
1,CryoSleep,0.00898
4,Mars,0.002921
5,PSO J318.5-22,0.002487
3,Europa,0.002379


In [204]:
temp_cols = []
cv_list = []

for feature in best_features:
    temp_cols.append(feature)
    X = pd.concat([train_df[temp_cols], val_df[temp_cols]]).to_numpy()
    cv_score = cross_val_score(model, X, y, cv=10)
    cv_list.append(cv_score)

In [212]:
means = []
for scores in cv_list:
    means.append(round(np.mean(scores), 3))

i_features = 0
for i in range(len(means)):
    if means[i] == max(means):
        i_features = i
        print(i_features)
        break

7


In [215]:
best_features[:i_features+1]

['Age',
 'FoodCourt',
 'ShoppingMall',
 'RoomService',
 'VRDeck',
 'Spa',
 'CryoSleep',
 'Mars']

## Make final model

In [216]:
train_x = train_df[best_features[:i_features+1]].to_numpy()
val_x = val_df[best_features[:i_features+1]].to_numpy()

In [224]:
params = best_params
tree = DecisionTreeClassifier(
#     min_samples_split=1,
    max_depth=11,
    min_impurity_decrease=7.200517833654635e-05,
    criterion='gini'
)
model = AdaBoostClassifier(
    base_estimator=tree,
    n_estimators=106,
    learning_rate=0.4827054897488229,
    algorithm='SAMME.R'
)

In [225]:
model.fit(train_x, train_y)
pred_val = model.predict_proba(val_x)
pred_train = model.predict_proba(train_x)

In [238]:
train_proba_df = pd.DataFrame(pred_train)
train_proba_df.columns = ['0', 'ada_boost']
train_proba_df['y'] = train_y.astype(int)
train_proba_df.drop(columns='0', inplace=True)
train_proba_df

Unnamed: 0,ada_boost,y
0,0.507872,0
1,0.505969,0
2,0.366664,0
3,0.501475,1
4,0.294826,0
...,...,...
6996,0.401386,0
6997,0.247687,0
6998,0.542936,1
6999,0.455905,0


In [237]:
val_proba_df = pd.DataFrame(pred_val)
val_proba_df.columns = ['0', 'ada_boost']
val_proba_df['y'] = val_y.astype(int)
val_proba_df.drop(columns='0', inplace=True)
val_proba_df

Unnamed: 0,ada_boost,y
0,0.564468,1
1,0.503813,0
2,0.437986,1
3,0.434112,0
4,0.504742,1
...,...,...
1687,0.379731,0
1688,0.507257,1
1689,0.509272,1
1690,0.493184,1


In [239]:
train_proba_df.to_csv('./data/ensemble_train_df.csv')
val_proba_df.to_csv('./data/ensemble_val_df.csv')