# Gradient Boosting: Hyperparameter Tuning

## Packages and Presets

In [1]:
import pandas as pd
import numpy as np
import yaml
import os
import joblib

from sklearn.metrics import (
    f1_score, 
    balanced_accuracy_score,
)

from sklearn.model_selection import StratifiedKFold

import matplotlib.pyplot as plt
import seaborn as sns 

from imblearn.over_sampling import SMOTE

import catboost as cb

from tsfresh.transformers.per_column_imputer import PerColumnImputer
from tsfresh.feature_extraction.settings import EfficientFCParameters, ComprehensiveFCParameters
from tsfresh.feature_extraction import extract_features


pd.set_option('display.max_columns', None)
%load_ext blackcellmagic
%load_ext autoreload

import optuna
import warnings

In [2]:
NUM_TRIALS = 200

with open("classical_ml_config.yaml", "r") as file:
    config = yaml.safe_load(file)

## No Feature Engineering

In [3]:
# Load data
train_df = pd.read_csv(config["paths"]["ptb_train"], header=None)

In [4]:
X_train_all = train_df.iloc[:, :-1]
y_train_all = train_df.iloc[:, -1]

In [5]:
# create objective function for optuna
def objective_no_feat_eng(trial):
    use_smote = trial.suggest_categorical("use_smote", [True, False]),

    # see: https://forecastegy.com/posts/catboost-hyperparameter-tuning-guide-with-optuna/
    hyperparams = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 0.5, log=True),
        "depth": trial.suggest_int("depth", 5, 10),
        "iterations": trial.suggest_int("n_estimators", 100, 1000),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-4, 1e2, log=True),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
    }
    
    
    f1_scores = []
    
    skf = StratifiedKFold(n_splits=5, random_state=config["general"]["seed"], shuffle=True)
    for fold_num, (train_idx, val_idx) in enumerate(skf.split(X_train_all, y_train_all)):
        X_train, X_val = X_train_all.iloc[train_idx], X_train_all.iloc[val_idx]
        y_train, y_val = y_train_all[train_idx], y_train_all[val_idx]
        
        if use_smote:
            smote = SMOTE(random_state=config["general"]["seed"])
            X_train, y_train = smote.fit_resample(X_train, y_train) 
        
        
        boost = cb.CatBoostClassifier(
            task_type="CPU", 
            random_state=config["general"]["seed"],
            **hyperparams
        )
        boost.fit(X_train, y_train, verbose=0)
        
        y_preds = boost.predict(X_val)
        
        score = f1_score(y_val, y_preds)
            
        trial.report(score, fold_num)
        if trial.should_prune():
            raise optuna.TrialPruned()
        
        f1_scores.append(score)
            
        
    return np.mean(f1_scores)

In [6]:
# prune bad trials 
pruner = optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=2)

study_no_feat_eng = optuna.create_study(
    direction="maximize",
    study_name="boost_no_feat_eng",
    sampler=optuna.samplers.TPESampler(seed=config["general"]["seed"]),
    pruner=pruner,
)

[I 2024-04-30 00:01:53,060] A new study created in memory with name: boost_no_feat_eng


In [7]:
study_no_feat_eng.optimize(
    objective_no_feat_eng, 
    n_trials=NUM_TRIALS,
    timeout = 10 * 60 * 60, # timeout after 10 hours
    show_progress_bar=True
)

  0%|          | 0/200 [00:00<?, ?it/s]

[I 2024-04-30 00:02:23,365] Trial 0 finished with value: 0.9826618956905921 and parameters: {'use_smote': False, 'learning_rate': 0.05100627805979915, 'depth': 8, 'n_estimators': 240, 'l2_leaf_reg': 0.0008629132190071859, 'min_data_in_leaf': 6}. Best is trial 0 with value: 0.9826618956905921.
[I 2024-04-30 00:02:53,045] Trial 1 finished with value: 0.9811083732676746 and parameters: {'use_smote': True, 'learning_rate': 0.04160439645256607, 'depth': 5, 'n_estimators': 973, 'l2_leaf_reg': 9.877700294007917, 'min_data_in_leaf': 22}. Best is trial 0 with value: 0.9826618956905921.
[I 2024-04-30 00:03:47,363] Trial 2 finished with value: 0.9014263125566135 and parameters: {'use_smote': False, 'learning_rate': 0.00133469775741781, 'depth': 8, 'n_estimators': 489, 'l2_leaf_reg': 0.005589524205217926, 'min_data_in_leaf': 62}. Best is trial 0 with value: 0.9826618956905921.
[I 2024-04-30 00:04:37,204] Trial 3 finished with value: 0.9227342334581738 and parameters: {'use_smote': False, 'learning

In [8]:
best_params = study_no_feat_eng.best_params
    
print(best_params)

if "catboost_no_feat_eng" in config:
    config["catboost_no_feat_eng"].update(best_params)
else:
    config["catboost_no_feat_eng"] = best_params

# see: https://stackoverflow.com/questions/12470665/how-can-i-write-data-in-yaml-format-in-a-file
with open("classical_ml_config.yaml", "w") as file:
    yaml.dump(config, file, default_flow_style=False)

{'use_smote': True, 'learning_rate': 0.06111785339864093, 'depth': 10, 'n_estimators': 854, 'l2_leaf_reg': 1.9151241607277159, 'min_data_in_leaf': 36}


In [9]:
p_importance_no_feat_eng = optuna.visualization.plot_param_importances(study_no_feat_eng)
p_importance_no_feat_eng.show()

In [10]:
p_history_no_feat_eng = optuna.visualization.plot_optimization_history(study_no_feat_eng)
p_history_no_feat_eng.show()

## With Feature Engineering

In [11]:
# Load data
train_df = pd.read_csv(config["paths"]["ptb_train"], header=None)

In [12]:
X_train_all = train_df.iloc[:, :-1]
y_train_all = train_df.iloc[:, -1]

In [13]:
X_train_all.columns = X_train_all.columns.astype(str)

In [14]:
# create objective function for optuna
def objective_feat_eng(trial):
    use_smote = trial.suggest_categorical("use_smote", [True, False]),

    # see: https://forecastegy.com/posts/catboost-hyperparameter-tuning-guide-with-optuna/
    hyperparams = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 0.5, log=True),
        "depth": trial.suggest_int("depth", 5, 10),
        "iterations": trial.suggest_int("n_estimators", 100, 1000),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-4, 1e2, log=True),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
    }
    
    
    f1_scores = []
    
    skf = StratifiedKFold(n_splits=5, random_state=config["general"]["seed"], shuffle=True)
    for fold_num, (train_idx, val_idx) in enumerate(skf.split(X_train_all, y_train_all)):
        X_train, X_val = X_train_all.iloc[train_idx], X_train_all.iloc[val_idx]
        y_train, y_val = y_train_all[train_idx], y_train_all[val_idx]
        
        
        if use_smote:
            smote = SMOTE(random_state=config["general"]["seed"])
            X_train, y_train = smote.fit_resample(X_train, y_train) 
        
        
        X_train["id"] = X_train.index
        X_train_melted =(
            X_train
            .melt(id_vars="id", var_name="time", value_name="value")
            .sort_values(by=["id", "time"])
        )
        # get rid of padding to not ruin the engineered features
        # for simplicity, we drop all 0s, as only few "true" 0s are in the data
        X_train_melted["value"] = X_train_melted["value"].replace(0, np.nan)
        X_train_melted = X_train_melted.dropna()
        
        X_val["id"] = X_val.index
        X_val_melted =(
            X_val
            .melt(id_vars="id", var_name="time", value_name="value")
            .sort_values(by=["id", "time"])
        )
        # get rid of padding to not ruin the engineered features
        # for simplicity, we drop all 0s, as only few "true" 0s are in the data
        X_val_melted["value"] = X_val_melted["value"].replace(0, np.nan)
        X_val_melted = X_val_melted.dropna()
        
        
        # We will follow this tutorial:
        # https://towardsdatascience.com/expanding-your-regression-repertoire-with-regularisation-903d2c9f7b28
        # but will use the ComprehensiveFCParameters instead of the EfficientFCParameters
        # for feature extraction
        X_train_augmented = extract_features(
            X_train_melted,
            column_id="id",
            column_sort="time",
            column_value="value",
            default_fc_parameters=ComprehensiveFCParameters(),
        )  
        X_val_augmented = extract_features(
            X_val_melted,
            column_id="id",
            column_sort="time",
            column_value="value",
            default_fc_parameters=ComprehensiveFCParameters(),
        )  
        
        X_train_merged = pd.merge(
            X_train, X_train_augmented, left_index=True, right_index=True
        )
        X_val_merged = pd.merge(
            X_val, X_val_augmented, left_index=True, right_index=True
        )

        # assert that no rows were lost
        assert X_train_merged.shape[0] == X_train.shape[0]
        assert X_val_merged.shape[0] == X_val.shape[0]
        assert X_train_merged.index.equals(X_train.index)
        assert X_val_merged.index.equals(X_val.index)
        
        imputer = PerColumnImputer()
        X_train_merged = imputer.fit_transform(X_train_merged)
        X_val_merged = imputer.transform(X_val_merged)
        
        boost = cb.CatBoostClassifier(
            task_type="CPU", 
            random_state=config["general"]["seed"],
            **hyperparams
        )
        boost.fit(X_train_merged, y_train, verbose=0)
        
        y_preds = boost.predict(X_val_merged)
        
        score = f1_score(y_val, y_preds)
            
        trial.report(score, fold_num)
        if trial.should_prune():
            raise optuna.TrialPruned()
        
        f1_scores.append(f1_score)
            
        
    return np.mean(score)

In [15]:
# prune bad trials 
pruner = optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=2)

study_feat_eng = optuna.create_study(
    direction="maximize",
    study_name="boost_feat_eng",
    sampler=optuna.samplers.TPESampler(seed=config["general"]["seed"]),
    pruner=pruner,
)

[I 2024-04-30 06:03:57,268] A new study created in memory with name: boost_feat_eng


In [16]:
warnings.filterwarnings('ignore') #ignore pandas warnings

study_feat_eng.optimize(
    objective_feat_eng, 
    n_trials=NUM_TRIALS,
    timeout = 10 * 60 * 60, # timeout after 8 hours
    show_progress_bar=True
)

  0%|          | 0/200 [00:00<?, ?it/s]

Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.31it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.79it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.11it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]


[I 2024-04-30 06:12:25,631] Trial 0 finished with value: 0.9824769824769825 and parameters: {'use_smote': False, 'learning_rate': 0.05100627805979915, 'depth': 8, 'n_estimators': 240, 'l2_leaf_reg': 0.0008629132190071859, 'min_data_in_leaf': 6}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.98it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.97it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.12it/s]


[I 2024-04-30 06:20:03,113] Trial 1 finished with value: 0.9806490026793688 and parameters: {'use_smote': True, 'learning_rate': 0.04160439645256607, 'depth': 5, 'n_estimators': 973, 'l2_leaf_reg': 9.877700294007917, 'min_data_in_leaf': 22}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.11it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.11it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.98it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.96it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]


[I 2024-04-30 06:29:56,434] Trial 2 finished with value: 0.9137148047229791 and parameters: {'use_smote': False, 'learning_rate': 0.00133469775741781, 'depth': 8, 'n_estimators': 489, 'l2_leaf_reg': 0.005589524205217926, 'min_data_in_leaf': 62}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.51it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.42it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.15it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.00it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.95it/s]


[I 2024-04-30 06:39:54,116] Trial 3 finished with value: 0.9338146811070999 and parameters: {'use_smote': False, 'learning_rate': 0.002265486450485179, 'depth': 7, 'n_estimators': 807, 'l2_leaf_reg': 0.0015777663630582469, 'min_data_in_leaf': 52}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.46it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.14it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.54it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.19it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.12it/s]


[I 2024-04-30 06:47:04,315] Trial 4 finished with value: 0.9234029669996973 and parameters: {'use_smote': True, 'learning_rate': 0.01767218232266507, 'depth': 6, 'n_estimators': 158, 'l2_leaf_reg': 49.35296209402104, 'min_data_in_leaf': 97}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.96it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.27it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.57it/s]
Feature Extraction: 100%|██████████| 80/80 [00:54<00:00,  1.45it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.88it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.81it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.17it/s]


[I 2024-04-30 07:00:46,223] Trial 5 finished with value: 0.855608214849921 and parameters: {'use_smote': True, 'learning_rate': 0.0002297662414082499, 'depth': 9, 'n_estimators': 496, 'l2_leaf_reg': 0.0005397956855996448, 'min_data_in_leaf': 50}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [00:54<00:00,  1.47it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.05it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.71it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.33it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.80it/s]


[I 2024-04-30 07:10:16,063] Trial 6 finished with value: 0.8761254268860602 and parameters: {'use_smote': False, 'learning_rate': 0.0009061902594114382, 'depth': 8, 'n_estimators': 380, 'l2_leaf_reg': 0.1319496149042567, 'min_data_in_leaf': 55}. Best is trial 0 with value: 0.9824769824769825.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.92it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.07it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.16it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.86it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.95it/s]


[I 2024-04-30 07:26:34,343] Trial 7 finished with value: 0.9854728728135191 and parameters: {'use_smote': False, 'learning_rate': 0.0736534446668837, 'depth': 10, 'n_estimators': 906, 'l2_leaf_reg': 0.3867228849117747, 'min_data_in_leaf': 93}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.88it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.91it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.16it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.15it/s]


[I 2024-04-30 07:34:33,049] Trial 8 finished with value: 0.8240917782026769 and parameters: {'use_smote': False, 'learning_rate': 0.00014699223219374907, 'depth': 6, 'n_estimators': 450, 'l2_leaf_reg': 0.004247116662617146, 'min_data_in_leaf': 83}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:52<00:00,  1.51it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.91it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.82it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.00it/s]


[I 2024-04-30 07:42:06,236] Trial 9 finished with value: 0.9365521510368306 and parameters: {'use_smote': True, 'learning_rate': 0.010172238474449055, 'depth': 5, 'n_estimators': 822, 'l2_leaf_reg': 0.00028009403633756793, 'min_data_in_leaf': 99}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.16it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.27it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.41it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.08it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.08it/s]


[I 2024-04-30 07:56:01,905] Trial 10 finished with value: 0.9833134684147795 and parameters: {'use_smote': False, 'learning_rate': 0.2500079596315153, 'depth': 10, 'n_estimators': 704, 'l2_leaf_reg': 0.526059318173317, 'min_data_in_leaf': 74}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.75it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.05it/s]
Feature Extraction: 100%|██████████| 80/80 [00:53<00:00,  1.48it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  7.87it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.34it/s]


[I 2024-04-30 08:10:05,140] Trial 11 finished with value: 0.9831010969463386 and parameters: {'use_smote': False, 'learning_rate': 0.40780593388708014, 'depth': 10, 'n_estimators': 690, 'l2_leaf_reg': 0.80187885086786, 'min_data_in_leaf': 76}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.07it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:12<00:00,  6.32it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.50it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.83it/s]


[I 2024-04-30 08:24:09,072] Trial 12 finished with value: 0.9815366289458011 and parameters: {'use_smote': False, 'learning_rate': 0.45871593902613605, 'depth': 10, 'n_estimators': 696, 'l2_leaf_reg': 0.9695593631143825, 'min_data_in_leaf': 77}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00, 10.18it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.27it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.04it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.00it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.12it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]


[I 2024-04-30 08:41:36,808] Trial 13 finished with value: 0.9845513963161022 and parameters: {'use_smote': False, 'learning_rate': 0.11778790368374273, 'depth': 10, 'n_estimators': 1000, 'l2_leaf_reg': 0.06489800003437501, 'min_data_in_leaf': 88}. Best is trial 7 with value: 0.9854728728135191.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.02it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.95it/s]
Feature Extraction: 100%|██████████| 80/80 [01:04<00:00,  1.24it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.19it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.18it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.22it/s]


[I 2024-04-30 09:01:57,938] Trial 14 finished with value: 0.9865951742627346 and parameters: {'use_smote': False, 'learning_rate': 0.11156523715078995, 'depth': 9, 'n_estimators': 997, 'l2_leaf_reg': 0.027795604922870122, 'min_data_in_leaf': 37}. Best is trial 14 with value: 0.9865951742627346.


Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.24it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.26it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.40it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.96it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.16it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.26it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.51it/s]


[I 2024-04-30 09:20:48,463] Trial 15 finished with value: 0.9869436201780415 and parameters: {'use_smote': False, 'learning_rate': 0.08014673811832856, 'depth': 9, 'n_estimators': 886, 'l2_leaf_reg': 0.025452814834808823, 'min_data_in_leaf': 34}. Best is trial 15 with value: 0.9869436201780415.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.16it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.12it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00,  9.88it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.18it/s]


[I 2024-04-30 09:39:19,797] Trial 16 finished with value: 0.9857651245551602 and parameters: {'use_smote': False, 'learning_rate': 0.018821887854678535, 'depth': 9, 'n_estimators': 852, 'l2_leaf_reg': 0.019543261927417906, 'min_data_in_leaf': 34}. Best is trial 15 with value: 0.9869436201780415.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.30it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.02it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.47it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.89it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.13it/s]


[I 2024-04-30 09:56:23,347] Trial 17 finished with value: 0.9872289872289872 and parameters: {'use_smote': False, 'learning_rate': 0.1627334199785534, 'depth': 9, 'n_estimators': 755, 'l2_leaf_reg': 0.037007410339764624, 'min_data_in_leaf': 36}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.97it/s]
Feature Extraction: 100%|██████████| 80/80 [01:04<00:00,  1.24it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.24it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [01:13<00:00,  1.09it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.28it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.97it/s]


[I 2024-04-30 10:11:47,646] Trial 18 finished with value: 0.9765509053131493 and parameters: {'use_smote': True, 'learning_rate': 0.004562110581076508, 'depth': 9, 'n_estimators': 601, 'l2_leaf_reg': 0.00012389742830281927, 'min_data_in_leaf': 14}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.71it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.58it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.02it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.31it/s]


[I 2024-04-30 10:21:30,396] Trial 19 finished with value: 0.9784560143626571 and parameters: {'use_smote': False, 'learning_rate': 0.1627369509717598, 'depth': 7, 'n_estimators': 775, 'l2_leaf_reg': 3.823704916741752, 'min_data_in_leaf': 34}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.65it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.08it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.43it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.89it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.25it/s]


[I 2024-04-30 10:36:49,449] Trial 20 finished with value: 0.9866508454464551 and parameters: {'use_smote': False, 'learning_rate': 0.03672608284184885, 'depth': 9, 'n_estimators': 602, 'l2_leaf_reg': 0.003960354006182484, 'min_data_in_leaf': 23}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.97it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.27it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  7.86it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.08it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.14it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.09it/s]


[I 2024-04-30 10:51:51,334] Trial 21 finished with value: 0.9869436201780415 and parameters: {'use_smote': False, 'learning_rate': 0.03786221058841184, 'depth': 9, 'n_estimators': 603, 'l2_leaf_reg': 0.01125291865209966, 'min_data_in_leaf': 24}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.98it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.12it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.87it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.54it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.25it/s]


[I 2024-04-30 11:03:50,740] Trial 22 finished with value: 0.9848529848529849 and parameters: {'use_smote': False, 'learning_rate': 0.021139770535546158, 'depth': 8, 'n_estimators': 725, 'l2_leaf_reg': 0.02028965202203345, 'min_data_in_leaf': 41}. Best is trial 17 with value: 0.9872289872289872.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.76it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00,  9.87it/s]
Feature Extraction: 100%|██████████| 80/80 [00:54<00:00,  1.48it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.86it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.09it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.87it/s]


[I 2024-04-30 11:18:46,708] Trial 23 finished with value: 0.9875370919881306 and parameters: {'use_smote': False, 'learning_rate': 0.2029778371888283, 'depth': 9, 'n_estimators': 606, 'l2_leaf_reg': 0.12229360521210861, 'min_data_in_leaf': 24}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.98it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.38it/s]
Feature Extraction: 100%|██████████| 80/80 [00:53<00:00,  1.50it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.93it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.01it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.14it/s]


[I 2024-04-30 11:31:58,970] Trial 24 finished with value: 0.9809069212410502 and parameters: {'use_smote': False, 'learning_rate': 0.2578586465228446, 'depth': 8, 'n_estimators': 920, 'l2_leaf_reg': 0.10980732571463532, 'min_data_in_leaf': 5}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.94it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.22it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.26it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.42it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.86it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.08it/s]


[I 2024-04-30 11:50:51,049] Trial 25 finished with value: 0.9866111276405831 and parameters: {'use_smote': False, 'learning_rate': 0.09068188910459861, 'depth': 9, 'n_estimators': 892, 'l2_leaf_reg': 0.08563079996364127, 'min_data_in_leaf': 44}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.48it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.23it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.68it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  7.91it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.07it/s]


[I 2024-04-30 11:59:09,407] Trial 26 finished with value: 0.982078853046595 and parameters: {'use_smote': False, 'learning_rate': 0.24518700197561846, 'depth': 7, 'n_estimators': 368, 'l2_leaf_reg': 0.26023708935803064, 'min_data_in_leaf': 28}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00, 10.23it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.09it/s]


[I 2024-04-30 12:09:27,306] Trial 27 pruned. 


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.42it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.06it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.18it/s]


[I 2024-04-30 12:20:41,972] Trial 28 finished with value: 0.977326968973747 and parameters: {'use_smote': False, 'learning_rate': 0.23069950607396955, 'depth': 8, 'n_estimators': 641, 'l2_leaf_reg': 0.03410342621546927, 'min_data_in_leaf': 14}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:54<00:00,  1.46it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.07it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.25it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.84it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.79it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]


[I 2024-04-30 12:32:57,531] Trial 29 finished with value: 0.9833926453143536 and parameters: {'use_smote': False, 'learning_rate': 0.07827227190774119, 'depth': 10, 'n_estimators': 524, 'l2_leaf_reg': 0.0010387942003180126, 'min_data_in_leaf': 59}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.13it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.83it/s]
Feature Extraction: 100%|██████████| 80/80 [01:03<00:00,  1.26it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.51it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.98it/s]


[I 2024-04-30 12:51:37,495] Trial 30 finished with value: 0.9863501483679525 and parameters: {'use_smote': False, 'learning_rate': 0.06645796834504566, 'depth': 9, 'n_estimators': 865, 'l2_leaf_reg': 0.20413146222538875, 'min_data_in_leaf': 46}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.05it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.41it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.00it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00,  9.90it/s]
Feature Extraction: 100%|██████████| 80/80 [00:54<00:00,  1.47it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.72it/s]


[I 2024-04-30 13:06:52,582] Trial 31 finished with value: 0.985731272294887 and parameters: {'use_smote': False, 'learning_rate': 0.031681531786968024, 'depth': 9, 'n_estimators': 623, 'l2_leaf_reg': 0.008376668749711012, 'min_data_in_leaf': 27}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00, 10.27it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.58it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.91it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.41it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.40it/s]


[I 2024-04-30 13:19:21,976] Trial 32 finished with value: 0.9845605700712589 and parameters: {'use_smote': False, 'learning_rate': 0.034868549872115996, 'depth': 9, 'n_estimators': 397, 'l2_leaf_reg': 0.0095224875953553, 'min_data_in_leaf': 18}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.92it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.28it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.36it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.39it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.14it/s]


[I 2024-04-30 13:30:09,345] Trial 33 finished with value: 0.9827483640690066 and parameters: {'use_smote': False, 'learning_rate': 0.15304299369121424, 'depth': 8, 'n_estimators': 582, 'l2_leaf_reg': 0.0507354282692777, 'min_data_in_leaf': 2}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.90it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  7.84it/s]


[I 2024-04-30 13:36:07,489] Trial 34 pruned. 


Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.83it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.36it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.71it/s]


[I 2024-04-30 13:47:16,810] Trial 35 finished with value: 0.983957219251337 and parameters: {'use_smote': True, 'learning_rate': 0.16258146264869075, 'depth': 8, 'n_estimators': 660, 'l2_leaf_reg': 0.02182589663959246, 'min_data_in_leaf': 21}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.94it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.96it/s]


[I 2024-04-30 13:53:31,076] Trial 36 pruned. 


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.56it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.96it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.94it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.92it/s]


[I 2024-04-30 14:10:55,900] Trial 37 finished with value: 0.9851632047477745 and parameters: {'use_smote': False, 'learning_rate': 0.04550367723760154, 'depth': 9, 'n_estimators': 781, 'l2_leaf_reg': 0.0023709821274604426, 'min_data_in_leaf': 10}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.30it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  8.97it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.37it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.45it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.04it/s]


[I 2024-04-30 14:17:02,001] Trial 38 pruned. 


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.03it/s]
Feature Extraction: 100%|██████████| 80/80 [00:55<00:00,  1.44it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.85it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.10it/s]


[I 2024-04-30 14:25:54,508] Trial 39 pruned. 


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.38it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.94it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.37it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.99it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.62it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00,  9.97it/s]


[I 2024-04-30 14:36:30,817] Trial 40 finished with value: 0.9857227840571089 and parameters: {'use_smote': False, 'learning_rate': 0.027715999751219165, 'depth': 7, 'n_estimators': 946, 'l2_leaf_reg': 0.05044270485930327, 'min_data_in_leaf': 49}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.95it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.60it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.42it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.23it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.00it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.44it/s]


[I 2024-04-30 14:51:14,119] Trial 41 finished with value: 0.9869436201780415 and parameters: {'use_smote': False, 'learning_rate': 0.04669642791039123, 'depth': 9, 'n_estimators': 574, 'l2_leaf_reg': 0.005273865711689841, 'min_data_in_leaf': 22}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.91it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.26it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.43it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.02it/s]


[I 2024-04-30 14:59:56,156] Trial 42 pruned. 


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.15it/s]
Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.31it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.30it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.93it/s]


[I 2024-04-30 15:09:33,111] Trial 43 pruned. 


Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.26it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.36it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.93it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:08<00:00,  9.39it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.40it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.05it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.13it/s]


[I 2024-04-30 15:23:22,305] Trial 44 finished with value: 0.9863258026159334 and parameters: {'use_smote': False, 'learning_rate': 0.05794201121454085, 'depth': 9, 'n_estimators': 511, 'l2_leaf_reg': 0.1587813786286834, 'min_data_in_leaf': 17}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:07<00:00, 10.04it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.14it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.01it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.85it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.06it/s]


[I 2024-04-30 15:32:00,618] Trial 45 finished with value: 0.97999402806808 and parameters: {'use_smote': False, 'learning_rate': 0.10792446598486403, 'depth': 6, 'n_estimators': 821, 'l2_leaf_reg': 0.012589824347804405, 'min_data_in_leaf': 9}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.32it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.96it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.35it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.21it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.02it/s]


[I 2024-04-30 15:38:55,713] Trial 46 pruned. 


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.74it/s]
Feature Extraction: 100%|██████████| 80/80 [00:56<00:00,  1.41it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  7.07it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.70it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.82it/s]
Feature Extraction: 100%|██████████| 80/80 [01:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████| 78/78 [00:09<00:00,  8.05it/s]


[I 2024-04-30 15:49:39,047] Trial 47 finished with value: 0.9857566765578635 and parameters: {'use_smote': False, 'learning_rate': 0.15258802641897307, 'depth': 8, 'n_estimators': 555, 'l2_leaf_reg': 0.004227273922810982, 'min_data_in_leaf': 55}. Best is trial 23 with value: 0.9875370919881306.


Feature Extraction: 100%|██████████| 80/80 [01:01<00:00,  1.29it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.54it/s]
Feature Extraction: 100%|██████████| 80/80 [00:59<00:00,  1.34it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.14it/s]
Feature Extraction: 100%|██████████| 80/80 [00:57<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:11<00:00,  6.99it/s]
Feature Extraction: 100%|██████████| 80/80 [00:58<00:00,  1.38it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.17it/s]
Feature Extraction: 100%|██████████| 80/80 [01:02<00:00,  1.28it/s]
Feature Extraction: 100%|██████████| 78/78 [00:10<00:00,  7.62it/s]


[I 2024-04-30 16:03:59,239] Trial 48 finished with value: 0.9854469854469855 and parameters: {'use_smote': False, 'learning_rate': 0.07619018055278756, 'depth': 10, 'n_estimators': 680, 'l2_leaf_reg': 0.03708244948904577, 'min_data_in_leaf': 25}. Best is trial 23 with value: 0.9875370919881306.


In [17]:
best_params = study_feat_eng.best_params
    
print(best_params)

if "catboost_feat_eng" in config:
    config["catboost_feat_eng"].update(best_params)
else:
    config["catboost_feat_eng"] = best_params

# see: https://stackoverflow.com/questions/12470665/how-can-i-write-data-in-yaml-format-in-a-file
with open("classical_ml_config.yaml", "w") as file:
    yaml.dump(config, file, default_flow_style=False)

{'use_smote': False, 'learning_rate': 0.2029778371888283, 'depth': 9, 'n_estimators': 606, 'l2_leaf_reg': 0.12229360521210861, 'min_data_in_leaf': 24}


In [18]:
p_importance_feat_eng = optuna.visualization.plot_param_importances(study_feat_eng)
p_importance_feat_eng.show()

In [19]:
p_history_feat_eng = optuna.visualization.plot_optimization_history(study_feat_eng)
p_history_feat_eng.show()