# Maximum Accuracy Model Training Strategy
## Advanced Stacked Ensemble for Drug Category Classification

**Objective:** Achieve maximum accuracy using state-of-the-art ensemble methods

**Strategy:**
- Layer 1: Multiple diverse base models (XGBoost, LightGBM, CatBoost, RF, ExtraTrees, NN)
- Layer 2: Stacked meta-models
- Bayesian hyperparameter optimization
- 10-fold stratified cross-validation

## 1. Setup and Imports

In [1]:
# Core libraries
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn preprocessing and metrics
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Base Models
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Boosting models
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

# Stacking
from sklearn.ensemble import StackingClassifier, VotingClassifier

# Hyperparameter optimization
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
try:
    import optuna
    OPTUNA_AVAILABLE = True
except:
    OPTUNA_AVAILABLE = False
    print("Optuna not available. Using GridSearchCV/RandomizedSearchCV only.")

# Utilities
import joblib
from datetime import datetime
import os

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

print("All libraries imported successfully!")
print(f"Optuna available: {OPTUNA_AVAILABLE}")

All libraries imported successfully!
Optuna available: True


## 2. Load Engineered Data

In [2]:
# Load engineered datasets
train_df = pd.read_csv('data/data_minihackathon_train_engineered.csv')
test_df = pd.read_csv('data/data_minihackathon_test_engineered.csv')

print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")
print(f"\nTarget distribution:")
print(train_df['drug_category'].value_counts())

# Separate features and target
X = train_df.drop(['drug_category', 'id'], axis=1, errors='ignore')
y = train_df['drug_category']
X_test = test_df.drop(['id'], axis=1, errors='ignore')
test_ids = test_df['id'] if 'id' in test_df.columns else np.arange(len(test_df))

# Encode target variable
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print(f"\nFeatures shape: {X.shape}")
print(f"Test features shape: {X_test.shape}")
print(f"\nClass encoding: {dict(enumerate(le.classes_))}")

Train shape: (1500, 46)
Test shape: (377, 45)

Target distribution:
drug_category
Hallucinogens    691
Stimulants       567
Depressants      242
Name: count, dtype: int64

Features shape: (1500, 45)
Test features shape: (377, 45)

Class encoding: {0: 'Depressants', 1: 'Hallucinogens', 2: 'Stimulants'}


## 3. Cross-Validation Setup

In [3]:
# Stratified K-Fold for cross-validation
N_FOLDS = 10
RANDOM_STATE = 42

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

# Storage for model results
model_results = {}
oof_predictions = {}  # Out-of-fold predictions for stacking

print(f"Using {N_FOLDS}-fold stratified cross-validation")
print(f"Random state: {RANDOM_STATE}")

Using 10-fold stratified cross-validation
Random state: 42


## 4. Layer 1: Base Models
### 4.1 XGBoost with Bayesian Optimization

In [None]:
%%time

print("\n" + "="*80)
print("Training XGBoost with Hyperparameter Tuning")
print("="*80)

if OPTUNA_AVAILABLE:
    # Bayesian optimization with Optuna
    def objective_xgb(trial):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 11),
            'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
            'gamma': trial.suggest_float('gamma', 0, 0.3),
            'reg_alpha': trial.suggest_float('reg_alpha', 0, 1),
            'reg_lambda': trial.suggest_float('reg_lambda', 0, 1),
            'random_state': RANDOM_STATE,
            'tree_method': 'gpu_hist',
            'device': 'cuda',
            'eval_metric': 'mlogloss'
        }
        
        model = xgb.XGBClassifier(**params)
        scores = cross_val_score(model, X, y_encoded, cv=skf, scoring='accuracy', n_jobs=-1)
        return scores.mean()
    
    study_xgb = optuna.create_study(direction='maximize', study_name='xgboost')
    study_xgb.optimize(objective_xgb, n_trials=100, show_progress_bar=True)
    
    print(f"\nBest XGBoost accuracy: {study_xgb.best_value:.4f}")
    print(f"Best parameters: {study_xgb.best_params}")
    
    # Train final model with best parameters
    xgb_model = xgb.XGBClassifier(**study_xgb.best_params)
    
else:
    # Randomized search as fallback
    param_dist = {
        'n_estimators': [100, 300, 500, 700, 1000],
        'max_depth': [3, 5, 7, 9, 11],
        'learning_rate': [0.001, 0.01, 0.05, 0.1],
        'min_child_weight': [1, 3, 5, 7],
        'subsample': [0.6, 0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0],
        'gamma': [0, 0.1, 0.2, 0.3],
        'reg_alpha': [0, 0.01, 0.1, 1],
        'reg_lambda': [0, 0.01, 0.1, 1]
    }
    xgb_base = xgb.XGBClassifier(random_state=RANDOM_STATE, tree_method='gpu_hist', device='cuda', eval_metric='mlogloss')
    xgb_base = xgb.XGBClassifier(random_state=RANDOM_STATE, tree_method='hist', eval_metric='mlogloss')
    random_search = RandomizedSearchCV(xgb_base, param_dist, n_iter=100, cv=5, 
                                      scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
    random_search.fit(X, y_encoded)
    
    print(f"\nBest XGBoost accuracy: {random_search.best_score_:.4f}")
    print(f"Best parameters: {random_search.best_params_}")
    
    xgb_model = random_search.best_estimator_

# Evaluate with cross-validation
cv_results = cross_validate(xgb_model, X, y_encoded, cv=skf, 
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['XGBoost'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': xgb_model
}

print(f"\nXGBoost CV Accuracy: {model_results['XGBoost']['cv_accuracy']:.4f} ± {model_results['XGBoost']['cv_std']:.4f}")
print(f"XGBoost F1-Macro: {model_results['XGBoost']['f1_macro']:.4f}")

[I 2025-11-16 12:58:15,045] A new study created in memory with name: xgboost



Training XGBoost with Hyperparameter Tuning


Best trial: 0. Best value: 0.707333:   1%|          | 1/100 [00:07<13:00,  7.88s/it]

[I 2025-11-16 12:58:22,922] Trial 0 finished with value: 0.7073333333333334 and parameters: {'n_estimators': 409, 'max_depth': 6, 'learning_rate': 0.009013223892741149, 'min_child_weight': 7, 'subsample': 0.7653409673712619, 'colsample_bytree': 0.9757172827484016, 'gamma': 0.26627155882963194, 'reg_alpha': 0.1039928170729526, 'reg_lambda': 0.7341218660122519}. Best is trial 0 with value: 0.7073333333333334.


Best trial: 0. Best value: 0.707333:   2%|▏         | 2/100 [00:13<10:43,  6.56s/it]

[I 2025-11-16 12:58:28,562] Trial 1 finished with value: 0.6806666666666666 and parameters: {'n_estimators': 759, 'max_depth': 3, 'learning_rate': 0.07020474533982024, 'min_child_weight': 3, 'subsample': 0.6047559981818494, 'colsample_bytree': 0.9100529213848848, 'gamma': 0.0032733573729539557, 'reg_alpha': 0.07656207381523061, 'reg_lambda': 0.4537148916146194}. Best is trial 0 with value: 0.7073333333333334.


Best trial: 0. Best value: 0.707333:   3%|▎         | 3/100 [00:19<10:11,  6.31s/it]

[I 2025-11-16 12:58:34,568] Trial 2 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 260, 'max_depth': 11, 'learning_rate': 0.003159037665867539, 'min_child_weight': 1, 'subsample': 0.7447846438873612, 'colsample_bytree': 0.7455102012083835, 'gamma': 0.09082788565939116, 'reg_alpha': 0.8369814951556412, 'reg_lambda': 0.8983398481080717}. Best is trial 0 with value: 0.7073333333333334.


Best trial: 3. Best value: 0.712:   4%|▍         | 4/100 [00:24<09:33,  5.97s/it]   

[I 2025-11-16 12:58:40,018] Trial 3 finished with value: 0.712 and parameters: {'n_estimators': 290, 'max_depth': 11, 'learning_rate': 0.027635516599988103, 'min_child_weight': 1, 'subsample': 0.8922956270128553, 'colsample_bytree': 0.6512840572495996, 'gamma': 0.18179418479880874, 'reg_alpha': 0.09707545660733352, 'reg_lambda': 0.63562050723454}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:   5%|▌         | 5/100 [00:30<09:24,  5.95s/it]

[I 2025-11-16 12:58:45,924] Trial 4 finished with value: 0.702 and parameters: {'n_estimators': 788, 'max_depth': 9, 'learning_rate': 0.0226748326291367, 'min_child_weight': 7, 'subsample': 0.8028520435665496, 'colsample_bytree': 0.6485601779856348, 'gamma': 0.055797769755839666, 'reg_alpha': 0.8961920186163956, 'reg_lambda': 0.49995052662286543}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:   6%|▌         | 6/100 [00:33<07:29,  4.78s/it]

[I 2025-11-16 12:58:48,443] Trial 5 finished with value: 0.7046666666666667 and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.006961314818629663, 'min_child_weight': 6, 'subsample': 0.9297309202460275, 'colsample_bytree': 0.7657058668909373, 'gamma': 0.214563598868773, 'reg_alpha': 0.1967803602010184, 'reg_lambda': 0.42394024428849175}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:   7%|▋         | 7/100 [00:36<06:44,  4.35s/it]

[I 2025-11-16 12:58:51,909] Trial 6 finished with value: 0.6906666666666668 and parameters: {'n_estimators': 500, 'max_depth': 8, 'learning_rate': 0.05645681105175441, 'min_child_weight': 5, 'subsample': 0.6645917523739185, 'colsample_bytree': 0.8038064986826209, 'gamma': 0.10049471253487585, 'reg_alpha': 0.38561720767984986, 'reg_lambda': 0.8338872991191935}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:   8%|▊         | 8/100 [00:45<08:54,  5.81s/it]

[I 2025-11-16 12:59:00,855] Trial 7 finished with value: 0.7013333333333334 and parameters: {'n_estimators': 978, 'max_depth': 8, 'learning_rate': 0.03415660078354866, 'min_child_weight': 2, 'subsample': 0.7213824692962784, 'colsample_bytree': 0.9556844221735515, 'gamma': 0.04432828609671675, 'reg_alpha': 0.9380782461906054, 'reg_lambda': 0.498638413765489}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:   9%|▉         | 9/100 [00:53<09:43,  6.41s/it]

[I 2025-11-16 12:59:08,584] Trial 8 finished with value: 0.7053333333333333 and parameters: {'n_estimators': 794, 'max_depth': 8, 'learning_rate': 0.015390079653333314, 'min_child_weight': 2, 'subsample': 0.6278836436063697, 'colsample_bytree': 0.8104062470308212, 'gamma': 0.297344965103522, 'reg_alpha': 0.5097026140120111, 'reg_lambda': 0.15968387574227672}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  10%|█         | 10/100 [00:55<07:38,  5.10s/it]

[I 2025-11-16 12:59:10,736] Trial 9 finished with value: 0.7093333333333333 and parameters: {'n_estimators': 129, 'max_depth': 8, 'learning_rate': 0.008234326629701352, 'min_child_weight': 2, 'subsample': 0.7751091297227187, 'colsample_bytree': 0.6774825139799815, 'gamma': 0.2799883059689844, 'reg_alpha': 0.8456934412231766, 'reg_lambda': 0.45247024304028305}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  11%|█         | 11/100 [00:58<06:34,  4.43s/it]

[I 2025-11-16 12:59:13,661] Trial 10 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 443, 'max_depth': 5, 'learning_rate': 0.0017881462151453668, 'min_child_weight': 4, 'subsample': 0.9988571367051409, 'colsample_bytree': 0.6046654898896826, 'gamma': 0.17995315444831705, 'reg_alpha': 0.5290314624880549, 'reg_lambda': 0.004781587043070745}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  12%|█▏        | 12/100 [01:01<05:56,  4.05s/it]

[I 2025-11-16 12:59:16,847] Trial 11 finished with value: 0.7073333333333333 and parameters: {'n_estimators': 108, 'max_depth': 11, 'learning_rate': 0.004596544979001473, 'min_child_weight': 1, 'subsample': 0.8580942266614336, 'colsample_bytree': 0.6754999534052775, 'gamma': 0.22734671676497054, 'reg_alpha': 0.7012507441879686, 'reg_lambda': 0.6639484353323354}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  13%|█▎        | 13/100 [01:04<05:17,  3.65s/it]

[I 2025-11-16 12:59:19,582] Trial 12 finished with value: 0.7093333333333334 and parameters: {'n_estimators': 292, 'max_depth': 6, 'learning_rate': 0.01590598167255867, 'min_child_weight': 3, 'subsample': 0.8509967904969992, 'colsample_bytree': 0.6953959650705677, 'gamma': 0.15032369697244533, 'reg_alpha': 0.3090526862281435, 'reg_lambda': 0.25374390568299465}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  14%|█▍        | 14/100 [01:06<04:26,  3.10s/it]

[I 2025-11-16 12:59:21,391] Trial 13 finished with value: 0.7093333333333331 and parameters: {'n_estimators': 308, 'max_depth': 5, 'learning_rate': 0.02874654227306122, 'min_child_weight': 4, 'subsample': 0.8719364324169502, 'colsample_bytree': 0.7183250408878376, 'gamma': 0.15596700905785876, 'reg_alpha': 0.27869353410849224, 'reg_lambda': 0.284907302743343}. Best is trial 3 with value: 0.712.


Best trial: 3. Best value: 0.712:  15%|█▌        | 15/100 [01:08<03:50,  2.71s/it]

[I 2025-11-16 12:59:23,200] Trial 14 finished with value: 0.6940000000000002 and parameters: {'n_estimators': 617, 'max_depth': 6, 'learning_rate': 0.09988781399729046, 'min_child_weight': 3, 'subsample': 0.9016084625501508, 'colsample_bytree': 0.6077110206654811, 'gamma': 0.12134592269802408, 'reg_alpha': 0.019582370042327144, 'reg_lambda': 0.2505716685027238}. Best is trial 3 with value: 0.712.


Best trial: 15. Best value: 0.712667:  16%|█▌        | 16/100 [01:09<03:05,  2.21s/it]

[I 2025-11-16 12:59:24,244] Trial 15 finished with value: 0.7126666666666667 and parameters: {'n_estimators': 293, 'max_depth': 3, 'learning_rate': 0.016816052609671256, 'min_child_weight': 3, 'subsample': 0.8343256810703832, 'colsample_bytree': 0.8459428215401836, 'gamma': 0.18453037571756042, 'reg_alpha': 0.2927820617808735, 'reg_lambda': 0.6269263173551731}. Best is trial 15 with value: 0.7126666666666667.


Best trial: 15. Best value: 0.712667:  17%|█▋        | 17/100 [01:11<03:01,  2.18s/it]

[I 2025-11-16 12:59:26,370] Trial 16 finished with value: 0.6926666666666667 and parameters: {'n_estimators': 592, 'max_depth': 3, 'learning_rate': 0.0010249414522775385, 'min_child_weight': 1, 'subsample': 0.961281302277025, 'colsample_bytree': 0.8682839789798322, 'gamma': 0.2006977778934807, 'reg_alpha': 0.1717657275386993, 'reg_lambda': 0.6474102069627699}. Best is trial 15 with value: 0.7126666666666667.


Best trial: 15. Best value: 0.712667:  18%|█▊        | 18/100 [01:12<02:32,  1.85s/it]

[I 2025-11-16 12:59:27,455] Trial 17 finished with value: 0.7026666666666668 and parameters: {'n_estimators': 232, 'max_depth': 4, 'learning_rate': 0.045850088734295226, 'min_child_weight': 5, 'subsample': 0.8174805086024329, 'colsample_bytree': 0.8429507455905987, 'gamma': 0.1781189504549896, 'reg_alpha': 0.4462802049807978, 'reg_lambda': 0.9825480918456526}. Best is trial 15 with value: 0.7126666666666667.


Best trial: 15. Best value: 0.712667:  19%|█▉        | 19/100 [01:20<04:52,  3.62s/it]

[I 2025-11-16 12:59:35,180] Trial 18 finished with value: 0.71 and parameters: {'n_estimators': 382, 'max_depth': 10, 'learning_rate': 0.014578927531108843, 'min_child_weight': 2, 'subsample': 0.914587586120531, 'colsample_bytree': 0.8971513740839052, 'gamma': 0.23967058386420032, 'reg_alpha': 0.23139913985353086, 'reg_lambda': 0.6085772146523755}. Best is trial 15 with value: 0.7126666666666667.


Best trial: 19. Best value: 0.716:  20%|██        | 20/100 [01:21<04:05,  3.07s/it]   

[I 2025-11-16 12:59:36,979] Trial 19 finished with value: 0.716 and parameters: {'n_estimators': 374, 'max_depth': 4, 'learning_rate': 0.02292121389304592, 'min_child_weight': 3, 'subsample': 0.8309677824517658, 'colsample_bytree': 0.7836944768842525, 'gamma': 0.1284617940901997, 'reg_alpha': 0.612362909954221, 'reg_lambda': 0.7596415780377597}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  21%|██        | 21/100 [01:25<04:02,  3.08s/it]

[I 2025-11-16 12:59:40,065] Trial 20 finished with value: 0.71 and parameters: {'n_estimators': 660, 'max_depth': 4, 'learning_rate': 0.005498393687198307, 'min_child_weight': 4, 'subsample': 0.7036434356669736, 'colsample_bytree': 0.7636674056993829, 'gamma': 0.11770000197452792, 'reg_alpha': 0.6040600951333379, 'reg_lambda': 0.783112541948878}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  22%|██▏       | 22/100 [01:26<03:28,  2.67s/it]

[I 2025-11-16 12:59:41,786] Trial 21 finished with value: 0.7146666666666666 and parameters: {'n_estimators': 343, 'max_depth': 4, 'learning_rate': 0.021866991781351333, 'min_child_weight': 3, 'subsample': 0.83387282517372, 'colsample_bytree': 0.8559861723538503, 'gamma': 0.173072146838497, 'reg_alpha': 0.6392327151194972, 'reg_lambda': 0.6010801602529364}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  23%|██▎       | 23/100 [01:29<03:22,  2.63s/it]

[I 2025-11-16 12:59:44,331] Trial 22 finished with value: 0.714 and parameters: {'n_estimators': 509, 'max_depth': 4, 'learning_rate': 0.012978721317991911, 'min_child_weight': 3, 'subsample': 0.831224681258585, 'colsample_bytree': 0.84206148708642, 'gamma': 0.13796966451297235, 'reg_alpha': 0.691804287028498, 'reg_lambda': 0.5681463192961222}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  24%|██▍       | 24/100 [01:31<03:18,  2.62s/it]

[I 2025-11-16 12:59:46,913] Trial 23 finished with value: 0.7106666666666667 and parameters: {'n_estimators': 507, 'max_depth': 4, 'learning_rate': 0.011541798497514027, 'min_child_weight': 5, 'subsample': 0.8006952454706507, 'colsample_bytree': 0.9264149883638584, 'gamma': 0.13217057123283768, 'reg_alpha': 0.7226889988649249, 'reg_lambda': 0.5648892542162991}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  25%|██▌       | 25/100 [01:34<03:21,  2.69s/it]

[I 2025-11-16 12:59:49,764] Trial 24 finished with value: 0.7020000000000001 and parameters: {'n_estimators': 415, 'max_depth': 5, 'learning_rate': 0.04052666198125527, 'min_child_weight': 3, 'subsample': 0.8353724763213017, 'colsample_bytree': 0.8209445283196799, 'gamma': 0.07698227324547713, 'reg_alpha': 0.6949966685099777, 'reg_lambda': 0.7289315189419261}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  26%|██▌       | 26/100 [01:36<02:59,  2.43s/it]

[I 2025-11-16 12:59:51,590] Trial 25 finished with value: 0.7146666666666667 and parameters: {'n_estimators': 371, 'max_depth': 4, 'learning_rate': 0.019093294256639897, 'min_child_weight': 4, 'subsample': 0.766675067437522, 'colsample_bytree': 0.8728231029535405, 'gamma': 0.1489854242819345, 'reg_alpha': 0.6008642376376886, 'reg_lambda': 0.35549058173999837}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  27%|██▋       | 27/100 [01:39<02:59,  2.46s/it]

[I 2025-11-16 12:59:54,109] Trial 26 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 372, 'max_depth': 5, 'learning_rate': 0.021898637075307693, 'min_child_weight': 4, 'subsample': 0.7685250931548084, 'colsample_bytree': 0.8855809918985796, 'gamma': 0.15710381649625207, 'reg_alpha': 0.6023485650736592, 'reg_lambda': 0.35981418530250214}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  28%|██▊       | 28/100 [01:40<02:42,  2.26s/it]

[I 2025-11-16 12:59:55,907] Trial 27 finished with value: 0.7100000000000001 and parameters: {'n_estimators': 179, 'max_depth': 7, 'learning_rate': 0.025154250368396584, 'min_child_weight': 5, 'subsample': 0.6935690633997469, 'colsample_bytree': 0.7811798544465186, 'gamma': 0.0959559690087158, 'reg_alpha': 0.601165119205813, 'reg_lambda': 0.35852367024157217}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  29%|██▉       | 29/100 [01:42<02:17,  1.93s/it]

[I 2025-11-16 12:59:57,083] Trial 28 finished with value: 0.7106666666666668 and parameters: {'n_estimators': 346, 'max_depth': 3, 'learning_rate': 0.019711540916410177, 'min_child_weight': 6, 'subsample': 0.7420066267161965, 'colsample_bytree': 0.938677058981114, 'gamma': 0.24173629774669625, 'reg_alpha': 0.4199409316234918, 'reg_lambda': 0.9362596089223132}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  30%|███       | 30/100 [01:44<02:26,  2.09s/it]

[I 2025-11-16 12:59:59,542] Trial 29 finished with value: 0.7033333333333334 and parameters: {'n_estimators': 213, 'max_depth': 6, 'learning_rate': 0.010534644433340472, 'min_child_weight': 4, 'subsample': 0.787716636808474, 'colsample_bytree': 0.997986812157531, 'gamma': 0.16150414896211523, 'reg_alpha': 0.7462024988969588, 'reg_lambda': 0.7383184035241804}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  31%|███       | 31/100 [01:46<02:22,  2.07s/it]

[I 2025-11-16 13:00:01,551] Trial 30 finished with value: 0.7060000000000001 and parameters: {'n_estimators': 439, 'max_depth': 4, 'learning_rate': 0.05056547395794851, 'min_child_weight': 6, 'subsample': 0.8749406728248438, 'colsample_bytree': 0.879182017770662, 'gamma': 0.06503958699194169, 'reg_alpha': 0.7789346673344901, 'reg_lambda': 0.8460589988228875}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  32%|███▏      | 32/100 [01:49<02:31,  2.23s/it]

[I 2025-11-16 13:00:04,154] Trial 31 finished with value: 0.716 and parameters: {'n_estimators': 497, 'max_depth': 4, 'learning_rate': 0.00998555081389969, 'min_child_weight': 3, 'subsample': 0.8282939241199114, 'colsample_bytree': 0.8467153199011717, 'gamma': 0.1334884228133324, 'reg_alpha': 0.6676939409645444, 'reg_lambda': 0.5745439141030466}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  33%|███▎      | 33/100 [01:50<02:18,  2.06s/it]

[I 2025-11-16 13:00:05,830] Trial 32 finished with value: 0.7093333333333334 and parameters: {'n_estimators': 469, 'max_depth': 3, 'learning_rate': 0.008424604706405842, 'min_child_weight': 3, 'subsample': 0.7614181045163123, 'colsample_bytree': 0.8602448490241188, 'gamma': 0.11447940815912977, 'reg_alpha': 0.6282447975442755, 'reg_lambda': 0.5610046371569009}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  34%|███▍      | 34/100 [01:54<02:40,  2.44s/it]

[I 2025-11-16 13:00:09,141] Trial 33 finished with value: 0.7026666666666666 and parameters: {'n_estimators': 358, 'max_depth': 5, 'learning_rate': 0.003697611437312506, 'min_child_weight': 2, 'subsample': 0.8120889163119956, 'colsample_bytree': 0.9118456893329132, 'gamma': 0.03301909279568989, 'reg_alpha': 0.5483680752459859, 'reg_lambda': 0.7014520141700453}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  35%|███▌      | 35/100 [01:56<02:41,  2.48s/it]

[I 2025-11-16 13:00:11,727] Trial 34 finished with value: 0.7 and parameters: {'n_estimators': 564, 'max_depth': 4, 'learning_rate': 0.03202012372475023, 'min_child_weight': 4, 'subsample': 0.7399746363504721, 'colsample_bytree': 0.8247550295826128, 'gamma': 0.139425719917388, 'reg_alpha': 0.6329628105761675, 'reg_lambda': 0.7752420148037913}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  36%|███▌      | 36/100 [01:58<02:30,  2.35s/it]

[I 2025-11-16 13:00:13,763] Trial 35 finished with value: 0.694 and parameters: {'n_estimators': 729, 'max_depth': 3, 'learning_rate': 0.0778785902571726, 'min_child_weight': 3, 'subsample': 0.7870375732802898, 'colsample_bytree': 0.7287737402694423, 'gamma': 0.16895674282763767, 'reg_alpha': 0.5541515693820348, 'reg_lambda': 0.39614489572218614}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  37%|███▋      | 37/100 [02:02<02:52,  2.74s/it]

[I 2025-11-16 13:00:17,432] Trial 36 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 332, 'max_depth': 6, 'learning_rate': 0.006237035457993155, 'min_child_weight': 3, 'subsample': 0.878990674520921, 'colsample_bytree': 0.7850588374867603, 'gamma': 0.20317113847191354, 'reg_alpha': 0.8053843660275669, 'reg_lambda': 0.5264581019524911}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  38%|███▊      | 38/100 [02:06<03:20,  3.23s/it]

[I 2025-11-16 13:00:21,803] Trial 37 finished with value: 0.7126666666666667 and parameters: {'n_estimators': 250, 'max_depth': 7, 'learning_rate': 0.01082977208570132, 'min_child_weight': 2, 'subsample': 0.8572601715741932, 'colsample_bytree': 0.9057412914674356, 'gamma': 0.1049552399381696, 'reg_alpha': 0.4517942505631694, 'reg_lambda': 0.123229773807561}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  39%|███▉      | 39/100 [02:08<02:51,  2.81s/it]

[I 2025-11-16 13:00:23,638] Trial 38 finished with value: 0.71 and parameters: {'n_estimators': 410, 'max_depth': 4, 'learning_rate': 0.01945627931930821, 'min_child_weight': 5, 'subsample': 0.9392200552153889, 'colsample_bytree': 0.791653015404522, 'gamma': 0.07832781355476111, 'reg_alpha': 0.6499169291036844, 'reg_lambda': 0.4950559368465425}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  40%|████      | 40/100 [02:11<02:49,  2.82s/it]

[I 2025-11-16 13:00:26,480] Trial 39 finished with value: 0.7013333333333334 and parameters: {'n_estimators': 535, 'max_depth': 5, 'learning_rate': 0.034836197705916436, 'min_child_weight': 4, 'subsample': 0.8230615537571007, 'colsample_bytree': 0.7449890218229201, 'gamma': 0.19560977311362793, 'reg_alpha': 0.8830472280098214, 'reg_lambda': 0.8154166331731914}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  41%|████      | 41/100 [02:13<02:28,  2.51s/it]

[I 2025-11-16 13:00:28,274] Trial 40 finished with value: 0.7086666666666667 and parameters: {'n_estimators': 470, 'max_depth': 3, 'learning_rate': 0.008057452919074492, 'min_child_weight': 2, 'subsample': 0.7554624536981377, 'colsample_bytree': 0.9637781459388404, 'gamma': 0.12967064993761468, 'reg_alpha': 0.3575656213493729, 'reg_lambda': 0.6967508108750773}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  42%|████▏     | 42/100 [02:15<02:28,  2.55s/it]

[I 2025-11-16 13:00:30,920] Trial 41 finished with value: 0.714 and parameters: {'n_estimators': 526, 'max_depth': 4, 'learning_rate': 0.012874401102276752, 'min_child_weight': 3, 'subsample': 0.8349439626143009, 'colsample_bytree': 0.843139066299236, 'gamma': 0.14875958403020556, 'reg_alpha': 0.6781531480503219, 'reg_lambda': 0.5819438336830058}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  43%|████▎     | 43/100 [02:18<02:23,  2.52s/it]

[I 2025-11-16 13:00:33,368] Trial 42 finished with value: 0.7106666666666668 and parameters: {'n_estimators': 476, 'max_depth': 4, 'learning_rate': 0.024223253715441238, 'min_child_weight': 3, 'subsample': 0.7921834594643955, 'colsample_bytree': 0.8567351930116233, 'gamma': 0.1378810930006757, 'reg_alpha': 0.7637159647172704, 'reg_lambda': 0.47774315056537653}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  44%|████▍     | 44/100 [02:21<02:25,  2.60s/it]

[I 2025-11-16 13:00:36,144] Trial 43 finished with value: 0.712 and parameters: {'n_estimators': 398, 'max_depth': 5, 'learning_rate': 0.013323231732491258, 'min_child_weight': 4, 'subsample': 0.8446427598975484, 'colsample_bytree': 0.8287250262852817, 'gamma': 0.0005658019912779255, 'reg_alpha': 0.48453865223422926, 'reg_lambda': 0.42122726411715766}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  45%|████▌     | 45/100 [02:24<02:30,  2.75s/it]

[I 2025-11-16 13:00:39,232] Trial 44 finished with value: 0.7053333333333333 and parameters: {'n_estimators': 664, 'max_depth': 4, 'learning_rate': 0.018159274326394276, 'min_child_weight': 3, 'subsample': 0.8085383363124432, 'colsample_bytree': 0.807276446377972, 'gamma': 0.10828783425398797, 'reg_alpha': 0.669915948061482, 'reg_lambda': 0.5334738306285414}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  46%|████▌     | 46/100 [02:27<02:38,  2.94s/it]

[I 2025-11-16 13:00:42,632] Trial 45 finished with value: 0.71 and parameters: {'n_estimators': 984, 'max_depth': 3, 'learning_rate': 0.010495653240890496, 'min_child_weight': 2, 'subsample': 0.7211569510845627, 'colsample_bytree': 0.8773664388323672, 'gamma': 0.16730345596712826, 'reg_alpha': 0.562007547840796, 'reg_lambda': 0.8822444883655989}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  47%|████▋     | 47/100 [02:29<02:24,  2.73s/it]

[I 2025-11-16 13:00:44,873] Trial 46 finished with value: 0.708 and parameters: {'n_estimators': 327, 'max_depth': 5, 'learning_rate': 0.027071155847110045, 'min_child_weight': 3, 'subsample': 0.8935885607171027, 'colsample_bytree': 0.768821604474884, 'gamma': 0.08752080379568541, 'reg_alpha': 0.9565362685404345, 'reg_lambda': 0.3347118254788625}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  48%|████▊     | 48/100 [02:33<02:42,  3.12s/it]

[I 2025-11-16 13:00:48,914] Trial 47 finished with value: 0.71 and parameters: {'n_estimators': 438, 'max_depth': 6, 'learning_rate': 0.013761841523890648, 'min_child_weight': 4, 'subsample': 0.7774503266213664, 'colsample_bytree': 0.8342695100279719, 'gamma': 0.144817275144054, 'reg_alpha': 0.8386933756269374, 'reg_lambda': 0.4515180921071363}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  49%|████▉     | 49/100 [02:36<02:34,  3.04s/it]

[I 2025-11-16 13:00:51,748] Trial 48 finished with value: 0.7106666666666668 and parameters: {'n_estimators': 567, 'max_depth': 4, 'learning_rate': 0.006763805084381173, 'min_child_weight': 4, 'subsample': 0.8629293799340897, 'colsample_bytree': 0.8146772004047279, 'gamma': 0.12578113301851776, 'reg_alpha': 0.4999064092748997, 'reg_lambda': 0.6754726932772136}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  50%|█████     | 50/100 [02:37<02:01,  2.43s/it]

[I 2025-11-16 13:00:52,763] Trial 49 finished with value: 0.7026666666666668 and parameters: {'n_estimators': 268, 'max_depth': 3, 'learning_rate': 0.0022900156299422774, 'min_child_weight': 1, 'subsample': 0.649919845873659, 'colsample_bytree': 0.857550372980138, 'gamma': 0.18760927965134983, 'reg_alpha': 0.7212645487827788, 'reg_lambda': 0.18176994186390433}. Best is trial 19 with value: 0.716.


Best trial: 19. Best value: 0.716:  51%|█████     | 51/100 [02:42<02:27,  3.00s/it]

[I 2025-11-16 13:00:57,103] Trial 50 finished with value: 0.7 and parameters: {'n_estimators': 878, 'max_depth': 6, 'learning_rate': 0.03777386117758931, 'min_child_weight': 3, 'subsample': 0.8237740200441561, 'colsample_bytree': 0.9296287789638794, 'gamma': 0.1720207435165186, 'reg_alpha': 0.5630447819372733, 'reg_lambda': 0.6147280330151267}. Best is trial 19 with value: 0.716.


Best trial: 51. Best value: 0.717333:  52%|█████▏    | 52/100 [02:44<02:18,  2.89s/it]

[I 2025-11-16 13:00:59,716] Trial 51 finished with value: 0.7173333333333333 and parameters: {'n_estimators': 516, 'max_depth': 4, 'learning_rate': 0.011966949223279103, 'min_child_weight': 3, 'subsample': 0.8391969393161539, 'colsample_bytree': 0.8394626350861055, 'gamma': 0.14762912463169817, 'reg_alpha': 0.6726765817268627, 'reg_lambda': 0.5758401882979282}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  53%|█████▎    | 53/100 [02:48<02:22,  3.04s/it]

[I 2025-11-16 13:01:03,101] Trial 52 finished with value: 0.7153333333333333 and parameters: {'n_estimators': 612, 'max_depth': 4, 'learning_rate': 0.008908801079914796, 'min_child_weight': 2, 'subsample': 0.8510939242730062, 'colsample_bytree': 0.892173245616316, 'gamma': 0.2144358114973954, 'reg_alpha': 0.8001972063105098, 'reg_lambda': 0.5261224581968554}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  54%|█████▍    | 54/100 [02:53<02:51,  3.74s/it]

[I 2025-11-16 13:01:08,473] Trial 53 finished with value: 0.7113333333333334 and parameters: {'n_estimators': 616, 'max_depth': 5, 'learning_rate': 0.008987763674301935, 'min_child_weight': 2, 'subsample': 0.8469011947464015, 'colsample_bytree': 0.8908889161723547, 'gamma': 0.22066833185509008, 'reg_alpha': 0.7843653232624583, 'reg_lambda': 0.6456213126367879}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  55%|█████▌    | 55/100 [02:56<02:44,  3.66s/it]

[I 2025-11-16 13:01:11,970] Trial 54 finished with value: 0.7100000000000001 and parameters: {'n_estimators': 601, 'max_depth': 4, 'learning_rate': 0.0050852347050498355, 'min_child_weight': 1, 'subsample': 0.8813091702588929, 'colsample_bytree': 0.8004086147713694, 'gamma': 0.27229052803852855, 'reg_alpha': 0.5932580085827286, 'reg_lambda': 0.5253229975478811}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  56%|█████▌    | 56/100 [02:59<02:25,  3.30s/it]

[I 2025-11-16 13:01:14,408] Trial 55 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 661, 'max_depth': 3, 'learning_rate': 0.016585787071359815, 'min_child_weight': 2, 'subsample': 0.9083342140347488, 'colsample_bytree': 0.8751403739707586, 'gamma': 0.24956272972995341, 'reg_alpha': 0.9023202611057625, 'reg_lambda': 0.6010112589249983}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  57%|█████▋    | 57/100 [03:13<04:46,  6.67s/it]

[I 2025-11-16 13:01:28,943] Trial 56 finished with value: 0.7133333333333334 and parameters: {'n_estimators': 704, 'max_depth': 9, 'learning_rate': 0.007313864765678678, 'min_child_weight': 2, 'subsample': 0.8040764642607243, 'colsample_bytree': 0.9070996696200928, 'gamma': 0.21420882797599378, 'reg_alpha': 0.6435456447521799, 'reg_lambda': 0.45148025321213925}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  58%|█████▊    | 58/100 [03:18<04:11,  5.98s/it]

[I 2025-11-16 13:01:33,326] Trial 57 finished with value: 0.7006666666666665 and parameters: {'n_estimators': 813, 'max_depth': 4, 'learning_rate': 0.021050093751590455, 'min_child_weight': 3, 'subsample': 0.8572138767942209, 'colsample_bytree': 0.8624757180779079, 'gamma': 0.1886288441959279, 'reg_alpha': 0.8147920745529187, 'reg_lambda': 0.7481957349514134}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  59%|█████▉    | 59/100 [03:21<03:33,  5.21s/it]

[I 2025-11-16 13:01:36,750] Trial 58 finished with value: 0.7073333333333334 and parameters: {'n_estimators': 491, 'max_depth': 5, 'learning_rate': 0.015634643746857604, 'min_child_weight': 7, 'subsample': 0.8189757065260723, 'colsample_bytree': 0.9438623740218447, 'gamma': 0.1533628033792008, 'reg_alpha': 0.7361128807987483, 'reg_lambda': 0.32226565975336646}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  60%|██████    | 60/100 [03:22<02:41,  4.03s/it]

[I 2025-11-16 13:01:37,998] Trial 59 finished with value: 0.7013333333333334 and parameters: {'n_estimators': 311, 'max_depth': 3, 'learning_rate': 0.003882407057010739, 'min_child_weight': 3, 'subsample': 0.840948319638687, 'colsample_bytree': 0.8935610207212829, 'gamma': 0.20463274179121144, 'reg_alpha': 0.9928196691340505, 'reg_lambda': 0.39470602805558297}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  61%|██████    | 61/100 [03:26<02:36,  4.03s/it]

[I 2025-11-16 13:01:42,020] Trial 60 finished with value: 0.7006666666666667 and parameters: {'n_estimators': 553, 'max_depth': 5, 'learning_rate': 0.028929928000827384, 'min_child_weight': 4, 'subsample': 0.892026018238163, 'colsample_bytree': 0.922924944465582, 'gamma': 0.11798866598940326, 'reg_alpha': 0.8668294141867714, 'reg_lambda': 0.477464487847511}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  62%|██████▏   | 62/100 [03:29<02:19,  3.66s/it]

[I 2025-11-16 13:01:44,841] Trial 61 finished with value: 0.7166666666666666 and parameters: {'n_estimators': 502, 'max_depth': 4, 'learning_rate': 0.012326081807893772, 'min_child_weight': 3, 'subsample': 0.8321244982367084, 'colsample_bytree': 0.8460216612524935, 'gamma': 0.1427230136321195, 'reg_alpha': 0.6943683012718195, 'reg_lambda': 0.5579345571374782}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  63%|██████▎   | 63/100 [03:32<02:03,  3.33s/it]

[I 2025-11-16 13:01:47,399] Trial 62 finished with value: 0.7113333333333333 and parameters: {'n_estimators': 442, 'max_depth': 4, 'learning_rate': 0.009606000321028966, 'min_child_weight': 3, 'subsample': 0.7984686717231692, 'colsample_bytree': 0.8517752054486154, 'gamma': 0.16056104147916736, 'reg_alpha': 0.70108233537612, 'reg_lambda': 0.549674021624933}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  64%|██████▍   | 64/100 [03:34<01:46,  2.95s/it]

[I 2025-11-16 13:01:49,457] Trial 63 finished with value: 0.7140000000000001 and parameters: {'n_estimators': 381, 'max_depth': 4, 'learning_rate': 0.018432194961202953, 'min_child_weight': 3, 'subsample': 0.8704710743156483, 'colsample_bytree': 0.8357495106493481, 'gamma': 0.17688288166650395, 'reg_alpha': 0.5844417467539109, 'reg_lambda': 0.5955117534450067}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  65%|██████▌   | 65/100 [03:37<01:45,  3.01s/it]

[I 2025-11-16 13:01:52,617] Trial 64 finished with value: 0.716 and parameters: {'n_estimators': 590, 'max_depth': 4, 'learning_rate': 0.012390279480102195, 'min_child_weight': 2, 'subsample': 0.8320429187993613, 'colsample_bytree': 0.8186359932385477, 'gamma': 0.12791594343646293, 'reg_alpha': 0.5312159707437094, 'reg_lambda': 0.6898194019864907}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  66%|██████▌   | 66/100 [03:39<01:33,  2.76s/it]

[I 2025-11-16 13:01:54,785] Trial 65 finished with value: 0.7153333333333334 and parameters: {'n_estimators': 577, 'max_depth': 3, 'learning_rate': 0.012180064360140859, 'min_child_weight': 1, 'subsample': 0.7774357189870819, 'colsample_bytree': 0.8192641949279302, 'gamma': 0.12234787009607341, 'reg_alpha': 0.5023802623319821, 'reg_lambda': 0.6692461861389322}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  67%|██████▋   | 67/100 [03:42<01:26,  2.63s/it]

[I 2025-11-16 13:01:57,103] Trial 66 finished with value: 0.7146666666666667 and parameters: {'n_estimators': 645, 'max_depth': 3, 'learning_rate': 0.01192578187570969, 'min_child_weight': 1, 'subsample': 0.783258248000072, 'colsample_bytree': 0.7740621299756075, 'gamma': 0.1100568025799451, 'reg_alpha': 0.5234221991086454, 'reg_lambda': 0.668675919724484}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  68%|██████▊   | 68/100 [03:44<01:20,  2.50s/it]

[I 2025-11-16 13:01:59,315] Trial 67 finished with value: 0.708 and parameters: {'n_estimators': 586, 'max_depth': 3, 'learning_rate': 0.006270134520467463, 'min_child_weight': 1, 'subsample': 0.9293565041508393, 'colsample_bytree': 0.8148800904347624, 'gamma': 0.2998569679990114, 'reg_alpha': 0.4699292799326525, 'reg_lambda': 0.7091724961168107}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  69%|██████▉   | 69/100 [03:46<01:12,  2.33s/it]

[I 2025-11-16 13:02:01,245] Trial 68 finished with value: 0.7120000000000001 and parameters: {'n_estimators': 527, 'max_depth': 3, 'learning_rate': 0.007894312135547189, 'min_child_weight': 2, 'subsample': 0.6016114561179826, 'colsample_bytree': 0.794759347326737, 'gamma': 0.09325477211133965, 'reg_alpha': 0.3974743122105542, 'reg_lambda': 0.6351612184998159}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  70%|███████   | 70/100 [03:49<01:23,  2.77s/it]

[I 2025-11-16 13:02:05,036] Trial 69 finished with value: 0.7166666666666667 and parameters: {'n_estimators': 696, 'max_depth': 4, 'learning_rate': 0.009662690326071208, 'min_child_weight': 1, 'subsample': 0.8271625722902779, 'colsample_bytree': 0.7408116295166088, 'gamma': 0.13076094812374017, 'reg_alpha': 0.5157704440587663, 'reg_lambda': 0.8000467561662015}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  71%|███████   | 71/100 [03:52<01:19,  2.73s/it]

[I 2025-11-16 13:02:07,676] Trial 70 finished with value: 0.716 and parameters: {'n_estimators': 797, 'max_depth': 3, 'learning_rate': 0.011558415710509574, 'min_child_weight': 1, 'subsample': 0.8108629680027658, 'colsample_bytree': 0.7446386388640309, 'gamma': 0.12741175901770166, 'reg_alpha': 0.520446448279311, 'reg_lambda': 0.7960501986242995}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  72%|███████▏  | 72/100 [03:55<01:15,  2.69s/it]

[I 2025-11-16 13:02:10,269] Trial 71 finished with value: 0.7106666666666667 and parameters: {'n_estimators': 773, 'max_depth': 3, 'learning_rate': 0.011775518709825166, 'min_child_weight': 1, 'subsample': 0.825893161496096, 'colsample_bytree': 0.7390671270747877, 'gamma': 0.12559486637892361, 'reg_alpha': 0.5198043809870159, 'reg_lambda': 0.7952176118469841}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  73%|███████▎  | 73/100 [03:57<01:10,  2.60s/it]

[I 2025-11-16 13:02:12,655] Trial 72 finished with value: 0.7126666666666667 and parameters: {'n_estimators': 694, 'max_depth': 3, 'learning_rate': 0.009613351602661822, 'min_child_weight': 1, 'subsample': 0.8097380451453341, 'colsample_bytree': 0.7573141604778253, 'gamma': 0.137289283061513, 'reg_alpha': 0.415888604073484, 'reg_lambda': 0.8574342915248528}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  74%|███████▍  | 74/100 [04:01<01:19,  3.04s/it]

[I 2025-11-16 13:02:16,723] Trial 73 finished with value: 0.7066666666666668 and parameters: {'n_estimators': 825, 'max_depth': 4, 'learning_rate': 0.014106647759033331, 'min_child_weight': 1, 'subsample': 0.796881905265195, 'colsample_bytree': 0.70286049010801, 'gamma': 0.12125886177095221, 'reg_alpha': 0.4507309961641317, 'reg_lambda': 0.7661373849697356}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  75%|███████▌  | 75/100 [04:05<01:18,  3.13s/it]

[I 2025-11-16 13:02:20,066] Trial 74 finished with value: 0.7146666666666667 and parameters: {'n_estimators': 644, 'max_depth': 4, 'learning_rate': 0.015130518447137652, 'min_child_weight': 1, 'subsample': 0.8190802791017305, 'colsample_bytree': 0.7560829278212322, 'gamma': 0.10038056902285243, 'reg_alpha': 0.5405630390899294, 'reg_lambda': 0.9305095116376858}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  76%|███████▌  | 76/100 [04:07<01:13,  3.08s/it]

[I 2025-11-16 13:02:23,016] Trial 75 finished with value: 0.7133333333333333 and parameters: {'n_estimators': 882, 'max_depth': 3, 'learning_rate': 0.011533535730779419, 'min_child_weight': 1, 'subsample': 0.8352846740821235, 'colsample_bytree': 0.7044458472715553, 'gamma': 0.14378163513172917, 'reg_alpha': 0.33363591581784074, 'reg_lambda': 0.8145410235450671}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  77%|███████▋  | 77/100 [04:22<02:32,  6.64s/it]

[I 2025-11-16 13:02:37,952] Trial 76 finished with value: 0.7133333333333333 and parameters: {'n_estimators': 750, 'max_depth': 10, 'learning_rate': 0.010257586088881734, 'min_child_weight': 1, 'subsample': 0.7543673154273784, 'colsample_bytree': 0.725056787977344, 'gamma': 0.13350045123179582, 'reg_alpha': 0.6582664708332411, 'reg_lambda': 0.7295440615873056}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  78%|███████▊  | 78/100 [04:28<02:17,  6.26s/it]

[I 2025-11-16 13:02:43,336] Trial 77 finished with value: 0.7113333333333334 and parameters: {'n_estimators': 699, 'max_depth': 5, 'learning_rate': 0.01262087157591633, 'min_child_weight': 2, 'subsample': 0.8650810030694678, 'colsample_bytree': 0.781243392460999, 'gamma': 0.12913641149526703, 'reg_alpha': 0.6257105005335486, 'reg_lambda': 0.6869272648703811}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  79%|███████▉  | 79/100 [04:30<01:49,  5.19s/it]

[I 2025-11-16 13:02:46,034] Trial 78 finished with value: 0.7113333333333333 and parameters: {'n_estimators': 495, 'max_depth': 4, 'learning_rate': 0.005482238301038545, 'min_child_weight': 2, 'subsample': 0.770882920842835, 'colsample_bytree': 0.8046422113539965, 'gamma': 0.08332650332643361, 'reg_alpha': 0.5085819259207384, 'reg_lambda': 0.7552687251564412}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  80%|████████  | 80/100 [04:33<01:24,  4.24s/it]

[I 2025-11-16 13:02:48,045] Trial 79 finished with value: 0.7139999999999999 and parameters: {'n_estimators': 582, 'max_depth': 3, 'learning_rate': 0.007791699023697627, 'min_child_weight': 1, 'subsample': 0.8492797240507075, 'colsample_bytree': 0.6618196681636508, 'gamma': 0.1139200171239925, 'reg_alpha': 0.48600708443527807, 'reg_lambda': 0.6504753700400512}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  81%|████████  | 81/100 [04:36<01:13,  3.89s/it]

[I 2025-11-16 13:02:51,135] Trial 80 finished with value: 0.7126666666666666 and parameters: {'n_estimators': 546, 'max_depth': 4, 'learning_rate': 0.017387600026201447, 'min_child_weight': 1, 'subsample': 0.8122153488641641, 'colsample_bytree': 0.82324382543368, 'gamma': 0.16296090081780046, 'reg_alpha': 0.574680415129687, 'reg_lambda': 0.8813692010172451}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  82%|████████▏ | 82/100 [04:39<01:09,  3.84s/it]

[I 2025-11-16 13:02:54,854] Trial 81 finished with value: 0.7153333333333334 and parameters: {'n_estimators': 730, 'max_depth': 4, 'learning_rate': 0.008998990124307047, 'min_child_weight': 2, 'subsample': 0.8460640266685521, 'colsample_bytree': 0.7373942822541789, 'gamma': 0.15239670529668353, 'reg_alpha': 0.7114603325773022, 'reg_lambda': 0.7166050150783503}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  83%|████████▎ | 83/100 [04:44<01:08,  4.04s/it]

[I 2025-11-16 13:02:59,355] Trial 82 finished with value: 0.7166666666666666 and parameters: {'n_estimators': 883, 'max_depth': 4, 'learning_rate': 0.007110217508085674, 'min_child_weight': 2, 'subsample': 0.8313781961113941, 'colsample_bytree': 0.7371220718161635, 'gamma': 0.15111097752463368, 'reg_alpha': 0.6211513911697042, 'reg_lambda': 0.7223624623820574}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  84%|████████▍ | 84/100 [04:51<01:17,  4.87s/it]

[I 2025-11-16 13:03:06,164] Trial 83 finished with value: 0.7133333333333333 and parameters: {'n_estimators': 891, 'max_depth': 5, 'learning_rate': 0.00582876132673654, 'min_child_weight': 2, 'subsample': 0.8261834107146308, 'colsample_bytree': 0.7502994551546287, 'gamma': 0.14708004621151116, 'reg_alpha': 0.6171437449951689, 'reg_lambda': 0.7994852361915604}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  85%|████████▌ | 85/100 [04:55<01:11,  4.75s/it]

[I 2025-11-16 13:03:10,636] Trial 84 finished with value: 0.7153333333333333 and parameters: {'n_estimators': 844, 'max_depth': 4, 'learning_rate': 0.006859470802849974, 'min_child_weight': 1, 'subsample': 0.7837035612088756, 'colsample_bytree': 0.7161250905915437, 'gamma': 0.12309398033089806, 'reg_alpha': 0.6705296682773164, 'reg_lambda': 0.6231155183552837}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  86%|████████▌ | 86/100 [04:58<00:59,  4.28s/it]

[I 2025-11-16 13:03:13,826] Trial 85 finished with value: 0.7106666666666668 and parameters: {'n_estimators': 945, 'max_depth': 3, 'learning_rate': 0.010628808442586072, 'min_child_weight': 3, 'subsample': 0.8356722285067109, 'colsample_bytree': 0.7893888248609421, 'gamma': 0.1004952415008046, 'reg_alpha': 0.5416209607719044, 'reg_lambda': 0.8194784755029072}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  87%|████████▋ | 87/100 [05:03<00:56,  4.38s/it]

[I 2025-11-16 13:03:18,422] Trial 86 finished with value: 0.7053333333333333 and parameters: {'n_estimators': 940, 'max_depth': 4, 'learning_rate': 0.015146854318271516, 'min_child_weight': 2, 'subsample': 0.8031982020821143, 'colsample_bytree': 0.7742137862785874, 'gamma': 0.13928930940141485, 'reg_alpha': 0.7546462670075395, 'reg_lambda': 0.7786250410585024}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  88%|████████▊ | 88/100 [05:06<00:46,  3.90s/it]

[I 2025-11-16 13:03:21,223] Trial 87 finished with value: 0.71 and parameters: {'n_estimators': 790, 'max_depth': 3, 'learning_rate': 0.012784617909834891, 'min_child_weight': 2, 'subsample': 0.7939764066694217, 'colsample_bytree': 0.8348513340338899, 'gamma': 0.1327208685802193, 'reg_alpha': 0.43294379332275745, 'reg_lambda': 0.5798513031993978}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  89%|████████▉ | 89/100 [05:08<00:38,  3.46s/it]

[I 2025-11-16 13:03:23,661] Trial 88 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 515, 'max_depth': 4, 'learning_rate': 0.004229637710827294, 'min_child_weight': 3, 'subsample': 0.8143625569090271, 'colsample_bytree': 0.6309882926290205, 'gamma': 0.15702970009180223, 'reg_alpha': 0.47124765085891945, 'reg_lambda': 0.844586157098805}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  90%|█████████ | 90/100 [05:13<00:40,  4.00s/it]

[I 2025-11-16 13:03:28,913] Trial 89 finished with value: 0.714 and parameters: {'n_estimators': 631, 'max_depth': 5, 'learning_rate': 0.007104841052925066, 'min_child_weight': 1, 'subsample': 0.8844340081006026, 'colsample_bytree': 0.680329283360092, 'gamma': 0.10542600016096708, 'reg_alpha': 0.5774424959181933, 'reg_lambda': 0.6892263811907366}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  91%|█████████ | 91/100 [05:24<00:53,  5.91s/it]

[I 2025-11-16 13:03:39,270] Trial 90 finished with value: 0.7146666666666667 and parameters: {'n_estimators': 472, 'max_depth': 9, 'learning_rate': 0.008560203320822166, 'min_child_weight': 2, 'subsample': 0.857559691303406, 'colsample_bytree': 0.8130396171813415, 'gamma': 0.11144764107282723, 'reg_alpha': 0.5978006974962405, 'reg_lambda': 0.7385922788430584}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  92%|█████████▏| 92/100 [05:28<00:42,  5.27s/it]

[I 2025-11-16 13:03:43,055] Trial 91 finished with value: 0.7166666666666667 and parameters: {'n_estimators': 729, 'max_depth': 4, 'learning_rate': 0.009499414786248894, 'min_child_weight': 2, 'subsample': 0.8422016847274884, 'colsample_bytree': 0.7311325360956022, 'gamma': 0.15276037775084997, 'reg_alpha': 0.7130400255365121, 'reg_lambda': 0.7127880537663065}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  93%|█████████▎| 93/100 [05:31<00:33,  4.82s/it]

[I 2025-11-16 13:03:46,810] Trial 92 finished with value: 0.712 and parameters: {'n_estimators': 756, 'max_depth': 4, 'learning_rate': 0.010015332555052518, 'min_child_weight': 2, 'subsample': 0.8266281935303975, 'colsample_bytree': 0.7155828300725174, 'gamma': 0.14497556351105123, 'reg_alpha': 0.6774670457565416, 'reg_lambda': 0.6630083039727117}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  94%|█████████▍| 94/100 [05:41<00:38,  6.39s/it]

[I 2025-11-16 13:03:56,860] Trial 93 finished with value: 0.7053333333333333 and parameters: {'n_estimators': 859, 'max_depth': 7, 'learning_rate': 0.010974171985079465, 'min_child_weight': 3, 'subsample': 0.8670366601790883, 'colsample_bytree': 0.7315908508319298, 'gamma': 0.13034103554168658, 'reg_alpha': 0.6159072270579016, 'reg_lambda': 0.7093262725344595}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  95%|█████████▌| 95/100 [05:45<00:27,  5.60s/it]

[I 2025-11-16 13:04:00,618] Trial 94 finished with value: 0.7166666666666666 and parameters: {'n_estimators': 675, 'max_depth': 4, 'learning_rate': 0.00794332866619542, 'min_child_weight': 2, 'subsample': 0.8411666563761289, 'colsample_bytree': 0.7655178756997632, 'gamma': 0.1654022429629866, 'reg_alpha': 0.6521776302200517, 'reg_lambda': 0.7577735112841171}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  96%|█████████▌| 96/100 [05:52<00:24,  6.04s/it]

[I 2025-11-16 13:04:07,681] Trial 95 finished with value: 0.7100000000000001 and parameters: {'n_estimators': 727, 'max_depth': 5, 'learning_rate': 0.004808514757341435, 'min_child_weight': 2, 'subsample': 0.8379519505889812, 'colsample_bytree': 0.7619241594603159, 'gamma': 0.16753351923497345, 'reg_alpha': 0.6512775637469406, 'reg_lambda': 0.765198162993078}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  97%|█████████▋| 97/100 [05:56<00:16,  5.39s/it]

[I 2025-11-16 13:04:11,558] Trial 96 finished with value: 0.7166666666666666 and parameters: {'n_estimators': 676, 'max_depth': 4, 'learning_rate': 0.008145715085644678, 'min_child_weight': 2, 'subsample': 0.8508166602148857, 'colsample_bytree': 0.7401221939003972, 'gamma': 0.1522933599757645, 'reg_alpha': 0.6937431903653012, 'reg_lambda': 0.8631266619531125}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  98%|█████████▊| 98/100 [06:00<00:09,  4.99s/it]

[I 2025-11-16 13:04:15,626] Trial 97 finished with value: 0.7126666666666666 and parameters: {'n_estimators': 685, 'max_depth': 4, 'learning_rate': 0.006072205250493584, 'min_child_weight': 2, 'subsample': 0.851008465303482, 'colsample_bytree': 0.750380241895864, 'gamma': 0.16310332256256216, 'reg_alpha': 0.7276563763688106, 'reg_lambda': 0.9193081197022341}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333:  99%|█████████▉| 99/100 [06:06<00:05,  5.26s/it]

[I 2025-11-16 13:04:21,495] Trial 98 finished with value: 0.7140000000000001 and parameters: {'n_estimators': 717, 'max_depth': 5, 'learning_rate': 0.007267231922003335, 'min_child_weight': 3, 'subsample': 0.860227812387218, 'colsample_bytree': 0.7257860983889316, 'gamma': 0.18162382264924828, 'reg_alpha': 0.7039892764667254, 'reg_lambda': 0.8618905951430216}. Best is trial 51 with value: 0.7173333333333333.


Best trial: 51. Best value: 0.717333: 100%|██████████| 100/100 [06:10<00:00,  3.71s/it]


[I 2025-11-16 13:04:25,703] Trial 99 finished with value: 0.7066666666666667 and parameters: {'n_estimators': 676, 'max_depth': 4, 'learning_rate': 0.002946106630437834, 'min_child_weight': 3, 'subsample': 0.8756277477817229, 'colsample_bytree': 0.7719106008897197, 'gamma': 0.15442928290778057, 'reg_alpha': 0.7479546421571341, 'reg_lambda': 0.822483080271052}. Best is trial 51 with value: 0.7173333333333333.

Best XGBoost accuracy: 0.7173
Best parameters: {'n_estimators': 516, 'max_depth': 4, 'learning_rate': 0.011966949223279103, 'min_child_weight': 3, 'subsample': 0.8391969393161539, 'colsample_bytree': 0.8394626350861055, 'gamma': 0.14762912463169817, 'reg_alpha': 0.6726765817268627, 'reg_lambda': 0.5758401882979282}

XGBoost CV Accuracy: 0.7167 ± 0.0264
XGBoost F1-Macro: 0.5344
CPU times: total: 4min 16s
Wall time: 6min 25s


### 4.2 LightGBM with Hyperparameter Tuning

In [8]:
%%time

print("\n" + "="*80)
print("Training LightGBM with Hyperparameter Tuning")
print("="*80)

if OPTUNA_AVAILABLE:
    def objective_lgb(trial):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'num_leaves': trial.suggest_int('num_leaves', 15, 255),
            'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
            'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 50),
            'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
            'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
            'lambda_l1': trial.suggest_float('lambda_l1', 0, 10),
            'lambda_l2': trial.suggest_float('lambda_l2', 0, 10),
            'random_state': RANDOM_STATE,
            'device': 'gpu',
            'verbose': -1
        }
        
        model = lgb.LGBMClassifier(**params)
        scores = cross_val_score(model, X, y_encoded, cv=skf, scoring='accuracy', n_jobs=-1)
        return scores.mean()
    
    study_lgb = optuna.create_study(direction='maximize', study_name='lightgbm')
    study_lgb.optimize(objective_lgb, n_trials=100, show_progress_bar=True)
    
    print(f"\nBest LightGBM accuracy: {study_lgb.best_value:.4f}")
    print(f"Best parameters: {study_lgb.best_params}")
    
    lgb_model = lgb.LGBMClassifier(**study_lgb.best_params)
    
else:
    param_dist = {
        'n_estimators': [100, 300, 500, 700, 1000],
        'num_leaves': [15, 31, 63, 127, 255],
        'learning_rate': [0.001, 0.01, 0.05, 0.1],
        'min_data_in_leaf': [10, 20, 30, 50],
        'feature_fraction': [0.6, 0.7, 0.8, 0.9, 1.0],
        'bagging_fraction': [0.6, 0.7, 0.8, 0.9, 1.0],
        'lambda_l1': [0, 0.1, 1, 10],
        'lambda_l2': [0, 0.1, 1, 10]
    }
    lgb_base = lgb.LGBMClassifier(random_state=RANDOM_STATE, device='gpu', verbose=-1)
    lgb_base = lgb.LGBMClassifier(random_state=RANDOM_STATE, verbose=-1)
    random_search = RandomizedSearchCV(lgb_base, param_dist, n_iter=100, cv=5,
                                      scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
    random_search.fit(X, y_encoded)
    
    print(f"\nBest LightGBM accuracy: {random_search.best_score_:.4f}")
    print(f"Best parameters: {random_search.best_params_}")
    
    lgb_model = random_search.best_estimator_

cv_results = cross_validate(lgb_model, X, y_encoded, cv=skf,
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['LightGBM'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': lgb_model
}

print(f"\nLightGBM CV Accuracy: {model_results['LightGBM']['cv_accuracy']:.4f} ± {model_results['LightGBM']['cv_std']:.4f}")
print(f"LightGBM F1-Macro: {model_results['LightGBM']['f1_macro']:.4f}")

[I 2025-11-16 13:59:14,616] A new study created in memory with name: lightgbm



Training LightGBM with Hyperparameter Tuning


  0%|          | 0/100 [00:00<?, ?it/s]

Best trial: 0. Best value: 0.672667:   1%|          | 1/100 [00:54<1:29:24, 54.19s/it]

[I 2025-11-16 14:00:08,816] Trial 0 finished with value: 0.6726666666666666 and parameters: {'n_estimators': 498, 'num_leaves': 166, 'learning_rate': 0.0011151356450256468, 'min_data_in_leaf': 45, 'feature_fraction': 0.9504970109595525, 'bagging_fraction': 0.992082348903307, 'bagging_freq': 5, 'lambda_l1': 9.42243043792762, 'lambda_l2': 9.46289472157861}. Best is trial 0 with value: 0.6726666666666666.


Best trial: 0. Best value: 0.672667:   1%|          | 1/100 [03:41<6:04:49, 221.11s/it]

[W 2025-11-16 14:02:55,722] Trial 1 failed with parameters: {'n_estimators': 838, 'num_leaves': 53, 'learning_rate': 0.004747461821408483, 'min_data_in_leaf': 19, 'feature_fraction': 0.7169034614622614, 'bagging_fraction': 0.9452237521134776, 'bagging_freq': 3, 'lambda_l1': 3.059142688617631, 'lambda_l2': 2.6874401767788045} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\HP\anaconda3\envs\Vision\Lib\site-packages\optuna\study\_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<timed exec>", line 23, in objective_lgb
  File "c:\Users\HP\anaconda3\envs\Vision\Lib\site-packages\sklearn\utils\_param_validation.py", line 218, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\HP\anaconda3\envs\Vision\Lib\site-packages\sklearn\model_selection\_validation.py", line 677, in cross_val_score
    cv_results = cross_validate(
         




KeyboardInterrupt: 

### 4.3 CatBoost with Hyperparameter Tuning

In [None]:
%%time

print("\n" + "="*80)
print("Training CatBoost with Hyperparameter Tuning")
print("="*80)

if OPTUNA_AVAILABLE:
    def objective_cat(trial):
        params = {
            'iterations': trial.suggest_int('iterations', 500, 3000),
            'depth': trial.suggest_int('depth', 4, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
            'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 9),
            'border_count': trial.suggest_int('border_count', 32, 254),
            'random_state': RANDOM_STATE,
            'task_type': 'GPU',
            'verbose': 0
        }
        
        model = CatBoostClassifier(**params)
        scores = cross_val_score(model, X, y_encoded, cv=skf, scoring='accuracy', n_jobs=-1)
        return scores.mean()
    
    study_cat = optuna.create_study(direction='maximize', study_name='catboost')
    study_cat.optimize(objective_cat, n_trials=50, show_progress_bar=True)  # Fewer trials due to longer training
    
    print(f"\nBest CatBoost accuracy: {study_cat.best_value:.4f}")
    print(f"Best parameters: {study_cat.best_params}")
    
    cat_model = CatBoostClassifier(**study_cat.best_params)
    
else:
    param_dist = {
        'iterations': [500, 1000, 2000, 3000],
        'depth': [4, 6, 8, 10],
        'learning_rate': [0.001, 0.01, 0.05, 0.1],
        'l2_leaf_reg': [1, 3, 5, 7, 9],
        'border_count': [32, 64, 128, 254]
    }
    cat_base = CatBoostClassifier(random_state=RANDOM_STATE, task_type='GPU', verbose=0)
    cat_base = CatBoostClassifier(random_state=RANDOM_STATE, verbose=0)
    random_search = RandomizedSearchCV(cat_base, param_dist, n_iter=50, cv=5,
                                      scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
    random_search.fit(X, y_encoded)
    
    print(f"\nBest CatBoost accuracy: {random_search.best_score_:.4f}")
    print(f"Best parameters: {random_search.best_params_}")
    
    cat_model = random_search.best_estimator_

cv_results = cross_validate(cat_model, X, y_encoded, cv=skf,
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['CatBoost'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': cat_model
}

print(f"\nCatBoost CV Accuracy: {model_results['CatBoost']['cv_accuracy']:.4f} ± {model_results['CatBoost']['cv_std']:.4f}")
print(f"CatBoost F1-Macro: {model_results['CatBoost']['f1_macro']:.4f}")

### 4.4 Random Forest (Deep)

In [None]:
%%time

print("\n" + "="*80)
print("Training Random Forest with Hyperparameter Tuning")
print("="*80)

param_grid = {
    'n_estimators': [500, 1000, 1500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

rf_base = RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1)
random_search = RandomizedSearchCV(rf_base, param_grid, n_iter=50, cv=5,
                                  scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
random_search.fit(X, y_encoded)

print(f"\nBest Random Forest accuracy: {random_search.best_score_:.4f}")
print(f"Best parameters: {random_search.best_params_}")

rf_model = random_search.best_estimator_

cv_results = cross_validate(rf_model, X, y_encoded, cv=skf,
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['RandomForest'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': rf_model
}

print(f"\nRandom Forest CV Accuracy: {model_results['RandomForest']['cv_accuracy']:.4f} ± {model_results['RandomForest']['cv_std']:.4f}")
print(f"Random Forest F1-Macro: {model_results['RandomForest']['f1_macro']:.4f}")

### 4.5 Extra Trees Classifier

In [None]:
%%time

print("\n" + "="*80)
print("Training Extra Trees with Hyperparameter Tuning")
print("="*80)

param_grid = {
    'n_estimators': [500, 1000, 1500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

et_base = ExtraTreesClassifier(random_state=RANDOM_STATE, n_jobs=-1)
random_search = RandomizedSearchCV(et_base, param_grid, n_iter=50, cv=5,
                                  scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
random_search.fit(X, y_encoded)

print(f"\nBest Extra Trees accuracy: {random_search.best_score_:.4f}")
print(f"Best parameters: {random_search.best_params_}")

et_model = random_search.best_estimator_

cv_results = cross_validate(et_model, X, y_encoded, cv=skf,
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['ExtraTrees'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': et_model
}

print(f"\nExtra Trees CV Accuracy: {model_results['ExtraTrees']['cv_accuracy']:.4f} ± {model_results['ExtraTrees']['cv_std']:.4f}")
print(f"Extra Trees F1-Macro: {model_results['ExtraTrees']['f1_macro']:.4f}")

### 4.6 Neural Network (Deep)

In [7]:
%%time

print("\n" + "="*80)
print("Training Neural Network with Hyperparameter Tuning")
print("="*80)

param_grid = {
    'hidden_layer_sizes': [(256, 128, 64, 32), (512, 256, 128), (1024, 512, 256)],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.0001, 0.001, 0.01],
    'batch_size': [32, 64, 128]
}

nn_base = MLPClassifier(max_iter=1000, early_stopping=True, random_state=RANDOM_STATE)
random_search = RandomizedSearchCV(nn_base, param_grid, n_iter=30, cv=5,
                                  scoring='accuracy', n_jobs=-1, random_state=RANDOM_STATE)
random_search.fit(X, y_encoded)

print(f"\nBest Neural Network accuracy: {random_search.best_score_:.4f}")
print(f"Best parameters: {random_search.best_params_}")

nn_model = random_search.best_estimator_

cv_results = cross_validate(nn_model, X, y_encoded, cv=skf,
                           scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['NeuralNetwork'] = {
    'cv_accuracy': cv_results['test_accuracy'].mean(),
    'cv_std': cv_results['test_accuracy'].std(),
    'f1_macro': cv_results['test_f1_macro'].mean(),
    'model': nn_model
}

print(f"\nNeural Network CV Accuracy: {model_results['NeuralNetwork']['cv_accuracy']:.4f} ± {model_results['NeuralNetwork']['cv_std']:.4f}")
print(f"Neural Network F1-Macro: {model_results['NeuralNetwork']['f1_macro']:.4f}")


Training Neural Network with Hyperparameter Tuning
CPU times: total: 2.17 s
Wall time: 3min 3s


KeyboardInterrupt: 

## 5. Base Model Comparison

In [None]:
# Create comparison dataframe
comparison_df = pd.DataFrame([
    {
        'Model': name,
        'CV Accuracy': f"{results['cv_accuracy']:.4f}",
        'CV Std': f"{results['cv_std']:.4f}",
        'F1-Macro': f"{results['f1_macro']:.4f}"
    }
    for name, results in model_results.items()
]).sort_values('CV Accuracy', ascending=False)

print("\n" + "="*80)
print("BASE MODEL PERFORMANCE COMPARISON")
print("="*80)
print(comparison_df.to_string(index=False))

# Visualize results
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Accuracy comparison
models = list(model_results.keys())
accuracies = [model_results[m]['cv_accuracy'] for m in models]
stds = [model_results[m]['cv_std'] for m in models]

axes[0].barh(models, accuracies, xerr=stds, capsize=5)
axes[0].set_xlabel('CV Accuracy')
axes[0].set_title('Base Model Accuracy Comparison')
axes[0].set_xlim([min(accuracies) - 0.05, max(accuracies) + 0.05])

# F1-Score comparison
f1_scores = [model_results[m]['f1_macro'] for m in models]
axes[1].barh(models, f1_scores)
axes[1].set_xlabel('F1-Macro Score')
axes[1].set_title('Base Model F1-Score Comparison')
axes[1].set_xlim([min(f1_scores) - 0.05, max(f1_scores) + 0.05])

plt.tight_layout()
plt.savefig('visualizations/base_model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nVisualization saved to: visualizations/base_model_comparison.png")

## 6. Layer 2: Stacking Ensemble

In [None]:
%%time

print("\n" + "="*80)
print("Building Stacked Ensemble")
print("="*80)

# Select top 5 base models
sorted_models = sorted(model_results.items(), key=lambda x: x[1]['cv_accuracy'], reverse=True)
top_5_models = sorted_models[:5]

print("\nTop 5 base models for stacking:")
for i, (name, results) in enumerate(top_5_models, 1):
    print(f"{i}. {name}: {results['cv_accuracy']:.4f}")

# Create estimators list for stacking
estimators = [(name, results['model']) for name, results in top_5_models]

# Meta-model 1: Logistic Regression
print("\n" + "-"*80)
print("Stacking with Logistic Regression Meta-Model")
print("-"*80)

stack_lr = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=1000, random_state=RANDOM_STATE),
    cv=5,
    n_jobs=-1
)

cv_results_lr = cross_validate(stack_lr, X, y_encoded, cv=skf,
                              scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['Stack_LogReg'] = {
    'cv_accuracy': cv_results_lr['test_accuracy'].mean(),
    'cv_std': cv_results_lr['test_accuracy'].std(),
    'f1_macro': cv_results_lr['test_f1_macro'].mean(),
    'model': stack_lr
}

print(f"Stacking (LogReg) CV Accuracy: {model_results['Stack_LogReg']['cv_accuracy']:.4f} ± {model_results['Stack_LogReg']['cv_std']:.4f}")
print(f"Stacking (LogReg) F1-Macro: {model_results['Stack_LogReg']['f1_macro']:.4f}")

# Meta-model 2: XGBoost
print("\n" + "-"*80)
print("Stacking with XGBoost Meta-Model")
print("-"*80)

stack_xgb = StackingClassifier(
    estimators=estimators,
    final_estimator=xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=RANDOM_STATE),
    cv=5,
    n_jobs=-1
)

cv_results_xgb = cross_validate(stack_xgb, X, y_encoded, cv=skf,
                               scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['Stack_XGB'] = {
    'cv_accuracy': cv_results_xgb['test_accuracy'].mean(),
    'cv_std': cv_results_xgb['test_accuracy'].std(),
    'f1_macro': cv_results_xgb['test_f1_macro'].mean(),
    'model': stack_xgb
}

print(f"Stacking (XGB) CV Accuracy: {model_results['Stack_XGB']['cv_accuracy']:.4f} ± {model_results['Stack_XGB']['cv_std']:.4f}")
print(f"Stacking (XGB) F1-Macro: {model_results['Stack_XGB']['f1_macro']:.4f}")

## 7. Voting Ensemble

In [None]:
%%time

print("\n" + "="*80)
print("Building Voting Ensemble")
print("="*80)

# Soft voting with top 5 models
voting_clf = VotingClassifier(
    estimators=estimators,
    voting='soft',
    n_jobs=-1
)

cv_results_vote = cross_validate(voting_clf, X, y_encoded, cv=skf,
                                scoring=['accuracy', 'f1_macro'], return_train_score=True)

model_results['Voting_Soft'] = {
    'cv_accuracy': cv_results_vote['test_accuracy'].mean(),
    'cv_std': cv_results_vote['test_accuracy'].std(),
    'f1_macro': cv_results_vote['test_f1_macro'].mean(),
    'model': voting_clf
}

print(f"Voting (Soft) CV Accuracy: {model_results['Voting_Soft']['cv_accuracy']:.4f} ± {model_results['Voting_Soft']['cv_std']:.4f}")
print(f"Voting (Soft) F1-Macro: {model_results['Voting_Soft']['f1_macro']:.4f}")

## 8. Final Model Selection and Comparison

In [None]:
# Final comparison of all models
final_comparison_df = pd.DataFrame([
    {
        'Model': name,
        'CV Accuracy': results['cv_accuracy'],
        'CV Std': results['cv_std'],
        'F1-Macro': results['f1_macro']
    }
    for name, results in model_results.items()
]).sort_values('CV Accuracy', ascending=False)

print("\n" + "="*80)
print("FINAL MODEL PERFORMANCE COMPARISON (ALL MODELS)")
print("="*80)
print(final_comparison_df.to_string(index=False))

# Select best model
best_model_name = final_comparison_df.iloc[0]['Model']
best_model = model_results[best_model_name]['model']
best_accuracy = final_comparison_df.iloc[0]['CV Accuracy']

print(f"\n{'='*80}")
print(f"BEST MODEL: {best_model_name}")
print(f"Best CV Accuracy: {best_accuracy:.4f}")
print(f"{'='*80}")

# Visualize final comparison
plt.figure(figsize=(12, 8))
plt.barh(final_comparison_df['Model'], final_comparison_df['CV Accuracy'], 
         xerr=final_comparison_df['CV Std'], capsize=5)
plt.xlabel('CV Accuracy', fontsize=12)
plt.ylabel('Model', fontsize=12)
plt.title('Final Model Performance Comparison', fontsize=14, fontweight='bold')
plt.axvline(x=best_accuracy, color='r', linestyle='--', alpha=0.7, label=f'Best: {best_accuracy:.4f}')
plt.legend()
plt.tight_layout()
plt.savefig('visualizations/final_model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nVisualization saved to: visualizations/final_model_comparison.png")

## 9. Train Final Model on Full Dataset

In [None]:
%%time

print("\n" + "="*80)
print(f"Training {best_model_name} on Full Dataset")
print("="*80)

# Train on full training data
best_model.fit(X, y_encoded)

# Training predictions
train_pred = best_model.predict(X)
train_accuracy = accuracy_score(y_encoded, train_pred)

print(f"\nTraining Accuracy: {train_accuracy:.4f}")
print(f"\nClassification Report (Training):")
print(classification_report(y_encoded, train_pred, target_names=le.classes_))

# Confusion matrix
cm = confusion_matrix(y_encoded, train_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=le.classes_, yticklabels=le.classes_)
plt.title(f'{best_model_name} - Confusion Matrix (Training)', fontsize=14, fontweight='bold')
plt.ylabel('Actual', fontsize=12)
plt.xlabel('Predicted', fontsize=12)
plt.tight_layout()
plt.savefig('visualizations/confusion_matrix_training.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nConfusion matrix saved to: visualizations/confusion_matrix_training.png")

## 10. Generate Test Predictions

In [None]:
%%time

print("\n" + "="*80)
print("Generating Test Predictions")
print("="*80)

# Make predictions on test set
test_pred_encoded = best_model.predict(X_test)
test_pred = le.inverse_transform(test_pred_encoded)

# Get prediction probabilities
test_pred_proba = best_model.predict_proba(X_test)

print(f"\nTest predictions generated: {len(test_pred)}")
print(f"\nPrediction distribution:")
print(pd.Series(test_pred).value_counts())

# Create submission file
submission = pd.DataFrame({
    'id': test_ids,
    'drug_category': test_pred
})

submission_filename = f"submission_{best_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
submission.to_csv(submission_filename, index=False)

print(f"\nSubmission file created: {submission_filename}")
print(f"\nFirst 10 predictions:")
print(submission.head(10))

## 11. Save Models

In [None]:
# Create models directory
os.makedirs('models', exist_ok=True)

# Save best model
best_model_filename = f"models/best_model_{best_model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pkl"
joblib.dump(best_model, best_model_filename)
print(f"Best model saved: {best_model_filename}")

# Save label encoder
le_filename = "models/label_encoder.pkl"
joblib.dump(le, le_filename)
print(f"Label encoder saved: {le_filename}")

# Save all model results
results_filename = "models/model_results.pkl"
joblib.dump(model_results, results_filename)
print(f"All model results saved: {results_filename}")

# Save comparison dataframe
final_comparison_df.to_csv('models/model_comparison.csv', index=False)
print("Model comparison saved: models/model_comparison.csv")

## 12. Summary and Recommendations

In [None]:
print("\n" + "="*80)
print("TRAINING SUMMARY")
print("="*80)

print(f"\nTotal models trained: {len(model_results)}")
print(f"\nBest performing model: {best_model_name}")
print(f"Best CV Accuracy: {best_accuracy:.4f}")
print(f"Best F1-Macro: {final_comparison_df.iloc[0]['F1-Macro']:.4f}")

print(f"\n{'='*80}")
print("TOP 5 MODELS")
print("="*80)
print(final_comparison_df.head().to_string(index=False))

print(f"\n{'='*80}")
print("RECOMMENDATIONS")
print("="*80)
print("\n1. Use the best model for final submission")
print("2. Consider ensemble of top 3-5 models for potentially better results")
print("3. Monitor for overfitting - check train vs validation gap")
print("4. If time permits, run additional Bayesian optimization trials")
print("5. Consider feature engineering if accuracy needs improvement")

print(f"\n{'='*80}")
print("FILES GENERATED")
print("="*80)
print(f"1. Submission: {submission_filename}")
print(f"2. Best model: {best_model_filename}")
print(f"3. Label encoder: {le_filename}")
print(f"4. Model results: {results_filename}")
print("5. Visualizations: visualizations/")
print("6. Model comparison: models/model_comparison.csv")

print(f"\n{'='*80}")
print("TRAINING COMPLETE!")
print("="*80)