### 1: Install Required Libraries

In [1]:
# Install required libraries (run this first)
%pip install optuna xgboost lightgbm catboost scikit-learn imbalanced-learn shap plotly matplotlib seaborn

Collecting numpy (from optuna)
  Downloading numpy-2.2.6-cp313-cp313-win_amd64.whl.metadata (60 kB)
Downloading numpy-2.2.6-cp313-cp313-win_amd64.whl (12.6 MB)
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--


  You can safely remove it manually.
  You can safely remove it manually.


In [1]:

# Import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Machine Learning
from sklearn.model_selection import KFold, StratifiedKFold, cross_validate
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, matthews_corrcoef, classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.base import clone

# Imbalanced Learning
from imblearn.ensemble import BalancedRandomForestClassifier, EasyEnsembleClassifier, BalancedBaggingClassifier

# Gradient Boosting
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

# Hyperparameter Optimization
import optuna
from optuna.samplers import TPESampler

# Feature Selection
from sklearn.feature_selection import SelectKBest, mutual_info_classif, RFECV

# Visualization
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Interpretation
import shap

print("All libraries imported successfully!")

All libraries imported successfully!


### Step 2: Enhanced Data Preprocessing with Advanced Techniques

In [2]:
# Load Training Dataset
data = pd.read_csv('DIA_trainingset_RDKit_descriptors.csv')

# extract features and target variable
X_train = data.iloc[:, 2:]
Y_train = data.iloc[:, 0]

# Load Test Dataset
test_data = pd.read_csv('DIA_testset_RDKit_descriptors.csv')
X_test = test_data.iloc[:, 2:]
Y_test = test_data.iloc[:, 0]

In [3]:
X_train.head()

Unnamed: 0,BalabanJ,BertzCT,Chi0,Chi0n,Chi0v,Chi1,Chi1n,Chi1v,Chi2n,Chi2v,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,1.821,1266.407,22.121,16.781,16.781,14.901,9.203,9.203,6.668,6.668,...,0,0,0,0,0,0,0,0,0,0
1,2.363,490.434,11.707,8.752,9.569,7.592,4.854,5.67,3.545,4.661,...,0,0,0,0,0,0,0,1,0,1
2,3.551,93.092,6.784,5.471,5.471,3.417,2.42,2.42,2.82,2.82,...,0,0,0,0,0,0,0,0,0,0
3,2.076,1053.003,21.836,16.995,16.995,14.274,9.926,9.926,7.662,7.662,...,0,0,0,0,0,0,0,0,0,0
4,2.888,549.823,14.629,9.746,9.746,8.752,5.04,5.04,3.601,3.601,...,0,0,0,0,0,0,0,0,0,0


In [4]:
X_test.head()

Unnamed: 0,BalabanJ,BertzCT,Chi0,Chi0n,Chi0v,Chi1,Chi1n,Chi1v,Chi2n,Chi2v,...,fr_sulfide,fr_sulfonamd,fr_sulfone,fr_term_acetylene,fr_tetrazole,fr_thiazole,fr_thiocyan,fr_thiophene,fr_unbrch_alkane,fr_urea
0,1.484,743.207,21.466,18.764,18.764,14.292,12.106,12.106,10.736,10.736,...,0,0,0,0,0,0,0,0,0,0
1,1.472,868.947,21.14,16.736,17.553,14.453,10.268,11.084,7.662,8.746,...,0,0,0,0,0,0,0,0,0,0
2,0.837,1409.004,39.189,32.904,32.904,26.011,20.941,20.941,18.816,18.816,...,0,0,0,0,0,0,0,0,0,0
3,2.406,621.298,13.828,10.297,10.297,9.092,5.847,5.847,4.217,4.217,...,0,0,0,0,0,0,0,0,0,0
4,1.32,2127.996,37.955,30.849,31.666,25.91,18.066,19.115,14.93,16.06,...,1,0,0,0,0,0,0,0,0,0


In [5]:
def advanced_preprocessing(X_train, X_test, y_train, method='robust'):
    """
    Advanced preprocessing with multiple scaling options and outlier handling
    
    Parameters:
    - method: 'standard', 'robust', 'minmax', 'quantile'
    """
    from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, QuantileTransformer
    
    scalers = {
        'standard': StandardScaler(),
        'robust': RobustScaler(),
        'minmax': MinMaxScaler(),
        'quantile': QuantileTransformer(output_distribution='normal')
    }
    
    scaler = scalers[method]
    
    # Apply scaling
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index
    )
    
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index
    )
    
    print(f"Applied {method} scaling")
    return X_train_scaled, X_test_scaled, scaler

def remove_highly_correlated_features(X, threshold=0.95):
    """
    Remove highly correlated features using advanced correlation analysis
    """
    corr_matrix = X.corr().abs()
    upper_triangle = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    )
    
    # Find features to drop
    to_drop = [column for column in upper_triangle.columns 
               if any(upper_triangle[column] > threshold)]
    
    X_reduced = X.drop(columns=to_drop)
    print(f"Removed {len(to_drop)} highly correlated features (threshold: {threshold})")
    print(f"Features reduced from {X.shape[1]} to {X_reduced.shape[1]}")
    
    return X_reduced, to_drop

# Apply advanced preprocessing
X_train_enhanced, X_test_enhanced, scaler = advanced_preprocessing(
    X_train, X_test, Y_train, method='robust'
)

# Remove highly correlated features
X_train_final, dropped_features = remove_highly_correlated_features(
    X_train_enhanced, threshold=0.95
)
X_test_final = X_test_enhanced.drop(columns=dropped_features)

print(f"Final dataset shape: Training {X_train_final.shape}, Test {X_test_final.shape}")

Applied robust scaling
Removed 31 highly correlated features (threshold: 0.95)
Features reduced from 196 to 165
Final dataset shape: Training (477, 165), Test (120, 165)


### Step 3: Advanced Feature Selection with Multiple Methods

In [6]:
class AdvancedFeatureSelector:
    """
    Comprehensive feature selection using multiple methods
    """
    
    def __init__(self, random_state=42):
        self.random_state = random_state
        self.selected_features = {}
        
    def mutual_information_selection(self, X, y, k='auto'):
        """Enhanced mutual information selection"""
        if k == 'auto':
            k = min(50, X.shape[1] // 2)  # Adaptive k selection
            
        mi_scores = mutual_info_classif(X, y, random_state=self.random_state)
        feature_scores = pd.Series(mi_scores, index=X.columns).sort_values(ascending=False)
        
        # Select top k features
        selected_features = feature_scores.head(k).index.tolist()
        self.selected_features['mutual_info'] = selected_features
        
        return selected_features, feature_scores
    
    def recursive_feature_elimination(self, X, y, estimator=None):
        """RFECV with cross-validation"""
        if estimator is None:
            estimator = RandomForestClassifier(n_estimators=100, random_state=self.random_state)
            
        rfecv = RFECV(
            estimator=estimator,
            step=1,
            cv=5,
            scoring='roc_auc',
            n_jobs=-1
        )
        
        rfecv.fit(X, y)
        selected_features = X.columns[rfecv.support_].tolist()
        self.selected_features['rfecv'] = selected_features
        
        return selected_features, rfecv
    
    def variance_threshold_selection(self, X, threshold=0.01):
        """Remove low variance features"""
        from sklearn.feature_selection import VarianceThreshold
        
        selector = VarianceThreshold(threshold=threshold)
        selector.fit(X)
        selected_features = X.columns[selector.get_support()].tolist()
        self.selected_features['variance'] = selected_features
        
        return selected_features, selector
    
    def statistical_selection(self, X, y, method='f_classif', k=50):
        """Statistical feature selection"""
        from sklearn.feature_selection import f_classif, chi2
        
        if method == 'f_classif':
            selector = SelectKBest(f_classif, k=k)
        elif method == 'chi2':
            # Ensure non-negative values for chi2
            X_positive = X - X.min() + 1e-5
            selector = SelectKBest(chi2, k=k)
            X = X_positive
            
        selector.fit(X, y)
        selected_features = X.columns[selector.get_support()].tolist()
        self.selected_features['statistical'] = selected_features
        
        return selected_features, selector
    
    def ensemble_selection(self, X, y, methods=['mutual_info', 'rfecv', 'statistical']):
        """Combine multiple selection methods"""
        all_selected = []
        
        if 'mutual_info' in methods:
            features, _ = self.mutual_information_selection(X, y)
            all_selected.extend(features)
            
        if 'rfecv' in methods:
            features, _ = self.recursive_feature_elimination(X, y)
            all_selected.extend(features)
            
        if 'statistical' in methods:
            features, _ = self.statistical_selection(X, y)
            all_selected.extend(features)
            
        # Count feature frequency
        feature_counts = pd.Series(all_selected).value_counts()
        
        # Select features that appear in at least 2 methods
        ensemble_features = feature_counts[feature_counts >= 2].index.tolist()
        self.selected_features['ensemble'] = ensemble_features
        
        return ensemble_features, feature_counts

# Apply advanced feature selection
feature_selector = AdvancedFeatureSelector(random_state=42)

# Run ensemble selection
ensemble_features, feature_counts = feature_selector.ensemble_selection(
    X_train_final, Y_train, methods=['mutual_info', 'rfecv', 'statistical']
)

print(f"Ensemble selection chose {len(ensemble_features)} features")
print(f"Top 10 most selected features:")
print(feature_counts.head(10))

# Create final feature set
X_train_selected = X_train_final[ensemble_features]
X_test_selected = X_test_final[ensemble_features]

print(f"Final feature set shape: {X_train_selected.shape}")

Ensemble selection chose 75 features
Top 10 most selected features:
EState_VSA9          3
EState_VSA1          3
NumAliphaticRings    3
PEOE_VSA6            3
EState_VSA4          3
PEOE_VSA9            3
SlogP_VSA10          3
fr_urea              3
EState_VSA10         3
fr_piperdine         3
Name: count, dtype: int64
Final feature set shape: (477, 75)


### Step 4: Advanced Model Development with Optuna Optimization

In [7]:
class OptunaModelOptimizer:
    """
    Advanced hyperparameter optimization using Optuna
    """
    
    def __init__(self, n_trials=100, cv_folds=5, random_state=42):
        self.n_trials = n_trials
        self.cv_folds = cv_folds
        self.random_state = random_state
        self.best_params = {}
        self.study_results = {}
    
    def objective_function(self, trial, model_type, X, y):
        """Unified objective function for all models"""
        
        if model_type == 'random_forest':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 500),
                'max_depth': trial.suggest_int('max_depth', 5, 30),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', 0.5, 0.7]),
                'random_state': self.random_state,
                'n_jobs': -1
            }
            model = RandomForestClassifier(**params)
            
        elif model_type == 'xgboost':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                'max_depth': trial.suggest_int('max_depth', 3, 15),
                'subsample': trial.suggest_float('subsample', 0.6, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
                'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
                'random_state': self.random_state,
                'eval_metric': 'logloss',
                'verbosity': 0
            }
            model = xgb.XGBClassifier(**params)
            
        elif model_type == 'lightgbm':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                'max_depth': trial.suggest_int('max_depth', 3, 15),
                'num_leaves': trial.suggest_int('num_leaves', 10, 300),
                'subsample': trial.suggest_float('subsample', 0.6, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
                'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
                'random_state': self.random_state,
                'verbosity': -1,
                'force_col_wise': True
            }
            model = lgb.LGBMClassifier(**params)
            
        elif model_type == 'catboost':
            params = {
                'iterations': trial.suggest_int('iterations', 100, 1000),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                'depth': trial.suggest_int('depth', 3, 10),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
                'random_state': self.random_state,
                'verbose': False
            }
            model = cb.CatBoostClassifier(**params)
            
        elif model_type == 'balanced_rf':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 100, 500),
                'max_depth': trial.suggest_int('max_depth', 5, 30),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', 0.5, 0.7]),
                'random_state': self.random_state,
                'n_jobs': -1
            }
            model = BalancedRandomForestClassifier(**params)
        
        # Cross-validation
        cv = StratifiedKFold(n_splits=self.cv_folds, shuffle=True, random_state=self.random_state)
        scores = cross_validate(
            model, X, y, 
            cv=cv, 
            scoring=['roc_auc', 'accuracy', 'f1'],
            n_jobs=-1
        )
        
        # Return weighted score (prioritize AUC and F1)
        return 0.5 * scores['test_roc_auc'].mean() + 0.3 * scores['test_f1'].mean() + 0.2 * scores['test_accuracy'].mean()
    
    def optimize_model(self, model_type, X, y):
        """Optimize a specific model type"""
        print(f"Optimizing {model_type}...")
        
        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=self.random_state)
        )
        
        study.optimize(
            lambda trial: self.objective_function(trial, model_type, X, y),
            n_trials=self.n_trials,
            show_progress_bar=True
        )
        
        self.best_params[model_type] = study.best_params
        self.study_results[model_type] = study
        
        print(f"Best {model_type} score: {study.best_value:.4f}")
        print(f"Best {model_type} params: {study.best_params}")
        
        return study.best_params, study.best_value
    
    def optimize_all_models(self, X, y, models=['balanced_rf', 'xgboost', 'lightgbm', 'catboost']):
        """Optimize all specified models"""
        results = {}
        
        for model_type in models:
            params, score = self.optimize_model(model_type, X, y)
            results[model_type] = {'params': params, 'score': score}
            
        return results

    # Add this method to your OptunaModelOptimizer class
    def create_optimized_models(self, optimization_results):
        """Create models with optimized parameters"""
        models = {}
        
        for model_type, result in optimization_results.items():
            params = result['params']
            params['random_state'] = self.random_state
            
            if model_type == 'balanced_rf':
                params['n_jobs'] = -1
                models[model_type] = BalancedRandomForestClassifier(**params)
            elif model_type == 'xgboost':
                params['eval_metric'] = 'logloss'
                params['verbosity'] = 0
                models[model_type] = xgb.XGBClassifier(**params)
            elif model_type == 'lightgbm':
                params['verbosity'] = -1
                params['force_col_wise'] = True
                models[model_type] = lgb.LGBMClassifier(**params)
            elif model_type == 'catboost':
                params['verbose'] = False
                models[model_type] = cb.CatBoostClassifier(**params)
                
        return models

# Initialize optimizer
optimizer = OptunaModelOptimizer(n_trials=50, cv_folds=5)  # Reduce trials for demo

# Optimize models (this will take some time)
optimization_results = optimizer.optimize_all_models(
    X_train_selected, Y_train, 
    models=['balanced_rf', 'xgboost', 'lightgbm']  # Start with these three
)

print("\nOptimization completed!")
for model, result in optimization_results.items():
    print(f"{model}: Score = {result['score']:.4f}")

[I 2025-08-16 12:26:04,881] A new study created in memory with name: no-name-289189b4-cc02-4b85-9ab1-b877d1456417


Optimizing balanced_rf...


Best trial: 0. Best value: 0.75023:   2%|▏         | 1/50 [00:02<01:50,  2.25s/it]

[I 2025-08-16 12:26:07,134] Trial 0 finished with value: 0.7502298860850967 and parameters: {'n_estimators': 250, 'max_depth': 29, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': 0.7}. Best is trial 0 with value: 0.7502298860850967.


Best trial: 0. Best value: 0.75023:   4%|▍         | 2/50 [00:04<01:49,  2.27s/it]

[I 2025-08-16 12:26:09,420] Trial 1 finished with value: 0.7384036658407246 and parameters: {'n_estimators': 341, 'max_depth': 23, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.7502298860850967.


Best trial: 2. Best value: 0.766927:   6%|▌         | 3/50 [00:06<01:35,  2.03s/it]

[I 2025-08-16 12:26:11,153] Trial 2 finished with value: 0.76692705969684 and parameters: {'n_estimators': 222, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.76692705969684.


Best trial: 2. Best value: 0.766927:   8%|▊         | 4/50 [00:07<01:14,  1.62s/it]

[I 2025-08-16 12:26:12,141] Trial 3 finished with value: 0.7652354240822895 and parameters: {'n_estimators': 282, 'max_depth': 25, 'min_samples_split': 5, 'min_samples_leaf': 6, 'max_features': 0.5}. Best is trial 2 with value: 0.76692705969684.


Best trial: 2. Best value: 0.766927:  10%|█         | 5/50 [00:07<00:53,  1.19s/it]

[I 2025-08-16 12:26:12,570] Trial 4 finished with value: 0.7350414165029564 and parameters: {'n_estimators': 126, 'max_depth': 29, 'min_samples_split': 20, 'min_samples_leaf': 9, 'max_features': 0.5}. Best is trial 2 with value: 0.76692705969684.


Best trial: 2. Best value: 0.766927:  12%|█▏        | 6/50 [00:08<00:40,  1.08it/s]

[I 2025-08-16 12:26:12,980] Trial 5 finished with value: 0.7263519250973733 and parameters: {'n_estimators': 148, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'log2'}. Best is trial 2 with value: 0.76692705969684.


Best trial: 2. Best value: 0.766927:  14%|█▍        | 7/50 [00:09<00:39,  1.09it/s]

[I 2025-08-16 12:26:13,895] Trial 6 finished with value: 0.739018353161122 and parameters: {'n_estimators': 319, 'max_depth': 9, 'min_samples_split': 20, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.76692705969684.


Best trial: 7. Best value: 0.774319:  16%|█▌        | 8/50 [00:09<00:32,  1.29it/s]

[I 2025-08-16 12:26:14,354] Trial 7 finished with value: 0.7743185283753116 and parameters: {'n_estimators': 135, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 0.5}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  18%|█▊        | 9/50 [00:10<00:29,  1.40it/s]

[I 2025-08-16 12:26:14,939] Trial 8 finished with value: 0.7405151729906153 and parameters: {'n_estimators': 212, 'max_depth': 19, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  20%|██        | 10/50 [00:10<00:23,  1.68it/s]

[I 2025-08-16 12:26:15,266] Trial 9 finished with value: 0.7503825755846946 and parameters: {'n_estimators': 102, 'max_depth': 26, 'min_samples_split': 15, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  22%|██▏       | 11/50 [00:11<00:32,  1.18it/s]

[I 2025-08-16 12:26:16,676] Trial 10 finished with value: 0.7634282924523244 and parameters: {'n_estimators': 478, 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 0.5}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  24%|██▍       | 12/50 [00:12<00:30,  1.25it/s]

[I 2025-08-16 12:26:17,377] Trial 11 finished with value: 0.7621929488455865 and parameters: {'n_estimators': 190, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 0.7}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  26%|██▌       | 13/50 [00:13<00:33,  1.09it/s]

[I 2025-08-16 12:26:18,556] Trial 12 finished with value: 0.7686063699045359 and parameters: {'n_estimators': 388, 'max_depth': 14, 'min_samples_split': 14, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  28%|██▊       | 14/50 [00:14<00:37,  1.03s/it]

[I 2025-08-16 12:26:19,853] Trial 13 finished with value: 0.7558602434322967 and parameters: {'n_estimators': 402, 'max_depth': 11, 'min_samples_split': 14, 'min_samples_leaf': 4, 'max_features': 0.5}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  30%|███       | 15/50 [00:16<00:36,  1.04s/it]

[I 2025-08-16 12:26:20,917] Trial 14 finished with value: 0.7556961863747541 and parameters: {'n_estimators': 391, 'max_depth': 6, 'min_samples_split': 13, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 7. Best value: 0.774319:  32%|███▏      | 16/50 [00:17<00:39,  1.17s/it]

[I 2025-08-16 12:26:22,399] Trial 15 finished with value: 0.7563815029855725 and parameters: {'n_estimators': 494, 'max_depth': 14, 'min_samples_split': 17, 'min_samples_leaf': 5, 'max_features': 0.5}. Best is trial 7 with value: 0.7743185283753116.


Best trial: 16. Best value: 0.776812:  34%|███▍      | 17/50 [00:18<00:38,  1.16s/it]

[I 2025-08-16 12:26:23,520] Trial 16 finished with value: 0.7768124254765679 and parameters: {'n_estimators': 415, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 16 with value: 0.7768124254765679.


Best trial: 16. Best value: 0.776812:  36%|███▌      | 18/50 [00:19<00:37,  1.16s/it]

[I 2025-08-16 12:26:24,701] Trial 17 finished with value: 0.7673638616509402 and parameters: {'n_estimators': 445, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 16 with value: 0.7768124254765679.


Best trial: 16. Best value: 0.776812:  38%|███▊      | 19/50 [00:20<00:35,  1.14s/it]

[I 2025-08-16 12:26:25,769] Trial 18 finished with value: 0.7749433635257861 and parameters: {'n_estimators': 353, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 16 with value: 0.7768124254765679.


Best trial: 16. Best value: 0.776812:  40%|████      | 20/50 [00:21<00:32,  1.09s/it]

[I 2025-08-16 12:26:26,760] Trial 19 finished with value: 0.7684753265231947 and parameters: {'n_estimators': 364, 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 16 with value: 0.7768124254765679.


Best trial: 16. Best value: 0.776812:  42%|████▏     | 21/50 [00:23<00:32,  1.11s/it]

[I 2025-08-16 12:26:27,923] Trial 20 finished with value: 0.7744427287180697 and parameters: {'n_estimators': 429, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 16 with value: 0.7768124254765679.


Best trial: 21. Best value: 0.778039:  44%|████▍     | 22/50 [00:24<00:31,  1.13s/it]

[I 2025-08-16 12:26:29,084] Trial 21 finished with value: 0.778039015864265 and parameters: {'n_estimators': 435, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 21 with value: 0.778039015864265.


Best trial: 22. Best value: 0.77957:  46%|████▌     | 23/50 [00:25<00:30,  1.14s/it] 

[I 2025-08-16 12:26:30,258] Trial 22 finished with value: 0.7795699308614442 and parameters: {'n_estimators': 440, 'max_depth': 16, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  48%|████▊     | 24/50 [00:26<00:30,  1.15s/it]

[I 2025-08-16 12:26:31,442] Trial 23 finished with value: 0.7691053450752098 and parameters: {'n_estimators': 455, 'max_depth': 16, 'min_samples_split': 12, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  50%|█████     | 25/50 [00:27<00:28,  1.15s/it]

[I 2025-08-16 12:26:32,580] Trial 24 finished with value: 0.7711291120217623 and parameters: {'n_estimators': 423, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  52%|█████▏    | 26/50 [00:28<00:28,  1.18s/it]

[I 2025-08-16 12:26:33,836] Trial 25 finished with value: 0.76608268113327 and parameters: {'n_estimators': 461, 'max_depth': 16, 'min_samples_split': 11, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  54%|█████▍    | 27/50 [00:30<00:26,  1.16s/it]

[I 2025-08-16 12:26:34,958] Trial 26 finished with value: 0.7745653206359948 and parameters: {'n_estimators': 414, 'max_depth': 22, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  56%|█████▌    | 28/50 [00:31<00:28,  1.30s/it]

[I 2025-08-16 12:26:36,589] Trial 27 finished with value: 0.7750589515794063 and parameters: {'n_estimators': 494, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 0.7}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  58%|█████▊    | 29/50 [00:32<00:24,  1.15s/it]

[I 2025-08-16 12:26:37,395] Trial 28 finished with value: 0.7766322163073607 and parameters: {'n_estimators': 293, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  60%|██████    | 30/50 [00:33<00:23,  1.16s/it]

[I 2025-08-16 12:26:38,580] Trial 29 finished with value: 0.7628226125768813 and parameters: {'n_estimators': 371, 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 0.7}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 22. Best value: 0.77957:  62%|██████▏   | 31/50 [00:34<00:20,  1.09s/it]

[I 2025-08-16 12:26:39,508] Trial 30 finished with value: 0.7323013307446489 and parameters: {'n_estimators': 337, 'max_depth': 20, 'min_samples_split': 11, 'min_samples_leaf': 7, 'max_features': 'log2'}. Best is trial 22 with value: 0.7795699308614442.


Best trial: 31. Best value: 0.780525:  64%|██████▍   | 32/50 [00:35<00:17,  1.00it/s]

[I 2025-08-16 12:26:40,281] Trial 31 finished with value: 0.7805254174156367 and parameters: {'n_estimators': 281, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 31 with value: 0.7805254174156367.


Best trial: 32. Best value: 0.780674:  66%|██████▌   | 33/50 [00:36<00:15,  1.10it/s]

[I 2025-08-16 12:26:40,992] Trial 32 finished with value: 0.780674383582999 and parameters: {'n_estimators': 257, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 32 with value: 0.780674383582999.


Best trial: 32. Best value: 0.780674:  68%|██████▊   | 34/50 [00:36<00:13,  1.16it/s]

[I 2025-08-16 12:26:41,732] Trial 33 finished with value: 0.7773126237891121 and parameters: {'n_estimators': 261, 'max_depth': 13, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 32 with value: 0.780674383582999.


Best trial: 32. Best value: 0.780674:  70%|███████   | 35/50 [00:37<00:12,  1.23it/s]

[I 2025-08-16 12:26:42,443] Trial 34 finished with value: 0.7747078497472331 and parameters: {'n_estimators': 254, 'max_depth': 15, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 32 with value: 0.780674383582999.


Best trial: 32. Best value: 0.780674:  72%|███████▏  | 36/50 [00:38<00:11,  1.21it/s]

[I 2025-08-16 12:26:43,301] Trial 35 finished with value: 0.7633940294763717 and parameters: {'n_estimators': 316, 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 32 with value: 0.780674383582999.


Best trial: 32. Best value: 0.780674:  74%|███████▍  | 37/50 [00:39<00:10,  1.29it/s]

[I 2025-08-16 12:26:43,953] Trial 36 finished with value: 0.765097676160494 and parameters: {'n_estimators': 224, 'max_depth': 11, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 32 with value: 0.780674383582999.


Best trial: 32. Best value: 0.780674:  76%|███████▌  | 38/50 [00:39<00:09,  1.22it/s]

[I 2025-08-16 12:26:44,867] Trial 37 finished with value: 0.7520766933558178 and parameters: {'n_estimators': 271, 'max_depth': 17, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 0.7}. Best is trial 32 with value: 0.780674383582999.


Best trial: 38. Best value: 0.783201:  78%|███████▊  | 39/50 [00:40<00:08,  1.36it/s]

[I 2025-08-16 12:26:45,413] Trial 38 finished with value: 0.7832011776653058 and parameters: {'n_estimators': 186, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  80%|████████  | 40/50 [00:41<00:06,  1.49it/s]

[I 2025-08-16 12:26:45,931] Trial 39 finished with value: 0.7724004690430929 and parameters: {'n_estimators': 172, 'max_depth': 14, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  82%|████████▏ | 41/50 [00:41<00:05,  1.51it/s]

[I 2025-08-16 12:26:46,573] Trial 40 finished with value: 0.7575449302763726 and parameters: {'n_estimators': 229, 'max_depth': 30, 'min_samples_split': 3, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  84%|████████▍ | 42/50 [00:42<00:05,  1.39it/s]

[I 2025-08-16 12:26:47,420] Trial 41 finished with value: 0.7747327511778404 and parameters: {'n_estimators': 308, 'max_depth': 12, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  86%|████████▌ | 43/50 [00:43<00:04,  1.52it/s]

[I 2025-08-16 12:26:47,937] Trial 42 finished with value: 0.7759421713032063 and parameters: {'n_estimators': 169, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  88%|████████▊ | 44/50 [00:43<00:03,  1.52it/s]

[I 2025-08-16 12:26:48,597] Trial 43 finished with value: 0.7701059838221496 and parameters: {'n_estimators': 237, 'max_depth': 16, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  90%|█████████ | 45/50 [00:44<00:03,  1.42it/s]

[I 2025-08-16 12:26:49,405] Trial 44 finished with value: 0.7797052704743667 and parameters: {'n_estimators': 284, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  92%|█████████▏| 46/50 [00:45<00:02,  1.49it/s]

[I 2025-08-16 12:26:50,005] Trial 45 finished with value: 0.765062881550259 and parameters: {'n_estimators': 202, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  94%|█████████▍| 47/50 [00:45<00:02,  1.41it/s]

[I 2025-08-16 12:26:50,792] Trial 46 finished with value: 0.7634711560782712 and parameters: {'n_estimators': 284, 'max_depth': 18, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  96%|█████████▌| 48/50 [00:46<00:01,  1.23it/s]

[I 2025-08-16 12:26:51,856] Trial 47 finished with value: 0.7584914729302871 and parameters: {'n_estimators': 336, 'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 0.5}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201:  98%|█████████▊| 49/50 [00:47<00:00,  1.28it/s]

[I 2025-08-16 12:26:52,557] Trial 48 finished with value: 0.7739266993694363 and parameters: {'n_estimators': 242, 'max_depth': 15, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 38 with value: 0.7832011776653058.


Best trial: 38. Best value: 0.783201: 100%|██████████| 50/50 [00:48<00:00,  1.03it/s]
[I 2025-08-16 12:26:53,496] A new study created in memory with name: no-name-c1943cf4-713c-480f-acdd-d08b5735066f


[I 2025-08-16 12:26:53,492] Trial 49 finished with value: 0.7682934404780236 and parameters: {'n_estimators': 274, 'max_depth': 11, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_features': 0.7}. Best is trial 38 with value: 0.7832011776653058.
Best balanced_rf score: 0.7832
Best balanced_rf params: {'n_estimators': 186, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_features': 'log2'}
Optimizing xgboost...


Best trial: 0. Best value: 0.735858:   2%|▏         | 1/50 [00:00<00:24,  1.98it/s]

[I 2025-08-16 12:26:54,001] Trial 0 finished with value: 0.7358583393672348 and parameters: {'n_estimators': 437, 'learning_rate': 0.28570714885887566, 'max_depth': 12, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'reg_alpha': 1.5599452033620265, 'reg_lambda': 0.5808361216819946}. Best is trial 0 with value: 0.7358583393672348.


Best trial: 0. Best value: 0.735858:   4%|▍         | 2/50 [00:01<00:24,  1.96it/s]

[I 2025-08-16 12:26:54,515] Trial 1 finished with value: 0.6912435896388998 and parameters: {'n_estimators': 880, 'learning_rate': 0.18432335340553055, 'max_depth': 12, 'subsample': 0.608233797718321, 'colsample_bytree': 0.9879639408647978, 'reg_alpha': 8.324426408004218, 'reg_lambda': 2.1233911067827616}. Best is trial 0 with value: 0.7358583393672348.


Best trial: 2. Best value: 0.743138:   6%|▌         | 3/50 [00:01<00:24,  1.94it/s]

[I 2025-08-16 12:26:55,034] Trial 2 finished with value: 0.7431380034422541 and parameters: {'n_estimators': 263, 'learning_rate': 0.06318730785749581, 'max_depth': 6, 'subsample': 0.8099025726528951, 'colsample_bytree': 0.7727780074568463, 'reg_alpha': 2.9122914019804194, 'reg_lambda': 6.118528947223795}. Best is trial 2 with value: 0.7431380034422541.


Best trial: 3. Best value: 0.757272:   8%|▊         | 4/50 [00:02<00:23,  1.98it/s]

[I 2025-08-16 12:26:55,527] Trial 3 finished with value: 0.7572717961704245 and parameters: {'n_estimators': 225, 'learning_rate': 0.09472194807521325, 'max_depth': 7, 'subsample': 0.7824279936868144, 'colsample_bytree': 0.9140703845572055, 'reg_alpha': 1.9967378215835974, 'reg_lambda': 5.142344384136116}. Best is trial 3 with value: 0.7572717961704245.


Best trial: 3. Best value: 0.757272:  10%|█         | 5/50 [00:02<00:23,  1.95it/s]

[I 2025-08-16 12:26:56,054] Trial 4 finished with value: 0.6155139937447738 and parameters: {'n_estimators': 633, 'learning_rate': 0.02347061968879934, 'max_depth': 10, 'subsample': 0.6682096494749166, 'colsample_bytree': 0.6260206371941118, 'reg_alpha': 9.488855372533333, 'reg_lambda': 9.656320330745594}. Best is trial 3 with value: 0.7572717961704245.


Best trial: 5. Best value: 0.76126:  14%|█▍        | 7/50 [00:03<00:18,  2.36it/s] 

[I 2025-08-16 12:26:56,680] Trial 5 finished with value: 0.7612601056841692 and parameters: {'n_estimators': 828, 'learning_rate': 0.09833799306027749, 'max_depth': 4, 'subsample': 0.8736932106048627, 'colsample_bytree': 0.7760609974958406, 'reg_alpha': 1.2203823484477883, 'reg_lambda': 4.951769101112702}. Best is trial 5 with value: 0.7612601056841692.
[I 2025-08-16 12:26:56,842] Trial 6 finished with value: 0.7251151304698815 and parameters: {'n_estimators': 130, 'learning_rate': 0.2737029166028468, 'max_depth': 6, 'subsample': 0.8650089137415928, 'colsample_bytree': 0.7246844304357644, 'reg_alpha': 5.200680211778108, 'reg_lambda': 5.4671027934327965}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  16%|█▌        | 8/50 [00:03<00:15,  2.77it/s]

[I 2025-08-16 12:26:57,066] Trial 7 finished with value: 0.7062324041712195 and parameters: {'n_estimators': 266, 'learning_rate': 0.291179542051722, 'max_depth': 13, 'subsample': 0.9757995766256756, 'colsample_bytree': 0.9579309401710595, 'reg_alpha': 5.978999788110851, 'reg_lambda': 9.218742350231167}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  18%|█▊        | 9/50 [00:03<00:12,  3.17it/s]

[I 2025-08-16 12:26:57,280] Trial 8 finished with value: 0.7025060207282264 and parameters: {'n_estimators': 179, 'learning_rate': 0.0668350301015521, 'max_depth': 3, 'subsample': 0.7301321323053057, 'colsample_bytree': 0.7554709158757928, 'reg_alpha': 2.713490317738959, 'reg_lambda': 8.287375091519294}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  20%|██        | 10/50 [00:04<00:17,  2.32it/s]

[I 2025-08-16 12:26:57,972] Trial 9 finished with value: 0.7545479819328926 and parameters: {'n_estimators': 421, 'learning_rate': 0.09147100780934041, 'max_depth': 10, 'subsample': 0.6563696899899051, 'colsample_bytree': 0.9208787923016158, 'reg_alpha': 0.7455064367977082, 'reg_lambda': 9.868869366005173}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  22%|██▏       | 11/50 [00:04<00:17,  2.29it/s]

[I 2025-08-16 12:26:58,422] Trial 10 finished with value: 0.7366306148769917 and parameters: {'n_estimators': 979, 'learning_rate': 0.17206676049466557, 'max_depth': 3, 'subsample': 0.9481974559098771, 'colsample_bytree': 0.8560354009870226, 'reg_alpha': 4.0642885991626425, 'reg_lambda': 3.310460817165841}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  24%|██▍       | 12/50 [00:05<00:20,  1.86it/s]

[I 2025-08-16 12:26:59,191] Trial 11 finished with value: 0.7567572242395405 and parameters: {'n_estimators': 712, 'learning_rate': 0.13787127825090473, 'max_depth': 6, 'subsample': 0.9029275796221661, 'colsample_bytree': 0.8560597236540552, 'reg_alpha': 0.26664184118260326, 'reg_lambda': 6.857389579575569}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  26%|██▌       | 13/50 [00:06<00:21,  1.70it/s]

[I 2025-08-16 12:26:59,895] Trial 12 finished with value: 0.7526291337527136 and parameters: {'n_estimators': 777, 'learning_rate': 0.11700946572580845, 'max_depth': 8, 'subsample': 0.7643900954643358, 'colsample_bytree': 0.8528862413275126, 'reg_alpha': 2.14371635598811, 'reg_lambda': 4.11118282907679}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  28%|██▊       | 14/50 [00:06<00:18,  1.90it/s]

[I 2025-08-16 12:27:00,274] Trial 13 finished with value: 0.752552495833203 and parameters: {'n_estimators': 516, 'learning_rate': 0.2122458566313215, 'max_depth': 5, 'subsample': 0.7558679767551296, 'colsample_bytree': 0.9074759504187729, 'reg_alpha': 3.679874119364419, 'reg_lambda': 7.119778022134117}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 5. Best value: 0.76126:  30%|███       | 15/50 [00:07<00:24,  1.41it/s]

[I 2025-08-16 12:27:01,410] Trial 14 finished with value: 0.6809059883779878 and parameters: {'n_estimators': 845, 'learning_rate': 0.011538234347225965, 'max_depth': 15, 'subsample': 0.9041411621197417, 'colsample_bytree': 0.6978753170579768, 'reg_alpha': 6.634064286119825, 'reg_lambda': 4.372871826201566}. Best is trial 5 with value: 0.7612601056841692.


Best trial: 15. Best value: 0.763992:  32%|███▏      | 16/50 [00:08<00:25,  1.34it/s]

[I 2025-08-16 12:27:02,243] Trial 15 finished with value: 0.7639923449128374 and parameters: {'n_estimators': 998, 'learning_rate': 0.10634943216578897, 'max_depth': 8, 'subsample': 0.7993161330686468, 'colsample_bytree': 0.8074717850017038, 'reg_alpha': 0.9963266324999739, 'reg_lambda': 2.5723828563595044}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  34%|███▍      | 17/50 [00:09<00:22,  1.46it/s]

[I 2025-08-16 12:27:02,783] Trial 16 finished with value: 0.7421290128377216 and parameters: {'n_estimators': 985, 'learning_rate': 0.23048746014516241, 'max_depth': 4, 'subsample': 0.8715861879790445, 'colsample_bytree': 0.8115867305511093, 'reg_alpha': 0.03383135439595697, 'reg_lambda': 2.271860634017754}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  36%|███▌      | 18/50 [00:09<00:21,  1.51it/s]

[I 2025-08-16 12:27:03,389] Trial 17 finished with value: 0.7580683912612615 and parameters: {'n_estimators': 888, 'learning_rate': 0.13178906312768046, 'max_depth': 9, 'subsample': 0.7144083225862191, 'colsample_bytree': 0.8136510152040614, 'reg_alpha': 1.135215342684284, 'reg_lambda': 0.20913566483403834}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  38%|███▊      | 19/50 [00:10<00:20,  1.49it/s]

[I 2025-08-16 12:27:04,088] Trial 18 finished with value: 0.7358112037223455 and parameters: {'n_estimators': 720, 'learning_rate': 0.048723742174163434, 'max_depth': 8, 'subsample': 0.8190649625879555, 'colsample_bytree': 0.7242757678791987, 'reg_alpha': 3.8876919478638627, 'reg_lambda': 2.5814933494405357}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  40%|████      | 20/50 [00:11<00:18,  1.60it/s]

[I 2025-08-16 12:27:04,598] Trial 19 finished with value: 0.7604415206707164 and parameters: {'n_estimators': 992, 'learning_rate': 0.1575446181570262, 'max_depth': 4, 'subsample': 0.9181943275564509, 'colsample_bytree': 0.7794206098246804, 'reg_alpha': 1.2093484591805055, 'reg_lambda': 1.3389557464773583}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  42%|████▏     | 21/50 [00:11<00:15,  1.84it/s]

[I 2025-08-16 12:27:04,956] Trial 20 finished with value: 0.6506318592046307 and parameters: {'n_estimators': 616, 'learning_rate': 0.0989369598357031, 'max_depth': 10, 'subsample': 0.9860930071516779, 'colsample_bytree': 0.6733221423256304, 'reg_alpha': 7.425758344526891, 'reg_lambda': 3.407546981441344}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  44%|████▍     | 22/50 [00:11<00:14,  1.92it/s]

[I 2025-08-16 12:27:05,427] Trial 21 finished with value: 0.7545088492388968 and parameters: {'n_estimators': 993, 'learning_rate': 0.16152824071241334, 'max_depth': 4, 'subsample': 0.9329457014880672, 'colsample_bytree': 0.775808712907188, 'reg_alpha': 1.331123357861958, 'reg_lambda': 1.0867695640214654}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  46%|████▌     | 23/50 [00:12<00:13,  1.97it/s]

[I 2025-08-16 12:27:05,899] Trial 22 finished with value: 0.7462121694218699 and parameters: {'n_estimators': 899, 'learning_rate': 0.1932583709209336, 'max_depth': 5, 'subsample': 0.8530521571049274, 'colsample_bytree': 0.8128562553209228, 'reg_alpha': 2.8556812623855627, 'reg_lambda': 1.520950925905088}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  48%|████▊     | 24/50 [00:12<00:13,  1.91it/s]

[I 2025-08-16 12:27:06,461] Trial 23 finished with value: 0.7575816918839249 and parameters: {'n_estimators': 808, 'learning_rate': 0.14430442272567495, 'max_depth': 4, 'subsample': 0.8992298801267097, 'colsample_bytree': 0.7403818969980785, 'reg_alpha': 0.6793651525075655, 'reg_lambda': 3.263004743773977}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  50%|█████     | 25/50 [00:13<00:13,  1.85it/s]

[I 2025-08-16 12:27:07,046] Trial 24 finished with value: 0.7553624165702839 and parameters: {'n_estimators': 934, 'learning_rate': 0.1175847160011874, 'max_depth': 7, 'subsample': 0.9352277149996444, 'colsample_bytree': 0.7908802065171119, 'reg_alpha': 2.1068334648370226, 'reg_lambda': 1.5186203585760405}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  52%|█████▏    | 26/50 [00:13<00:12,  1.96it/s]

[I 2025-08-16 12:27:07,483] Trial 25 finished with value: 0.7330923668521243 and parameters: {'n_estimators': 757, 'learning_rate': 0.07865550044982511, 'max_depth': 3, 'subsample': 0.8189101515622529, 'colsample_bytree': 0.8545664168599634, 'reg_alpha': 5.0439375869663055, 'reg_lambda': 4.28419308496827}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  54%|█████▍    | 27/50 [00:14<00:11,  1.96it/s]

[I 2025-08-16 12:27:07,994] Trial 26 finished with value: 0.7400547425182507 and parameters: {'n_estimators': 927, 'learning_rate': 0.11607374008897865, 'max_depth': 5, 'subsample': 0.8871975834497376, 'colsample_bytree': 0.8266609065920151, 'reg_alpha': 3.538912065468056, 'reg_lambda': 2.5022106344777533}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  56%|█████▌    | 28/50 [00:15<00:13,  1.57it/s]

[I 2025-08-16 12:27:08,922] Trial 27 finished with value: 0.7607164929826503 and parameters: {'n_estimators': 842, 'learning_rate': 0.041679070123142564, 'max_depth': 7, 'subsample': 0.8395760274820593, 'colsample_bytree': 0.8900646198797657, 'reg_alpha': 0.065998300060648, 'reg_lambda': 0.9583761144631582}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  58%|█████▊    | 29/50 [00:16<00:13,  1.51it/s]

[I 2025-08-16 12:27:09,653] Trial 28 finished with value: 0.7576486489587373 and parameters: {'n_estimators': 668, 'learning_rate': 0.044864639447712976, 'max_depth': 8, 'subsample': 0.788343975597808, 'colsample_bytree': 0.8883560052980131, 'reg_alpha': 0.1447753899452281, 'reg_lambda': 0.1490781700668522}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  60%|██████    | 30/50 [00:17<00:14,  1.38it/s]

[I 2025-08-16 12:27:10,520] Trial 29 finished with value: 0.7469678837954554 and parameters: {'n_estimators': 539, 'learning_rate': 0.0387882252096551, 'max_depth': 11, 'subsample': 0.8496459634771579, 'colsample_bytree': 0.8809079046395857, 'reg_alpha': 2.052717795262336, 'reg_lambda': 0.7710110400691172}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  62%|██████▏   | 31/50 [00:17<00:14,  1.33it/s]

[I 2025-08-16 12:27:11,325] Trial 30 finished with value: 0.7499342404774991 and parameters: {'n_estimators': 424, 'learning_rate': 0.07592098824880371, 'max_depth': 9, 'subsample': 0.8358425908003372, 'colsample_bytree': 0.9440163417454057, 'reg_alpha': 1.6398455109965577, 'reg_lambda': 6.248795206786371}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  64%|██████▍   | 32/50 [00:18<00:12,  1.44it/s]

[I 2025-08-16 12:27:11,890] Trial 31 finished with value: 0.7582720242351995 and parameters: {'n_estimators': 832, 'learning_rate': 0.15061050048102034, 'max_depth': 7, 'subsample': 0.9243717279765166, 'colsample_bytree': 0.7647936062963865, 'reg_alpha': 0.8858255356452741, 'reg_lambda': 1.459067624523573}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  66%|██████▌   | 33/50 [00:19<00:11,  1.47it/s]

[I 2025-08-16 12:27:12,535] Trial 32 finished with value: 0.7508624966433648 and parameters: {'n_estimators': 935, 'learning_rate': 0.10511180386922273, 'max_depth': 5, 'subsample': 0.8331085837562113, 'colsample_bytree': 0.7873844403262044, 'reg_alpha': 1.49720606495635, 'reg_lambda': 1.9726077439149228}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  68%|██████▊   | 34/50 [00:19<00:11,  1.35it/s]

[I 2025-08-16 12:27:13,421] Trial 33 finished with value: 0.750966828797097 and parameters: {'n_estimators': 862, 'learning_rate': 0.060184415436309674, 'max_depth': 6, 'subsample': 0.8775379941655105, 'colsample_bytree': 0.9786167796977798, 'reg_alpha': 0.6287328141902806, 'reg_lambda': 0.8542342663660235}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  70%|███████   | 35/50 [00:20<00:10,  1.48it/s]

[I 2025-08-16 12:27:13,942] Trial 34 finished with value: 0.7536203038162164 and parameters: {'n_estimators': 998, 'learning_rate': 0.18736919071150548, 'max_depth': 7, 'subsample': 0.966437870069295, 'colsample_bytree': 0.8328724556471565, 'reg_alpha': 2.6825996583652976, 'reg_lambda': 3.0667075934231827}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  72%|███████▏  | 36/50 [00:21<00:10,  1.38it/s]

[I 2025-08-16 12:27:14,774] Trial 35 finished with value: 0.7539004014960515 and parameters: {'n_estimators': 794, 'learning_rate': 0.028765911503455678, 'max_depth': 4, 'subsample': 0.7906601097441224, 'colsample_bytree': 0.7082214910645133, 'reg_alpha': 1.1763765189674493, 'reg_lambda': 3.8925182673577168}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  74%|███████▍  | 37/50 [00:22<00:09,  1.33it/s]

[I 2025-08-16 12:27:15,604] Trial 36 finished with value: 0.7562734551268302 and parameters: {'n_estimators': 932, 'learning_rate': 0.08338584451441469, 'max_depth': 6, 'subsample': 0.80540630933865, 'colsample_bytree': 0.6195196661673542, 'reg_alpha': 1.6135775117517401, 'reg_lambda': 4.982269474224089}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  76%|███████▌  | 38/50 [00:22<00:07,  1.57it/s]

[I 2025-08-16 12:27:15,962] Trial 37 finished with value: 0.6732111247225678 and parameters: {'n_estimators': 870, 'learning_rate': 0.25549980502299047, 'max_depth': 9, 'subsample': 0.7541948801616104, 'colsample_bytree': 0.6584846570294197, 'reg_alpha': 9.982835861660035, 'reg_lambda': 1.9051211464508355}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  78%|███████▊  | 39/50 [00:22<00:06,  1.77it/s]

[I 2025-08-16 12:27:16,360] Trial 38 finished with value: 0.6778876753576502 and parameters: {'n_estimators': 742, 'learning_rate': 0.12457176240742585, 'max_depth': 12, 'subsample': 0.9160938280039049, 'colsample_bytree': 0.7486555700663549, 'reg_alpha': 8.31212283300736, 'reg_lambda': 2.749228228050774}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  80%|████████  | 40/50 [00:23<00:04,  2.12it/s]

[I 2025-08-16 12:27:16,616] Trial 39 finished with value: 0.7201980367063199 and parameters: {'n_estimators': 482, 'learning_rate': 0.1667015368349067, 'max_depth': 3, 'subsample': 0.9998439652050216, 'colsample_bytree': 0.8840231465372183, 'reg_alpha': 3.3093398906058042, 'reg_lambda': 4.873573294679303}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  82%|████████▏ | 41/50 [00:24<00:05,  1.55it/s]

[I 2025-08-16 12:27:17,668] Trial 40 finished with value: 0.7573243711300961 and parameters: {'n_estimators': 629, 'learning_rate': 0.05807501221433218, 'max_depth': 8, 'subsample': 0.9595809628558825, 'colsample_bytree': 0.7817223110110744, 'reg_alpha': 0.41072638519900795, 'reg_lambda': 5.8706238447490255}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  84%|████████▍ | 42/50 [00:24<00:05,  1.59it/s]

[I 2025-08-16 12:27:18,254] Trial 41 finished with value: 0.7474137167765762 and parameters: {'n_estimators': 823, 'learning_rate': 0.14541052987867747, 'max_depth': 7, 'subsample': 0.9272796021046285, 'colsample_bytree': 0.7680853959990371, 'reg_alpha': 0.8618834333481645, 'reg_lambda': 1.4336339028157568}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  86%|████████▌ | 43/50 [00:25<00:04,  1.68it/s]

[I 2025-08-16 12:27:18,779] Trial 42 finished with value: 0.7564517183222363 and parameters: {'n_estimators': 961, 'learning_rate': 0.15632207567457584, 'max_depth': 6, 'subsample': 0.86320953619154, 'colsample_bytree': 0.7602976612807691, 'reg_alpha': 2.323508922276618, 'reg_lambda': 0.5275600840314185}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 15. Best value: 0.763992:  88%|████████▊ | 44/50 [00:25<00:03,  1.89it/s]

[I 2025-08-16 12:27:19,150] Trial 43 finished with value: 0.7357820477047275 and parameters: {'n_estimators': 670, 'learning_rate': 0.19927733988172666, 'max_depth': 7, 'subsample': 0.8897293258616683, 'colsample_bytree': 0.7943729120219895, 'reg_alpha': 4.357172896644085, 'reg_lambda': 1.351360925956321}. Best is trial 15 with value: 0.7639923449128374.


Best trial: 44. Best value: 0.769539:  90%|█████████ | 45/50 [00:26<00:02,  1.77it/s]

[I 2025-08-16 12:27:19,796] Trial 44 finished with value: 0.7695386921814811 and parameters: {'n_estimators': 843, 'learning_rate': 0.10312067087966213, 'max_depth': 5, 'subsample': 0.8325482926975885, 'colsample_bytree': 0.7261669818136228, 'reg_alpha': 0.9782901073346899, 'reg_lambda': 1.9942914029552579}. Best is trial 44 with value: 0.7695386921814811.


Best trial: 44. Best value: 0.769539:  92%|█████████▏| 46/50 [00:26<00:02,  1.69it/s]

[I 2025-08-16 12:27:20,456] Trial 45 finished with value: 0.7679626834125355 and parameters: {'n_estimators': 890, 'learning_rate': 0.08405376612202681, 'max_depth': 5, 'subsample': 0.834532959034714, 'colsample_bytree': 0.7313783442060275, 'reg_alpha': 1.616455442676839, 'reg_lambda': 2.020432221795278}. Best is trial 44 with value: 0.7695386921814811.


Best trial: 44. Best value: 0.769539:  94%|█████████▍| 47/50 [00:27<00:01,  1.81it/s]

[I 2025-08-16 12:27:20,909] Trial 46 finished with value: 0.7569285657664322 and parameters: {'n_estimators': 349, 'learning_rate': 0.10219770816971559, 'max_depth': 5, 'subsample': 0.8225686387987796, 'colsample_bytree': 0.6835684569462417, 'reg_alpha': 1.749043883311312, 'reg_lambda': 7.522721139123716}. Best is trial 44 with value: 0.7695386921814811.


Best trial: 44. Best value: 0.769539:  96%|█████████▌| 48/50 [00:28<00:01,  1.69it/s]

[I 2025-08-16 12:27:21,598] Trial 47 finished with value: 0.7593762674371095 and parameters: {'n_estimators': 897, 'learning_rate': 0.08786311250721653, 'max_depth': 6, 'subsample': 0.7765576910406307, 'colsample_bytree': 0.7257700880822635, 'reg_alpha': 2.4555321453169117, 'reg_lambda': 3.7672782146918222}. Best is trial 44 with value: 0.7695386921814811.


Best trial: 44. Best value: 0.769539:  98%|█████████▊| 49/50 [00:28<00:00,  1.70it/s]

[I 2025-08-16 12:27:22,171] Trial 48 finished with value: 0.7467417147601262 and parameters: {'n_estimators': 788, 'learning_rate': 0.06988212222940385, 'max_depth': 5, 'subsample': 0.7091299506469884, 'colsample_bytree': 0.6486805904761237, 'reg_alpha': 3.2006190791075477, 'reg_lambda': 2.0213078024813695}. Best is trial 44 with value: 0.7695386921814811.


Best trial: 44. Best value: 0.769539: 100%|██████████| 50/50 [00:29<00:00,  1.71it/s]
[I 2025-08-16 12:27:22,734] A new study created in memory with name: no-name-c232d970-91f8-48dc-a990-70ba9257d4f9


[I 2025-08-16 12:27:22,727] Trial 49 finished with value: 0.7435413834789318 and parameters: {'n_estimators': 851, 'learning_rate': 0.10980689348303938, 'max_depth': 3, 'subsample': 0.6005800047171014, 'colsample_bytree': 0.7018729710398393, 'reg_alpha': 0.032629361846618056, 'reg_lambda': 8.627121320322841}. Best is trial 44 with value: 0.7695386921814811.
Best xgboost score: 0.7695
Best xgboost params: {'n_estimators': 843, 'learning_rate': 0.10312067087966213, 'max_depth': 5, 'subsample': 0.8325482926975885, 'colsample_bytree': 0.7261669818136228, 'reg_alpha': 0.9782901073346899, 'reg_lambda': 1.9942914029552579}
Optimizing lightgbm...


Best trial: 0. Best value: 0.750156:   2%|▏         | 1/50 [00:00<00:43,  1.12it/s]

[I 2025-08-16 12:27:23,621] Trial 0 finished with value: 0.7501559351980175 and parameters: {'n_estimators': 437, 'learning_rate': 0.28570714885887566, 'max_depth': 12, 'num_leaves': 184, 'subsample': 0.6624074561769746, 'colsample_bytree': 0.662397808134481, 'reg_alpha': 0.5808361216819946, 'reg_lambda': 8.661761457749352}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:   4%|▍         | 2/50 [00:01<00:33,  1.42it/s]

[I 2025-08-16 12:27:24,197] Trial 1 finished with value: 0.7404119623443393 and parameters: {'n_estimators': 641, 'learning_rate': 0.21534104756085318, 'max_depth': 3, 'num_leaves': 292, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'reg_alpha': 1.8182496720710062, 'reg_lambda': 1.8340450985343382}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:   6%|▌         | 3/50 [00:02<00:29,  1.58it/s]

[I 2025-08-16 12:27:24,742] Trial 2 finished with value: 0.721620679090957 and parameters: {'n_estimators': 374, 'learning_rate': 0.16217936517334897, 'max_depth': 8, 'num_leaves': 94, 'subsample': 0.8447411578889518, 'colsample_bytree': 0.6557975442608167, 'reg_alpha': 2.9214464853521815, 'reg_lambda': 3.663618432936917}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  10%|█         | 5/50 [00:02<00:18,  2.42it/s]

[I 2025-08-16 12:27:25,187] Trial 3 finished with value: 0.6340922568578278 and parameters: {'n_estimators': 510, 'learning_rate': 0.23770102880397392, 'max_depth': 5, 'num_leaves': 159, 'subsample': 0.836965827544817, 'colsample_bytree': 0.6185801650879991, 'reg_alpha': 6.075448519014383, 'reg_lambda': 1.7052412368729153}. Best is trial 0 with value: 0.7501559351980175.
[I 2025-08-16 12:27:25,346] Trial 4 finished with value: 0.5934341182359012 and parameters: {'n_estimators': 158, 'learning_rate': 0.2851768058034666, 'max_depth': 15, 'num_leaves': 245, 'subsample': 0.7218455076693483, 'colsample_bytree': 0.6390688456025535, 'reg_alpha': 6.842330265121569, 'reg_lambda': 4.4015249373960135}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  12%|█▏        | 6/50 [00:02<00:15,  2.79it/s]

[I 2025-08-16 12:27:25,596] Trial 5 finished with value: 0.7144857382380262 and parameters: {'n_estimators': 209, 'learning_rate': 0.15360130393226834, 'max_depth': 3, 'num_leaves': 274, 'subsample': 0.7035119926400067, 'colsample_bytree': 0.8650089137415928, 'reg_alpha': 3.1171107608941098, 'reg_lambda': 5.200680211778108}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  16%|█▌        | 8/50 [00:03<00:14,  2.95it/s]

[I 2025-08-16 12:27:26,113] Trial 6 finished with value: 0.6296153844721751 and parameters: {'n_estimators': 592, 'learning_rate': 0.06360779210240283, 'max_depth': 15, 'num_leaves': 235, 'subsample': 0.9757995766256756, 'colsample_bytree': 0.9579309401710595, 'reg_alpha': 5.978999788110851, 'reg_lambda': 9.218742350231167}. Best is trial 0 with value: 0.7501559351980175.
[I 2025-08-16 12:27:26,302] Trial 7 finished with value: 0.5461535763217717 and parameters: {'n_estimators': 179, 'learning_rate': 0.0668350301015521, 'max_depth': 3, 'num_leaves': 104, 'subsample': 0.7554709158757928, 'colsample_bytree': 0.7085396127095583, 'reg_alpha': 8.287375091519294, 'reg_lambda': 3.567533266935893}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  20%|██        | 10/50 [00:03<00:10,  3.74it/s]

[I 2025-08-16 12:27:26,549] Trial 8 finished with value: 0.5583529108770247 and parameters: {'n_estimators': 353, 'learning_rate': 0.16738186411589207, 'max_depth': 4, 'num_leaves': 243, 'subsample': 0.6298202574719083, 'colsample_bytree': 0.9947547746402069, 'reg_alpha': 7.722447692966574, 'reg_lambda': 1.987156815341724}. Best is trial 0 with value: 0.7501559351980175.
[I 2025-08-16 12:27:26,719] Trial 9 finished with value: 0.7086415826281768 and parameters: {'n_estimators': 104, 'learning_rate': 0.2464838142519019, 'max_depth': 12, 'num_leaves': 222, 'subsample': 0.9085081386743783, 'colsample_bytree': 0.6296178606936361, 'reg_alpha': 3.5846572854427263, 'reg_lambda': 1.1586905952512971}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  22%|██▏       | 11/50 [00:05<00:31,  1.25it/s]

[I 2025-08-16 12:27:28,723] Trial 10 finished with value: 0.7267020281175085 and parameters: {'n_estimators': 906, 'learning_rate': 0.011739183057186992, 'max_depth': 11, 'num_leaves': 15, 'subsample': 0.6061470949312417, 'colsample_bytree': 0.775232370984732, 'reg_alpha': 0.1514423710275684, 'reg_lambda': 9.761398998579951}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  24%|██▍       | 12/50 [00:06<00:30,  1.26it/s]

[I 2025-08-16 12:27:29,514] Trial 11 finished with value: 0.7461654693366571 and parameters: {'n_estimators': 721, 'learning_rate': 0.29782308456351225, 'max_depth': 7, 'num_leaves': 175, 'subsample': 0.9801238964661397, 'colsample_bytree': 0.7348640964697393, 'reg_alpha': 0.33632604824042645, 'reg_lambda': 7.447500193911582}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  26%|██▌       | 13/50 [00:07<00:32,  1.12it/s]

[I 2025-08-16 12:27:30,624] Trial 12 finished with value: 0.7388195573328655 and parameters: {'n_estimators': 813, 'learning_rate': 0.2997090806065816, 'max_depth': 7, 'num_leaves': 170, 'subsample': 0.6588310736593839, 'colsample_bytree': 0.7457769212953824, 'reg_alpha': 0.20155949830155026, 'reg_lambda': 7.459953002299737}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  28%|██▊       | 14/50 [00:08<00:25,  1.39it/s]

[I 2025-08-16 12:27:30,950] Trial 13 finished with value: 0.51816991349013 and parameters: {'n_estimators': 733, 'learning_rate': 0.26665867487396183, 'max_depth': 10, 'num_leaves': 197, 'subsample': 0.7875317841632906, 'colsample_bytree': 0.8504636010866535, 'reg_alpha': 9.993624386415513, 'reg_lambda': 7.17749971688129}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  30%|███       | 15/50 [00:08<00:22,  1.55it/s]

[I 2025-08-16 12:27:31,426] Trial 14 finished with value: 0.7397081019991791 and parameters: {'n_estimators': 479, 'learning_rate': 0.20249899649591557, 'max_depth': 13, 'num_leaves': 106, 'subsample': 0.9903555915203195, 'colsample_bytree': 0.7219922567707062, 'reg_alpha': 1.388128375525878, 'reg_lambda': 7.5848609042382495}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  32%|███▏      | 16/50 [00:09<00:19,  1.75it/s]

[I 2025-08-16 12:27:31,821] Trial 15 finished with value: 0.6982809302141368 and parameters: {'n_estimators': 999, 'learning_rate': 0.2924966294977339, 'max_depth': 7, 'num_leaves': 131, 'subsample': 0.8819708330406043, 'colsample_bytree': 0.8204843656037641, 'reg_alpha': 4.446697824011409, 'reg_lambda': 6.187888985394442}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  34%|███▍      | 17/50 [00:09<00:21,  1.57it/s]

[I 2025-08-16 12:27:32,612] Trial 16 finished with value: 0.7426583541421425 and parameters: {'n_estimators': 696, 'learning_rate': 0.11194128085863145, 'max_depth': 10, 'num_leaves': 188, 'subsample': 0.7144083225862191, 'colsample_bytree': 0.7747985445665656, 'reg_alpha': 1.3745631896084256, 'reg_lambda': 8.786376368013872}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  36%|███▌      | 18/50 [00:10<00:17,  1.81it/s]

[I 2025-08-16 12:27:32,965] Trial 17 finished with value: 0.7394907180837076 and parameters: {'n_estimators': 379, 'learning_rate': 0.2524611162853017, 'max_depth': 6, 'num_leaves': 52, 'subsample': 0.6721524828008189, 'colsample_bytree': 0.6812699712112874, 'reg_alpha': 2.125692665960712, 'reg_lambda': 8.388599236833954}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  38%|███▊      | 19/50 [00:10<00:18,  1.66it/s]

[I 2025-08-16 12:27:33,687] Trial 18 finished with value: 0.741776157408724 and parameters: {'n_estimators': 801, 'learning_rate': 0.21025503792531147, 'max_depth': 13, 'num_leaves': 143, 'subsample': 0.7800253102912211, 'colsample_bytree': 0.9255974822311328, 'reg_alpha': 0.5649774648110591, 'reg_lambda': 6.18875476705127}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  40%|████      | 20/50 [00:11<00:14,  2.01it/s]

[I 2025-08-16 12:27:33,937] Trial 19 finished with value: 0.7002908758224021 and parameters: {'n_estimators': 284, 'learning_rate': 0.27663663755477963, 'max_depth': 9, 'num_leaves': 202, 'subsample': 0.8416634198004798, 'colsample_bytree': 0.73964158693172, 'reg_alpha': 4.3529589795941295, 'reg_lambda': 6.301661325079771}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  42%|████▏     | 21/50 [00:11<00:13,  2.13it/s]

[I 2025-08-16 12:27:34,345] Trial 20 finished with value: 0.7331521948794768 and parameters: {'n_estimators': 459, 'learning_rate': 0.18610588251119256, 'max_depth': 9, 'num_leaves': 68, 'subsample': 0.9439388065643592, 'colsample_bytree': 0.8074150913459086, 'reg_alpha': 2.3551856473389225, 'reg_lambda': 8.233339566027105}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  44%|████▍     | 22/50 [00:12<00:16,  1.72it/s]

[I 2025-08-16 12:27:35,191] Trial 21 finished with value: 0.7414569153756541 and parameters: {'n_estimators': 679, 'learning_rate': 0.1258449270373396, 'max_depth': 11, 'num_leaves': 185, 'subsample': 0.6976292924191496, 'colsample_bytree': 0.758761640311636, 'reg_alpha': 1.029578110371975, 'reg_lambda': 8.951732539937689}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  46%|████▌     | 23/50 [00:13<00:17,  1.54it/s]

[I 2025-08-16 12:27:35,996] Trial 22 finished with value: 0.7387275983526709 and parameters: {'n_estimators': 569, 'learning_rate': 0.11512323087763236, 'max_depth': 10, 'num_leaves': 182, 'subsample': 0.7422075034738161, 'colsample_bytree': 0.7881117492312342, 'reg_alpha': 1.2663202592572746, 'reg_lambda': 9.804056213199761}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  48%|████▊     | 24/50 [00:14<00:20,  1.25it/s]

[I 2025-08-16 12:27:37,154] Trial 23 finished with value: 0.7367794284521294 and parameters: {'n_estimators': 747, 'learning_rate': 0.09150339328986107, 'max_depth': 13, 'num_leaves': 140, 'subsample': 0.6636976461559223, 'colsample_bytree': 0.6802407359630315, 'reg_alpha': 0.8796388409564218, 'reg_lambda': 8.44811151410423}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  50%|█████     | 25/50 [00:14<00:18,  1.38it/s]

[I 2025-08-16 12:27:37,696] Trial 24 finished with value: 0.7425391322046554 and parameters: {'n_estimators': 887, 'learning_rate': 0.13962026399312072, 'max_depth': 8, 'num_leaves': 214, 'subsample': 0.6282572457368232, 'colsample_bytree': 0.7121687476627149, 'reg_alpha': 2.3040284206862527, 'reg_lambda': 6.848493569974815}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  52%|█████▏    | 26/50 [00:16<00:20,  1.14it/s]

[I 2025-08-16 12:27:38,917] Trial 25 finished with value: 0.7375793723797671 and parameters: {'n_estimators': 632, 'learning_rate': 0.23018662796732575, 'max_depth': 11, 'num_leaves': 159, 'subsample': 0.7416263006360255, 'colsample_bytree': 0.8393231197408701, 'reg_alpha': 0.22874372105263596, 'reg_lambda': 7.958880323830638}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  54%|█████▍    | 27/50 [00:16<00:19,  1.17it/s]

[I 2025-08-16 12:27:39,732] Trial 26 finished with value: 0.7416322847538086 and parameters: {'n_estimators': 697, 'learning_rate': 0.09692057382559767, 'max_depth': 6, 'num_leaves': 264, 'subsample': 0.6960178587469262, 'colsample_bytree': 0.8842700469806409, 'reg_alpha': 1.5889874121785372, 'reg_lambda': 9.09664966161597}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 0. Best value: 0.750156:  56%|█████▌    | 28/50 [00:17<00:19,  1.15it/s]

[I 2025-08-16 12:27:40,623] Trial 27 finished with value: 0.7097165791055429 and parameters: {'n_estimators': 434, 'learning_rate': 0.02135366473605274, 'max_depth': 14, 'num_leaves': 132, 'subsample': 0.8073547949211446, 'colsample_bytree': 0.6055590232716066, 'reg_alpha': 3.600597382810002, 'reg_lambda': 5.3664025121245515}. Best is trial 0 with value: 0.7501559351980175.


Best trial: 28. Best value: 0.756629:  58%|█████▊    | 29/50 [00:18<00:16,  1.25it/s]

[I 2025-08-16 12:27:41,267] Trial 28 finished with value: 0.7566285242708902 and parameters: {'n_estimators': 525, 'learning_rate': 0.2595987194854613, 'max_depth': 10, 'num_leaves': 187, 'subsample': 0.6500873355832663, 'colsample_bytree': 0.6588205512817252, 'reg_alpha': 0.026801287186797418, 'reg_lambda': 0.25328595278902544}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  60%|██████    | 30/50 [00:18<00:13,  1.49it/s]

[I 2025-08-16 12:27:41,635] Trial 29 finished with value: 0.7387894305551144 and parameters: {'n_estimators': 541, 'learning_rate': 0.26362629921020553, 'max_depth': 12, 'num_leaves': 207, 'subsample': 0.6039821043738987, 'colsample_bytree': 0.6622147649221323, 'reg_alpha': 0.8333979930675514, 'reg_lambda': 0.5732103502828645}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  62%|██████▏   | 31/50 [00:19<00:10,  1.78it/s]

[I 2025-08-16 12:27:41,943] Trial 30 finished with value: 0.7338011210868742 and parameters: {'n_estimators': 298, 'learning_rate': 0.22719322802366299, 'max_depth': 8, 'num_leaves': 170, 'subsample': 0.8796369811324729, 'colsample_bytree': 0.6956163577059217, 'reg_alpha': 1.9896667905657877, 'reg_lambda': 2.758012976497609}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  64%|██████▍   | 32/50 [00:20<00:12,  1.39it/s]

[I 2025-08-16 12:27:43,038] Trial 31 finished with value: 0.748424920128137 and parameters: {'n_estimators': 623, 'learning_rate': 0.27313763685470327, 'max_depth': 10, 'num_leaves': 191, 'subsample': 0.6469269457700841, 'colsample_bytree': 0.6557248418303252, 'reg_alpha': 0.1898911471331765, 'reg_lambda': 6.724877033900641}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  66%|██████▌   | 33/50 [00:21<00:15,  1.07it/s]

[I 2025-08-16 12:27:44,482] Trial 32 finished with value: 0.7451514675025045 and parameters: {'n_estimators': 615, 'learning_rate': 0.27583715189450564, 'max_depth': 9, 'num_leaves': 294, 'subsample': 0.6425915912725652, 'colsample_bytree': 0.659485571293783, 'reg_alpha': 0.0483569600641115, 'reg_lambda': 6.529129230076661}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  68%|██████▊   | 34/50 [00:22<00:12,  1.30it/s]

[I 2025-08-16 12:27:44,853] Trial 33 finished with value: 0.7356249253451408 and parameters: {'n_estimators': 527, 'learning_rate': 0.25659683295286184, 'max_depth': 12, 'num_leaves': 229, 'subsample': 0.645065325377807, 'colsample_bytree': 0.6459044709854953, 'reg_alpha': 0.8245289428317721, 'reg_lambda': 0.2535659843942657}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  70%|███████   | 35/50 [00:22<00:09,  1.58it/s]

[I 2025-08-16 12:27:45,170] Trial 34 finished with value: 0.7158991714525201 and parameters: {'n_estimators': 422, 'learning_rate': 0.2967802615414027, 'max_depth': 7, 'num_leaves': 172, 'subsample': 0.6787354831124199, 'colsample_bytree': 0.6010584540690886, 'reg_alpha': 2.8938646920207116, 'reg_lambda': 4.008657445169629}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  72%|███████▏  | 36/50 [00:23<00:11,  1.26it/s]

[I 2025-08-16 12:27:46,347] Trial 35 finished with value: 0.7413845371547798 and parameters: {'n_estimators': 506, 'learning_rate': 0.24213740156261385, 'max_depth': 10, 'num_leaves': 258, 'subsample': 0.9502915318715661, 'colsample_bytree': 0.6687974252853386, 'reg_alpha': 0.018681643737653464, 'reg_lambda': 5.549176513555686}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  74%|███████▍  | 37/50 [00:24<00:08,  1.46it/s]

[I 2025-08-16 12:27:46,778] Trial 36 finished with value: 0.7495025726949143 and parameters: {'n_estimators': 657, 'learning_rate': 0.27956938933844905, 'max_depth': 8, 'num_leaves': 151, 'subsample': 0.6233761150066758, 'colsample_bytree': 0.6289966357176783, 'reg_alpha': 1.7607032071517956, 'reg_lambda': 4.885701019337033}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  76%|███████▌  | 38/50 [00:24<00:07,  1.68it/s]

[I 2025-08-16 12:27:47,158] Trial 37 finished with value: 0.7170378929883777 and parameters: {'n_estimators': 645, 'learning_rate': 0.2747718886184077, 'max_depth': 11, 'num_leaves': 152, 'subsample': 0.6186282839002933, 'colsample_bytree': 0.625439788638375, 'reg_alpha': 2.6152366951302484, 'reg_lambda': 4.544460488114596}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  78%|███████▊  | 39/50 [00:24<00:05,  1.93it/s]

[I 2025-08-16 12:27:47,501] Trial 38 finished with value: 0.6256364098981637 and parameters: {'n_estimators': 587, 'learning_rate': 0.18181360897409593, 'max_depth': 8, 'num_leaves': 109, 'subsample': 0.6833170608300624, 'colsample_bytree': 0.6411283224105865, 'reg_alpha': 5.576267343225454, 'reg_lambda': 3.060917753037913}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  80%|████████  | 40/50 [00:25<00:04,  2.17it/s]

[I 2025-08-16 12:27:47,824] Trial 39 finished with value: 0.736302250673502 and parameters: {'n_estimators': 398, 'learning_rate': 0.22007087689895977, 'max_depth': 14, 'num_leaves': 125, 'subsample': 0.6468072290711504, 'colsample_bytree': 0.6948026174837052, 'reg_alpha': 1.7733398518338592, 'reg_lambda': 1.7212437787082422}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  82%|████████▏ | 41/50 [00:25<00:03,  2.45it/s]

[I 2025-08-16 12:27:48,110] Trial 40 finished with value: 0.7143315685347988 and parameters: {'n_estimators': 305, 'learning_rate': 0.28123355534717603, 'max_depth': 9, 'num_leaves': 156, 'subsample': 0.7245481464395109, 'colsample_bytree': 0.619465222242369, 'reg_alpha': 3.303047877552574, 'reg_lambda': 2.5159260719597394}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  84%|████████▍ | 42/50 [00:26<00:03,  2.05it/s]

[I 2025-08-16 12:27:48,786] Trial 41 finished with value: 0.741650094566073 and parameters: {'n_estimators': 756, 'learning_rate': 0.26046912691987545, 'max_depth': 6, 'num_leaves': 192, 'subsample': 0.6279900584935664, 'colsample_bytree': 0.6474779782398161, 'reg_alpha': 0.6127194583270895, 'reg_lambda': 7.6593254096323555}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  86%|████████▌ | 43/50 [00:26<00:03,  1.94it/s]

[I 2025-08-16 12:27:49,362] Trial 42 finished with value: 0.7452558720756528 and parameters: {'n_estimators': 660, 'learning_rate': 0.2877181131246317, 'max_depth': 5, 'num_leaves': 213, 'subsample': 0.6001915749819189, 'colsample_bytree': 0.671427395154791, 'reg_alpha': 0.5776317022160474, 'reg_lambda': 5.703741668642688}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  88%|████████▊ | 44/50 [00:27<00:03,  1.99it/s]

[I 2025-08-16 12:27:49,836] Trial 43 finished with value: 0.7470093002321876 and parameters: {'n_estimators': 563, 'learning_rate': 0.2391995877885264, 'max_depth': 8, 'num_leaves': 172, 'subsample': 0.659090158578622, 'colsample_bytree': 0.7327054017054669, 'reg_alpha': 1.5425394991975334, 'reg_lambda': 7.056910932981925}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  90%|█████████ | 45/50 [00:27<00:02,  2.05it/s]

[I 2025-08-16 12:27:50,290] Trial 44 finished with value: 0.7461328226702799 and parameters: {'n_estimators': 558, 'learning_rate': 0.24297321442420555, 'max_depth': 8, 'num_leaves': 223, 'subsample': 0.6546039912540428, 'colsample_bytree': 0.7010290005881998, 'reg_alpha': 1.6835731396150164, 'reg_lambda': 9.509261863674677}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  92%|█████████▏| 46/50 [00:28<00:01,  2.02it/s]

[I 2025-08-16 12:27:50,805] Trial 45 finished with value: 0.7404581695100734 and parameters: {'n_estimators': 492, 'learning_rate': 0.19772247323835676, 'max_depth': 10, 'num_leaves': 118, 'subsample': 0.685445296463094, 'colsample_bytree': 0.6324480685743102, 'reg_alpha': 1.150287794222645, 'reg_lambda': 4.748300980140339}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  94%|█████████▍| 47/50 [00:28<00:01,  2.18it/s]

[I 2025-08-16 12:27:51,176] Trial 46 finished with value: 0.7068011305172024 and parameters: {'n_estimators': 582, 'learning_rate': 0.2337909984203403, 'max_depth': 12, 'num_leaves': 91, 'subsample': 0.6182880032560921, 'colsample_bytree': 0.7215984263623578, 'reg_alpha': 4.008465356907665, 'reg_lambda': 6.829350439609705}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  96%|█████████▌| 48/50 [00:28<00:00,  2.48it/s]

[I 2025-08-16 12:27:51,450] Trial 47 finished with value: 0.587607616268969 and parameters: {'n_estimators': 351, 'learning_rate': 0.26889015507828856, 'max_depth': 8, 'num_leaves': 163, 'subsample': 0.7126701388802139, 'colsample_bytree': 0.6132629975671265, 'reg_alpha': 7.6616792526415125, 'reg_lambda': 0.8251558241316492}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629:  98%|█████████▊| 49/50 [00:29<00:00,  2.52it/s]

[I 2025-08-16 12:27:51,834] Trial 48 finished with value: 0.711775540337085 and parameters: {'n_estimators': 615, 'learning_rate': 0.2514668682671904, 'max_depth': 9, 'num_leaves': 149, 'subsample': 0.6396699381973204, 'colsample_bytree': 0.6598266238696134, 'reg_alpha': 2.6711158574141827, 'reg_lambda': 3.92668423100056}. Best is trial 28 with value: 0.7566285242708902.


Best trial: 28. Best value: 0.756629: 100%|██████████| 50/50 [00:29<00:00,  1.70it/s]

[I 2025-08-16 12:27:52,125] Trial 49 finished with value: 0.6597851524046356 and parameters: {'n_estimators': 451, 'learning_rate': 0.2846068485101273, 'max_depth': 10, 'num_leaves': 250, 'subsample': 0.6664745030022224, 'colsample_bytree': 0.7571827673751963, 'reg_alpha': 5.15851674645753, 'reg_lambda': 5.770949327500449}. Best is trial 28 with value: 0.7566285242708902.
Best lightgbm score: 0.7566
Best lightgbm params: {'n_estimators': 525, 'learning_rate': 0.2595987194854613, 'max_depth': 10, 'num_leaves': 187, 'subsample': 0.6500873355832663, 'colsample_bytree': 0.6588205512817252, 'reg_alpha': 0.026801287186797418, 'reg_lambda': 0.25328595278902544}

Optimization completed!
balanced_rf: Score = 0.7832
xgboost: Score = 0.7695
lightgbm: Score = 0.7566





### Step 5: Advanced Model Ensemble and Stacking

In [8]:
class AdvancedEnsemble:
    """
    Advanced ensemble methods including stacking and blending
    """
    
    def __init__(self, base_models, meta_model=None, cv_folds=5, random_state=42):
        self.base_models = base_models
        self.meta_model = meta_model or lgb.LGBMClassifier(random_state=random_state, verbosity=-1)
        self.cv_folds = cv_folds
        self.random_state = random_state
        self.trained_models = {}
        
    def create_optimized_models(self, optimization_results):
        """Create models with optimized parameters"""
        models = {}
        
        for model_type, result in optimization_results.items():
            params = result['params']
            params['random_state'] = self.random_state
            
            if model_type == 'balanced_rf':
                params['n_jobs'] = -1
                models[model_type] = BalancedRandomForestClassifier(**params)
            elif model_type == 'xgboost':
                params['eval_metric'] = 'logloss'
                params['verbosity'] = 0
                models[model_type] = xgb.XGBClassifier(**params)
            elif model_type == 'lightgbm':
                params['verbosity'] = -1
                params['force_col_wise'] = True
                models[model_type] = lgb.LGBMClassifier(**params)
            elif model_type == 'catboost':
                params['verbose'] = False
                models[model_type] = cb.CatBoostClassifier(**params)
                
        return models
    
    def stacking_cv(self, X, y):
        """Generate meta-features using cross-validation"""
        cv = StratifiedKFold(n_splits=self.cv_folds, shuffle=True, random_state=self.random_state)
        meta_features = np.zeros((X.shape[0], len(self.base_models)))
        
        for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
            X_train_fold = X.iloc[train_idx]
            y_train_fold = y.iloc[train_idx]
            X_val_fold = X.iloc[val_idx]
            
            for i, (name, model) in enumerate(self.base_models.items()):
                # Clone and train model
                model_clone = clone(model)
                model_clone.fit(X_train_fold, y_train_fold)
                
                # Predict on validation set
                pred_proba = model_clone.predict_proba(X_val_fold)[:, 1]
                meta_features[val_idx, i] = pred_proba
                
        return meta_features
    
    def fit_stacking(self, X, y):
        """Fit stacking ensemble"""
        print("Generating meta-features...")
        meta_features = self.stacking_cv(X, y)
        
        print("Training meta-model...")
        self.meta_model.fit(meta_features, y)
        
        # Train base models on full data
        print("Training base models on full data...")
        for name, model in self.base_models.items():
            model.fit(X, y)
            self.trained_models[name] = model
            
        return self
    
    def predict_stacking(self, X):
        """Predict using stacking ensemble"""
        meta_features = np.zeros((X.shape[0], len(self.base_models)))
        
        for i, (name, model) in enumerate(self.trained_models.items()):
            pred_proba = model.predict_proba(X)[:, 1]
            meta_features[:, i] = pred_proba
            
        return self.meta_model.predict(meta_features)
    
    def predict_proba_stacking(self, X):
        """Predict probabilities using stacking ensemble"""
        meta_features = np.zeros((X.shape[0], len(self.base_models)))
        
        for i, (name, model) in enumerate(self.trained_models.items()):
            pred_proba = model.predict_proba(X)[:, 1]
            meta_features[:, i] = pred_proba
            
        return self.meta_model.predict_proba(meta_features)
    
    def weighted_average_ensemble(self, X, weights=None):
        """Simple weighted average ensemble"""
        if weights is None:
            weights = np.ones(len(self.trained_models)) / len(self.trained_models)
            
        predictions = np.zeros(X.shape[0])
        
        for i, (name, model) in enumerate(self.trained_models.items()):
            pred_proba = model.predict_proba(X)[:, 1]
            predictions += weights[i] * pred_proba
            
        return (predictions > 0.5).astype(int), predictions

# Create optimized models
optimized_models = optimizer.create_optimized_models(optimization_results)

# Create ensemble
ensemble = AdvancedEnsemble(optimized_models)

# Fit stacking ensemble
ensemble.fit_stacking(X_train_selected, Y_train)

print("Ensemble training completed!")

Generating meta-features...


  File "e:\miniconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
        "wmic CPU Get NumberOfCores /Format:csv".split(),
        capture_output=True,
        text=True,
    )
  File "e:\miniconda3\Lib\subprocess.py", line 556, in run
    with Popen(*popenargs, **kwargs) as process:
         ~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "e:\miniconda3\Lib\subprocess.py", line 1038, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                        pass_fds, cwd, env,
                        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
                        gid, gids, uid, umask,
                        ^^^^^^^^^^^^^^^^^^^^^^
                        start_new_session, process_group)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "e:\miniconda3\Lib\subprocess.py", line 1550, in _execute_child
    hp, ht

Training meta-model...
Training base models on full data...
Ensemble training completed!


### Step 6: Comprehensive Model Evaluation

In [9]:
class ComprehensiveEvaluator:
    """
    Comprehensive model evaluation with multiple metrics and visualizations
    """
    
    def __init__(self):
        self.results = {}
        
    def calculate_metrics(self, y_true, y_pred, y_pred_proba=None):
        """Calculate comprehensive metrics"""
        from sklearn.metrics import (
            accuracy_score, precision_score, recall_score, f1_score,
            roc_auc_score, matthews_corrcoef, confusion_matrix,
            classification_report
        )
        
        metrics = {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred),
            'recall': recall_score(y_true, y_pred),
            'f1': f1_score(y_true, y_pred),
            'mcc': matthews_corrcoef(y_true, y_pred)
        }
        
        if y_pred_proba is not None:
            metrics['auc'] = roc_auc_score(y_true, y_pred_proba)
            
        # Confusion matrix components
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        metrics.update({
            'sensitivity': tp / (tp + fn),
            'specificity': tn / (tn + fp),
            'tp': tp, 'tn': tn, 'fp': fp, 'fn': fn
        })
        
        return metrics
    
    def cross_validation_evaluation(self, model, X, y, cv_folds=5):
        """Comprehensive cross-validation evaluation"""
        cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)
        
        cv_results = {
            'accuracy': [], 'precision': [], 'recall': [], 'f1': [],
            'auc': [], 'mcc': [], 'sensitivity': [], 'specificity': []
        }
        
        for train_idx, val_idx in cv.split(X, y):
            X_train_fold = X.iloc[train_idx]
            y_train_fold = y.iloc[train_idx]
            X_val_fold = X.iloc[val_idx]
            y_val_fold = y.iloc[val_idx]
            
            # Train and predict
            model_clone = clone(model)
            model_clone.fit(X_train_fold, y_train_fold)
            y_pred = model_clone.predict(X_val_fold)
            y_pred_proba = model_clone.predict_proba(X_val_fold)[:, 1]
            
            # Calculate metrics
            fold_metrics = self.calculate_metrics(y_val_fold, y_pred, y_pred_proba)
            
            for metric in cv_results:
                if metric in fold_metrics:
                    cv_results[metric].append(fold_metrics[metric])
        
        # Calculate means and stds
        cv_summary = {}
        for metric, values in cv_results.items():
            cv_summary[f'{metric}_mean'] = np.mean(values)
            cv_summary[f'{metric}_std'] = np.std(values)
            
        return cv_summary
    
    def evaluate_model(self, model, X_train, y_train, X_test, y_test, model_name):
        """Complete model evaluation"""
        print(f"Evaluating {model_name}...")
        
        # Cross-validation results
        cv_results = self.cross_validation_evaluation(model, X_train, y_train)
        
        # Train on full training set and test
        model.fit(X_train, y_train)
        y_pred_test = model.predict(X_test)
        y_pred_proba_test = model.predict_proba(X_test)[:, 1]
        
        # Test set metrics
        test_metrics = self.calculate_metrics(y_test, y_pred_test, y_pred_proba_test)
        
        # Store results
        self.results[model_name] = {
            'cv_results': cv_results,
            'test_metrics': test_metrics,
            'predictions': {
                'y_pred': y_pred_test,
                'y_pred_proba': y_pred_proba_test
            }
        }
        
        return cv_results, test_metrics
    
    def create_results_dataframe(self):
        """Create comprehensive results DataFrame"""
        data = []
        
        for model_name, result in self.results.items():
            row = {'Model': model_name}
            
            # Add CV results (only if they exist)
            if 'cv_results' in result:
                for metric, value in result['cv_results'].items():
                    row[f'CV_{metric}'] = value
            else:
                # Fill with NaN for missing CV results
                cv_metrics = ['accuracy_mean', 'accuracy_std', 'precision_mean', 'precision_std', 
                            'recall_mean', 'recall_std', 'f1_mean', 'f1_std', 'auc_mean', 'auc_std',
                            'mcc_mean', 'mcc_std', 'sensitivity_mean', 'sensitivity_std', 
                            'specificity_mean', 'specificity_std']
                for metric in cv_metrics:
                    row[f'CV_{metric}'] = np.nan
                
            # Add test results
            for metric, value in result['test_metrics'].items():
                if metric not in ['tp', 'tn', 'fp', 'fn']:
                    row[f'Test_{metric}'] = value
                    
            data.append(row)
            
        return pd.DataFrame(data)
    
    def plot_roc_curves(self, X_test, y_test):
        """Plot ROC curves for all models"""
        fig = go.Figure()
        
        for model_name, results in self.results.items():
            y_pred_proba = results['predictions']['y_pred_proba']
            
            from sklearn.metrics import roc_curve
            fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
            auc_score = results['test_metrics']['auc']
            
            fig.add_trace(go.Scatter(
                x=fpr, y=tpr,
                mode='lines',
                name=f'{model_name} (AUC = {auc_score:.3f})',
                line=dict(width=3)
            ))
        
        # Add diagonal line
        fig.add_trace(go.Scatter(
            x=[0, 1], y=[0, 1],
            mode='lines',
            name='Random (AUC = 0.5)',
            line=dict(dash='dash', color='gray')
        ))
        
        fig.update_layout(
            title='ROC Curves Comparison',
            xaxis_title='False Positive Rate',
            yaxis_title='True Positive Rate',
            width=800, height=600
        )
        
        fig.show()
        
    def plot_feature_importance(self, model, feature_names, model_name, top_n=20):
        """Plot feature importance"""
        if hasattr(model, 'feature_importances_'):
            importances = model.feature_importances_
        elif hasattr(model, 'coef_'):
            importances = np.abs(model.coef_[0])
        else:
            print(f"Cannot extract feature importance for {model_name}")
            return
            
        feature_imp = pd.DataFrame({
            'feature': feature_names,
            'importance': importances
        }).sort_values('importance', ascending=False)
        
        top_features = feature_imp.head(top_n)
        
        fig = px.bar(
            top_features.iloc[::-1],  # Reverse for better visualization
            x='importance',
            y='feature',
            orientation='h',
            title=f'Top {top_n} Feature Importances - {model_name}'
        )
        
        fig.update_layout(height=600)
        fig.show()

# Evaluate individual models
evaluator = ComprehensiveEvaluator()

# Evaluate optimized models
for model_name, model in optimized_models.items():
    evaluator.evaluate_model(
        model, X_train_selected, Y_train, X_test_selected, Y_test, model_name
    )

# Evaluate ensemble
ensemble_pred = ensemble.predict_stacking(X_test_selected)
ensemble_pred_proba = ensemble.predict_proba_stacking(X_test_selected)[:, 1]

# Add ensemble results manually
ensemble_test_metrics = evaluator.calculate_metrics(Y_test, ensemble_pred, ensemble_pred_proba)

# # cross-validate ensemble
# cv_results = evaluator.cross_validation_evaluation(ensemble, X_train_selected, Y_train)

evaluator.results['Stacking_Ensemble'] = {
    # 'cv_results': cv_results,
    'test_metrics': ensemble_test_metrics,
    'predictions': {
        'y_pred': ensemble_pred,
        'y_pred_proba': ensemble_pred_proba
    }
}

# Create results summary
results_df = evaluator.create_results_dataframe()
print("\nModel Comparison Results:")
print(results_df.round(4))

# Plot ROC curves
evaluator.plot_roc_curves(X_test_selected, Y_test)

Evaluating balanced_rf...
Evaluating xgboost...
Evaluating lightgbm...

Model Comparison Results:
               Model  CV_accuracy_mean  CV_accuracy_std  CV_precision_mean  \
0        balanced_rf            0.8240           0.0423             0.6624   
1            xgboost            0.8281           0.0136             0.7660   
2           lightgbm            0.8239           0.0214             0.7190   
3  Stacking_Ensemble               NaN              NaN                NaN   

   CV_precision_std  CV_recall_mean  CV_recall_std  CV_f1_mean  CV_f1_std  \
0            0.0993          0.6362         0.0832      0.6434     0.0680   
1            0.0362          0.4413         0.0667      0.5567     0.0552   
2            0.0559          0.4750         0.0519      0.5712     0.0511   
3               NaN             NaN            NaN         NaN        NaN   

   CV_auc_mean  ...  CV_specificity_mean  CV_specificity_std  Test_accuracy  \
0       0.8507  ...               0.8857      

# Advancement for Higher Performance

### Step 7: Advanced Techniques for Higher Performance

In [10]:
### Advanced Feature Engineering and Selection

# 1. Polynomial Features for Selected Important Features
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import SelectFromModel

def create_polynomial_features(X_train, X_test, top_features, degree=2):
    """Create polynomial features for most important features"""
    # Select top features from best model
    best_model_name = results_df.loc[results_df['Test_auc'].idxmax(), 'Model']
    best_model = optimized_models[best_model_name]
    
    if hasattr(best_model, 'feature_importances_'):
        importances = best_model.feature_importances_
        top_idx = np.argsort(importances)[-top_features:]
        selected_cols = X_train.columns[top_idx]
    else:
        # Use top features from mutual information
        selected_cols = X_train.columns[:top_features]
    
    print(f"Creating polynomial features for top {len(selected_cols)} features")
    
    # Create polynomial features
    poly = PolynomialFeatures(degree=degree, interaction_only=True, include_bias=False)
    
    X_train_poly = poly.fit_transform(X_train[selected_cols])
    X_test_poly = poly.transform(X_test[selected_cols])
    
    # Get feature names
    poly_names = poly.get_feature_names_out(selected_cols)
    
    # Convert back to DataFrame
    X_train_poly_df = pd.DataFrame(X_train_poly, columns=poly_names, index=X_train.index)
    X_test_poly_df = pd.DataFrame(X_test_poly, columns=poly_names, index=X_test.index)
    
    # Combine with original features
    X_train_enhanced = pd.concat([X_train, X_train_poly_df], axis=1)
    X_test_enhanced = pd.concat([X_test, X_test_poly_df], axis=1)
    
    print(f"Enhanced features: {X_train_enhanced.shape[1]} (added {X_train_poly_df.shape[1]} polynomial features)")
    
    return X_train_enhanced, X_test_enhanced

# Create polynomial features
X_train_poly, X_test_poly = create_polynomial_features(
    X_train_selected, X_test_selected, top_features=15, degree=2
)

Creating polynomial features for top 15 features
Enhanced features: 195 (added 120 polynomial features)


In [11]:
# 2. Advanced Feature Selection with Recursive Feature Elimination
from sklearn.feature_selection import RFECV

def advanced_feature_selection_v2(X_train, X_test, y_train):
    """More aggressive feature selection"""
    
    # Remove low variance features more aggressively
    from sklearn.feature_selection import VarianceThreshold
    var_selector = VarianceThreshold(threshold=0.05)  # More aggressive
    X_train_var = var_selector.fit_transform(X_train)
    X_test_var = var_selector.transform(X_test)
    
    var_features = X_train.columns[var_selector.get_support()]
    X_train_var_df = pd.DataFrame(X_train_var, columns=var_features, index=X_train.index)
    X_test_var_df = pd.DataFrame(X_test_var, columns=var_features, index=X_test.index)
    
    print(f"After variance filtering: {X_train_var_df.shape[1]} features")
    
    # Use XGBoost for feature selection (often better than RF)
    xgb_selector = xgb.XGBClassifier(
        n_estimators=100, 
        random_state=42, 
        eval_metric='logloss',
        verbosity=0
    )
    
    # RFECV with XGBoost
    rfecv = RFECV(
        estimator=xgb_selector,
        step=5,  # Remove 5 features at a time
        cv=5,
        scoring='roc_auc',
        n_jobs=-1,
        min_features_to_select=30  # Minimum features to keep
    )
    
    rfecv.fit(X_train_var_df, y_train)
    
    selected_features = var_features[rfecv.support_]
    X_train_selected = X_train_var_df[selected_features]
    X_test_selected = X_test_var_df[selected_features]
    
    print(f"RFECV selected {len(selected_features)} features")
    print(f"Optimal number of features: {rfecv.n_features_}")
    
    return X_train_selected, X_test_selected, selected_features

# Apply advanced feature selection
X_train_final_v2, X_test_final_v2, final_features = advanced_feature_selection_v2(
    X_train_poly, X_test_poly, Y_train
)

After variance filtering: 156 features
RFECV selected 56 features
Optimal number of features: 56


In [None]:
# 3. Advanced Hyperparameter Optimization with More Trials and Better Objectives
class AdvancedOptunaOptimizer:
    """Enhanced Optuna optimizer with better objective functions"""
    
    def __init__(self, n_trials=200, cv_folds=5, random_state=42):
        self.n_trials = n_trials
        self.cv_folds = cv_folds
        self.random_state = random_state
        self.best_params = {}
        self.study_results = {}
    
    def objective_function_v2(self, trial, model_type, X, y):
        """Enhanced objective function focusing on AUC"""
        
        if model_type == 'xgboost_v2':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 500, 2000),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True),
                'max_depth': trial.suggest_int('max_depth', 3, 12),
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
                'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.7, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
                'reg_lambda': trial.suggest_float('reg_lambda', 1, 10),
                'gamma': trial.suggest_float('gamma', 0, 5),
                'random_state': self.random_state,
                'eval_metric': 'auc',
                'verbosity': 0,
                'tree_method': 'hist'
            }
            model = xgb.XGBClassifier(**params)
            
        elif model_type == 'lightgbm_v2':
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 500, 2000),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True),
                'max_depth': trial.suggest_int('max_depth', 3, 12),
                'num_leaves': trial.suggest_int('num_leaves', 31, 500),
                'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
                'reg_lambda': trial.suggest_float('reg_lambda', 1, 10),
                'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0, 1),
                'random_state': self.random_state,
                'verbosity': -1,
                'force_col_wise': True,
                'objective': 'binary',
                'metric': 'auc'
            }
            model = lgb.LGBMClassifier(**params)
            
        elif model_type == 'catboost_v2':
            params = {
                'iterations': trial.suggest_int('iterations', 500, 2000),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True),
                'depth': trial.suggest_int('depth', 4, 10),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
                'border_count': trial.suggest_int('border_count', 32, 255),
                'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 10),
                'random_strength': trial.suggest_float('random_strength', 0, 10),
                'random_state': self.random_state,
                'verbose': False,
                'eval_metric': 'AUC',
                'task_type': 'CPU'
            }
            model = cb.CatBoostClassifier(**params)
        
        # Enhanced cross-validation with stratification
        from sklearn.model_selection import StratifiedKFold
        cv = StratifiedKFold(n_splits=self.cv_folds, shuffle=True, random_state=self.random_state)
        
        # Convert to numpy arrays to avoid DataFrame issues
        X_array = X.values if hasattr(X, 'values') else X
        y_array = y.values if hasattr(y, 'values') else y
        
        auc_scores = []
        for train_idx, val_idx in cv.split(X_array, y_array):
            X_train_fold = X_array[train_idx]
            y_train_fold = y_array[train_idx]
            X_val_fold = X_array[val_idx]
            y_val_fold = y_array[val_idx]
            
            try:
                model_clone = clone(model)
                model_clone.fit(X_train_fold, y_train_fold)
                y_pred_proba = model_clone.predict_proba(X_val_fold)[:, 1]
                auc = roc_auc_score(y_val_fold, y_pred_proba)
                auc_scores.append(auc)
            except Exception as e:
                print(f"Error in fold: {e}")
                return 0.5  # Return baseline score if error occurs
        
        return np.mean(auc_scores) if auc_scores else 0.5
    
    def optimize_model_v2(self, model_type, X, y):
        """Optimize with focus on AUC"""
        print(f"Optimizing {model_type} with enhanced parameters...")
        
        # Reset DataFrame indices to avoid issues
        if hasattr(X, 'reset_index'):
            X = X.reset_index(drop=True)
        if hasattr(y, 'reset_index'):
            y = y.reset_index(drop=True)
        
        study = optuna.create_study(
            direction='maximize',
            sampler=TPESampler(seed=self.random_state, n_startup_trials=50)
        )
        
        try:
            study.optimize(
                lambda trial: self.objective_function_v2(trial, model_type, X, y),
                n_trials=self.n_trials,
                show_progress_bar=True
            )
            
            self.best_params[model_type] = study.best_params
            self.study_results[model_type] = study
            
            print(f"Best {model_type} AUC: {study.best_value:.4f}")
            print(f"Best {model_type} params: {study.best_params}")
            
            return study.best_params, study.best_value
            
        except Exception as e:
            print(f"Error optimizing {model_type}: {e}")
            # Return default parameters if optimization fails
            default_params = self._get_default_params(model_type)
            return default_params, 0.5
    
    def _get_default_params(self, model_type):
        """Get default parameters for models"""
        defaults = {
            'xgboost_v2': {
                'n_estimators': 1000,
                'learning_rate': 0.1,
                'max_depth': 6,
                'min_child_weight': 1,
                'subsample': 0.8,
                'colsample_bytree': 0.8,
                'reg_alpha': 0,
                'reg_lambda': 1,
                'gamma': 0
            },
            'lightgbm_v2': {
                'n_estimators': 1000,
                'learning_rate': 0.1,
                'max_depth': 6,
                'num_leaves': 31,
                'min_child_samples': 20,
                'subsample': 0.8,
                'colsample_bytree': 0.8,
                'reg_alpha': 0,
                'reg_lambda': 0,
                'min_gain_to_split': 0
            },
            'catboost_v2': {
                'iterations': 1000,
                'learning_rate': 0.1,
                'depth': 6,
                'l2_leaf_reg': 3,
                'border_count': 128,
                'bagging_temperature': 1,
                'random_strength': 1
            }
        }
        return defaults.get(model_type, {})

# Reset indices of the data before optimization
print("Resetting DataFrame indices...")
X_train_final_v2 = X_train_final_v2.reset_index(drop=True)
X_test_final_v2 = X_test_final_v2.reset_index(drop=True)
Y_train = Y_train.reset_index(drop=True)

print(f"Data shapes after reset: X_train: {X_train_final_v2.shape}, X_test: {X_test_final_v2.shape}, Y_train: {len(Y_train)}")

# Initialize enhanced optimizer
advanced_optimizer = AdvancedOptunaOptimizer(n_trials=50, cv_folds=5)  # Reduce trials for testing

# Optimize advanced models
advanced_models = ['xgboost_v2', 'lightgbm_v2', 'catboost_v2']
advanced_results = {}

for model_type in advanced_models:
    params, score = advanced_optimizer.optimize_model_v2(model_type, X_train_final_v2, Y_train)
    advanced_results[model_type] = {'params': params, 'score': score}

print("\nAdvanced Optimization Results:")
for model, result in advanced_results.items():
    print(f"{model}: AUC = {result['score']:.4f}")

[I 2025-08-16 12:28:14,022] A new study created in memory with name: no-name-c5031e3e-34db-48b1-a678-a0f8c29b1bff


Resetting DataFrame indices...
Data shapes after reset: X_train: (477, 65), X_test: (120, 65), Y_train: 477
Optimizing xgboost_v2 with enhanced parameters...


Best trial: 0. Best value: 0.673015:   2%|▏         | 1/50 [00:01<01:01,  1.25s/it]

[I 2025-08-16 12:28:15,274] Trial 0 finished with value: 0.6730150073144179 and parameters: {'n_estimators': 1062, 'learning_rate': 0.1667521176194013, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.7468055921327309, 'colsample_bytree': 0.7467983561008608, 'colsample_bylevel': 0.7174250836504598, 'reg_alpha': 8.661761457749352, 'reg_lambda': 6.41003510568888, 'gamma': 3.540362888980227}. Best is trial 0 with value: 0.6730150073144179.


Best trial: 1. Best value: 0.755256:   4%|▍         | 2/50 [00:01<00:44,  1.08it/s]

[I 2025-08-16 12:28:15,972] Trial 1 finished with value: 0.755256210167608 and parameters: {'n_estimators': 530, 'learning_rate': 0.17898794163735265, 'max_depth': 11, 'min_child_weight': 3, 'subsample': 0.7545474901621302, 'colsample_bytree': 0.7550213529560301, 'colsample_bylevel': 0.7912726728878613, 'reg_alpha': 5.247564316322379, 'reg_lambda': 4.887505167779041, 'gamma': 1.4561457009902097}. Best is trial 1 with value: 0.755256210167608.


Best trial: 2. Best value: 0.808322:   6%|▌         | 3/50 [00:05<01:37,  2.08s/it]

[I 2025-08-16 12:28:19,433] Trial 2 finished with value: 0.8083224892835273 and parameters: {'n_estimators': 1418, 'learning_rate': 0.008364645453054504, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.8368209952651108, 'colsample_bytree': 0.935552788417904, 'colsample_bylevel': 0.7599021346475079, 'reg_alpha': 5.142344384136116, 'reg_lambda': 6.331731119758382, 'gamma': 0.23225206359998862}. Best is trial 2 with value: 0.8083224892835273.


Best trial: 2. Best value: 0.808322:   8%|▊         | 4/50 [00:07<01:36,  2.10s/it]

[I 2025-08-16 12:28:21,546] Trial 3 finished with value: 0.7783797288562292 and parameters: {'n_estimators': 1411, 'learning_rate': 0.009379072229909205, 'max_depth': 3, 'min_child_weight': 10, 'subsample': 0.9896896099223678, 'colsample_bytree': 0.9425192044349383, 'colsample_bylevel': 0.7913841307520112, 'reg_alpha': 0.9767211400638387, 'reg_lambda': 7.158097238609412, 'gamma': 2.2007624686980067}. Best is trial 2 with value: 0.8083224892835273.


Best trial: 2. Best value: 0.808322:  10%|█         | 5/50 [00:08<01:17,  1.71s/it]

[I 2025-08-16 12:28:22,579] Trial 4 finished with value: 0.7443844832278697 and parameters: {'n_estimators': 683, 'learning_rate': 0.031065126086242026, 'max_depth': 3, 'min_child_weight': 10, 'subsample': 0.777633994480005, 'colsample_bytree': 0.8987566853061946, 'colsample_bylevel': 0.7935133228268233, 'reg_alpha': 5.200680211778108, 'reg_lambda': 5.920392514089517, 'gamma': 0.9242722776276352}. Best is trial 2 with value: 0.8083224892835273.


Best trial: 5. Best value: 0.818483:  12%|█▏        | 6/50 [00:10<01:25,  1.94s/it]

[I 2025-08-16 12:28:24,952] Trial 5 finished with value: 0.8184830804472568 and parameters: {'n_estimators': 1955, 'learning_rate': 0.08725278311502205, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.8793699936433255, 'colsample_bytree': 0.976562270506935, 'colsample_bylevel': 0.7265477506155759, 'reg_alpha': 1.959828624191452, 'reg_lambda': 1.4070456001948426, 'gamma': 1.6266516538163218}. Best is trial 5 with value: 0.8184830804472568.


Best trial: 5. Best value: 0.818483:  14%|█▍        | 7/50 [00:12<01:14,  1.73s/it]

[I 2025-08-16 12:28:26,248] Trial 6 finished with value: 0.6838271628563652 and parameters: {'n_estimators': 1083, 'learning_rate': 0.013604651830782358, 'max_depth': 11, 'min_child_weight': 4, 'subsample': 0.7842803529062142, 'colsample_bytree': 0.8628088249474746, 'colsample_bylevel': 0.7422772674924287, 'reg_alpha': 8.021969807540398, 'reg_lambda': 1.6709557931179373, 'gamma': 4.9344346830025865}. Best is trial 5 with value: 0.8184830804472568.


Best trial: 7. Best value: 0.824303:  16%|█▌        | 8/50 [00:15<01:38,  2.34s/it]

[I 2025-08-16 12:28:29,904] Trial 7 finished with value: 0.8243033583951374 and parameters: {'n_estimators': 1659, 'learning_rate': 0.01040697346842839, 'max_depth': 3, 'min_child_weight': 9, 'subsample': 0.9120572031542851, 'colsample_bytree': 0.9187021504122962, 'colsample_bylevel': 0.9313811040057838, 'reg_alpha': 0.7404465173409036, 'reg_lambda': 4.226191556898454, 'gamma': 0.5793452976256486}. Best is trial 7 with value: 0.8243033583951374.


Best trial: 7. Best value: 0.824303:  18%|█▊        | 9/50 [00:18<01:33,  2.29s/it]

[I 2025-08-16 12:28:32,066] Trial 8 finished with value: 0.7343854896690936 and parameters: {'n_estimators': 1795, 'learning_rate': 0.04983460221146937, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.7932946965146986, 'colsample_bytree': 0.7975549966080241, 'colsample_bylevel': 0.9188818535014192, 'reg_alpha': 6.3755747135521315, 'reg_lambda': 8.98491468318694, 'gamma': 2.3610746258097466}. Best is trial 7 with value: 0.8243033583951374.


Best trial: 9. Best value: 0.824385:  20%|██        | 10/50 [00:19<01:15,  1.88s/it]

[I 2025-08-16 12:28:33,051] Trial 9 finished with value: 0.8243845824544692 and parameters: {'n_estimators': 679, 'learning_rate': 0.0694433945478456, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.9312901539863683, 'colsample_bytree': 0.8481386789093173, 'colsample_bylevel': 0.8568198488145982, 'reg_alpha': 4.275410183585496, 'reg_lambda': 1.2287721406968566, 'gamma': 0.5394571349665223}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  22%|██▏       | 11/50 [00:19<01:00,  1.55s/it]

[I 2025-08-16 12:28:33,834] Trial 10 finished with value: 0.7537559394207436 and parameters: {'n_estimators': 547, 'learning_rate': 0.0523043322856166, 'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.9722699421778279, 'colsample_bytree': 0.7747876687446624, 'colsample_bylevel': 0.8231148769106889, 'reg_alpha': 7.555511385430487, 'reg_lambda': 3.059183489424602, 'gamma': 0.38489954914396496}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  24%|██▍       | 12/50 [00:21<01:00,  1.60s/it]

[I 2025-08-16 12:28:35,550] Trial 11 finished with value: 0.7562390850740514 and parameters: {'n_estimators': 934, 'learning_rate': 0.009062670253081015, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.8900211269531271, 'colsample_bytree': 0.9614381770563153, 'colsample_bylevel': 0.9411016230697343, 'reg_alpha': 1.8657005888603584, 'reg_lambda': 9.0330309864098, 'gamma': 2.6967112095782535}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  26%|██▌       | 13/50 [00:23<01:04,  1.75s/it]

[I 2025-08-16 12:28:37,640] Trial 12 finished with value: 0.7172590140278061 and parameters: {'n_estimators': 1711, 'learning_rate': 0.13632076811780444, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.7683805487625824, 'colsample_bytree': 0.8281323365878769, 'colsample_bylevel': 0.9454044297767479, 'reg_alpha': 8.607305832563434, 'reg_lambda': 1.0625691747807164, 'gamma': 2.553736512887829}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  28%|██▊       | 14/50 [00:24<00:58,  1.63s/it]

[I 2025-08-16 12:28:39,001] Trial 13 finished with value: 0.6695077014016466 and parameters: {'n_estimators': 1126, 'learning_rate': 0.011344883519735885, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.9828729111737557, 'colsample_bytree': 0.7969608796062265, 'colsample_bylevel': 0.8556371865230098, 'reg_alpha': 7.030189588951778, 'reg_lambda': 4.272666421413646, 'gamma': 4.858910413604804}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  30%|███       | 15/50 [00:27<01:08,  1.97s/it]

[I 2025-08-16 12:28:41,748] Trial 14 finished with value: 0.7827137340953936 and parameters: {'n_estimators': 1944, 'learning_rate': 0.012657278819106912, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.7854521483132403, 'colsample_bytree': 0.7110660842063597, 'colsample_bylevel': 0.882869300193969, 'reg_alpha': 5.026790232288615, 'reg_lambda': 1.463308761249904, 'gamma': 1.3932323211830573}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  32%|███▏      | 16/50 [00:29<01:09,  2.04s/it]

[I 2025-08-16 12:28:43,951] Trial 15 finished with value: 0.6707923314848835 and parameters: {'n_estimators': 1863, 'learning_rate': 0.012099363511988979, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.9956951362331803, 'colsample_bytree': 0.7726165814534501, 'colsample_bylevel': 0.9016406642217636, 'reg_alpha': 7.616196153287175, 'reg_lambda': 3.1387378959315972, 'gamma': 3.641081743059298}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  34%|███▍      | 17/50 [00:31<01:00,  1.83s/it]

[I 2025-08-16 12:28:45,310] Trial 16 finished with value: 0.7960420635730648 and parameters: {'n_estimators': 1052, 'learning_rate': 0.05151834200133766, 'max_depth': 9, 'min_child_weight': 6, 'subsample': 0.7270869310163225, 'colsample_bytree': 0.9505907486767714, 'colsample_bylevel': 0.7962340194915207, 'reg_alpha': 1.8651851039985423, 'reg_lambda': 1.3669762739928752, 'gamma': 2.954464715941209}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  36%|███▌      | 18/50 [00:35<01:20,  2.51s/it]

[I 2025-08-16 12:28:49,386] Trial 17 finished with value: 0.7940227313964302 and parameters: {'n_estimators': 1517, 'learning_rate': 0.005315507085608234, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.8935518371228349, 'colsample_bytree': 0.7523099287014974, 'colsample_bylevel': 0.9072813214307398, 'reg_alpha': 3.867353463005374, 'reg_lambda': 9.43056989863061, 'gamma': 0.6876047207299663}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  38%|███▊      | 19/50 [00:37<01:12,  2.34s/it]

[I 2025-08-16 12:28:51,345] Trial 18 finished with value: 0.7353808033385498 and parameters: {'n_estimators': 1011, 'learning_rate': 0.007599086582259272, 'max_depth': 12, 'min_child_weight': 9, 'subsample': 0.7773824883145466, 'colsample_bytree': 0.8979952138102537, 'colsample_bylevel': 0.9451666600603648, 'reg_alpha': 5.5520081159946235, 'reg_lambda': 5.766855205204059, 'gamma': 1.2092614545022584}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  40%|████      | 20/50 [00:38<00:56,  1.89s/it]

[I 2025-08-16 12:28:52,188] Trial 19 finished with value: 0.6620640408473385 and parameters: {'n_estimators': 639, 'learning_rate': 0.1368873996962019, 'max_depth': 12, 'min_child_weight': 7, 'subsample': 0.8017089373146101, 'colsample_bytree': 0.8047628723837983, 'colsample_bylevel': 0.9177867036610718, 'reg_alpha': 8.971102599525771, 'reg_lambda': 8.983777818386056, 'gamma': 3.899377729288119}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  42%|████▏     | 21/50 [00:40<00:59,  2.05s/it]

[I 2025-08-16 12:28:54,603] Trial 20 finished with value: 0.7543868079653444 and parameters: {'n_estimators': 1463, 'learning_rate': 0.006819731493148195, 'max_depth': 4, 'min_child_weight': 9, 'subsample': 0.881928717897877, 'colsample_bytree': 0.7027591154849888, 'colsample_bylevel': 0.7304414628598096, 'reg_alpha': 6.635017691080558, 'reg_lambda': 1.0455542546159682, 'gamma': 0.8040402570874933}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  44%|████▍     | 22/50 [00:42<00:53,  1.92s/it]

[I 2025-08-16 12:28:56,229] Trial 21 finished with value: 0.6828225644122384 and parameters: {'n_estimators': 1323, 'learning_rate': 0.06418412407751078, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.9136537664042608, 'colsample_bytree': 0.77117472624904, 'colsample_bylevel': 0.7976199094477803, 'reg_alpha': 7.464914051180242, 'reg_lambda': 6.846696091424932, 'gamma': 4.24611705247089}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 9. Best value: 0.824385:  46%|████▌     | 23/50 [00:44<00:52,  1.94s/it]

[I 2025-08-16 12:28:58,205] Trial 22 finished with value: 0.7375557086480233 and parameters: {'n_estimators': 1487, 'learning_rate': 0.04068501500161551, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.7795607103045176, 'colsample_bytree': 0.7731968930137251, 'colsample_bylevel': 0.9919031664257336, 'reg_alpha': 3.930977246667604, 'reg_lambda': 9.02841899659402, 'gamma': 3.1556931299863145}. Best is trial 9 with value: 0.8243845824544692.


Best trial: 23. Best value: 0.837379:  48%|████▊     | 24/50 [00:47<00:59,  2.30s/it]

[I 2025-08-16 12:29:01,341] Trial 23 finished with value: 0.837378801796285 and parameters: {'n_estimators': 1693, 'learning_rate': 0.031931901530863685, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.7585728963394133, 'colsample_bytree': 0.9167356345784516, 'colsample_bylevel': 0.7842317087322567, 'reg_alpha': 0.2431596643145384, 'reg_lambda': 6.80925066316451, 'gamma': 0.8855533970352447}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  50%|█████     | 25/50 [00:49<00:59,  2.37s/it]

[I 2025-08-16 12:29:03,896] Trial 24 finished with value: 0.6232412085232815 and parameters: {'n_estimators': 1911, 'learning_rate': 0.16874107523050116, 'max_depth': 12, 'min_child_weight': 4, 'subsample': 0.7046369849586602, 'colsample_bytree': 0.9784955687763176, 'colsample_bylevel': 0.8284552444951943, 'reg_alpha': 9.666548190436696, 'reg_lambda': 9.672579793803276, 'gamma': 4.265047277336801}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  52%|█████▏    | 26/50 [00:51<00:53,  2.22s/it]

[I 2025-08-16 12:29:05,762] Trial 25 finished with value: 0.7768671610986368 and parameters: {'n_estimators': 941, 'learning_rate': 0.02069758467921053, 'max_depth': 11, 'min_child_weight': 4, 'subsample': 0.7508478240058277, 'colsample_bytree': 0.8670403787375051, 'colsample_bylevel': 0.9808464322482343, 'reg_alpha': 6.96029796674973, 'reg_lambda': 6.1305505308042845, 'gamma': 0.4858824688538427}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  54%|█████▍    | 27/50 [00:53<00:48,  2.12s/it]

[I 2025-08-16 12:29:07,640] Trial 26 finished with value: 0.7124055929781588 and parameters: {'n_estimators': 1423, 'learning_rate': 0.19279495571883232, 'max_depth': 4, 'min_child_weight': 6, 'subsample': 0.9632119215783866, 'colsample_bytree': 0.9222305853262613, 'colsample_bylevel': 0.9091047222985804, 'reg_alpha': 7.024840839871093, 'reg_lambda': 4.2354203609777965, 'gamma': 1.4679592213224668}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  56%|█████▌    | 28/50 [00:55<00:46,  2.14s/it]

[I 2025-08-16 12:29:09,813] Trial 27 finished with value: 0.6908258275498401 and parameters: {'n_estimators': 1714, 'learning_rate': 0.09927051213197452, 'max_depth': 11, 'min_child_weight': 10, 'subsample': 0.8534027196582813, 'colsample_bytree': 0.8504548884061598, 'colsample_bylevel': 0.9394885536900326, 'reg_alpha': 6.4996393077776515, 'reg_lambda': 7.31770189531933, 'gamma': 3.978963347180505}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  58%|█████▊    | 29/50 [00:58<00:47,  2.25s/it]

[I 2025-08-16 12:29:12,346] Trial 28 finished with value: 0.7408198385157969 and parameters: {'n_estimators': 1835, 'learning_rate': 0.017396365448540358, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8734840422988521, 'colsample_bytree': 0.7107826821390226, 'colsample_bylevel': 0.8396794054397381, 'reg_alpha': 5.426446347075766, 'reg_lambda': 3.5788712691545594, 'gamma': 2.9541663028450538}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  60%|██████    | 30/50 [01:00<00:44,  2.21s/it]

[I 2025-08-16 12:29:14,445] Trial 29 finished with value: 0.7975395347122995 and parameters: {'n_estimators': 545, 'learning_rate': 0.0057385747526029875, 'max_depth': 11, 'min_child_weight': 4, 'subsample': 0.7381181537955654, 'colsample_bytree': 0.8566729780164413, 'colsample_bylevel': 0.9309980659295832, 'reg_alpha': 2.1582102749684315, 'reg_lambda': 6.606014282371002, 'gamma': 0.42673732496884}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  62%|██████▏   | 31/50 [01:01<00:36,  1.92s/it]

[I 2025-08-16 12:29:15,697] Trial 30 finished with value: 0.7850941093420426 and parameters: {'n_estimators': 577, 'learning_rate': 0.0355002981725949, 'max_depth': 8, 'min_child_weight': 7, 'subsample': 0.9178274001167984, 'colsample_bytree': 0.9927556238387604, 'colsample_bylevel': 0.8548901044903585, 'reg_alpha': 3.2295647294124596, 'reg_lambda': 8.156675752918332, 'gamma': 1.354161256310371}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  64%|██████▍   | 32/50 [01:04<00:38,  2.15s/it]

[I 2025-08-16 12:29:18,376] Trial 31 finished with value: 0.8046021296863305 and parameters: {'n_estimators': 1158, 'learning_rate': 0.006678237100523691, 'max_depth': 3, 'min_child_weight': 10, 'subsample': 0.9507940361536618, 'colsample_bytree': 0.9087922618281093, 'colsample_bylevel': 0.8226858833242809, 'reg_alpha': 1.7329432007084578, 'reg_lambda': 2.4079333840397745, 'gamma': 1.2512144908229765}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  66%|██████▌   | 33/50 [01:06<00:35,  2.07s/it]

[I 2025-08-16 12:29:20,279] Trial 32 finished with value: 0.7331847060624617 and parameters: {'n_estimators': 1324, 'learning_rate': 0.06979037663300276, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.9864595841989583, 'colsample_bytree': 0.9213690750087306, 'colsample_bylevel': 0.8663062157534202, 'reg_alpha': 6.117207462343522, 'reg_lambda': 4.776400561850109, 'gamma': 1.2386549475057873}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  68%|██████▊   | 34/50 [01:07<00:30,  1.91s/it]

[I 2025-08-16 12:29:21,808] Trial 33 finished with value: 0.7872330095711143 and parameters: {'n_estimators': 1034, 'learning_rate': 0.08186248757017087, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.7138007926065257, 'colsample_bytree': 0.712218640695691, 'colsample_bylevel': 0.9566381752033022, 'reg_alpha': 7.036578593800237, 'reg_lambda': 5.267564461785927, 'gamma': 0.4891708032550074}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  70%|███████   | 35/50 [01:09<00:29,  1.94s/it]

[I 2025-08-16 12:29:23,809] Trial 34 finished with value: 0.8104074527681387 and parameters: {'n_estimators': 1237, 'learning_rate': 0.02867478599222064, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.819551420319212, 'colsample_bytree': 0.884755029415665, 'colsample_bylevel': 0.8905280952602931, 'reg_alpha': 0.4530400977204452, 'reg_lambda': 4.371513531638241, 'gamma': 3.1292995785711817}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  72%|███████▏  | 36/50 [01:11<00:26,  1.86s/it]

[I 2025-08-16 12:29:25,501] Trial 35 finished with value: 0.7338616440997029 and parameters: {'n_estimators': 1255, 'learning_rate': 0.11779262819281898, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.7211706242201289, 'colsample_bytree': 0.8927257834618947, 'colsample_bylevel': 0.7079533931624865, 'reg_alpha': 5.8577558127346325, 'reg_lambda': 9.462072172824618, 'gamma': 2.877370889379395}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  74%|███████▍  | 37/50 [01:13<00:23,  1.77s/it]

[I 2025-08-16 12:29:27,066] Trial 36 finished with value: 0.7308493017282439 and parameters: {'n_estimators': 1082, 'learning_rate': 0.05364834414206791, 'max_depth': 7, 'min_child_weight': 6, 'subsample': 0.9824394426329576, 'colsample_bytree': 0.8158307913402323, 'colsample_bylevel': 0.9883571691471742, 'reg_alpha': 9.053506419560637, 'reg_lambda': 2.762120213103668, 'gamma': 0.3468065043758273}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  76%|███████▌  | 38/50 [01:14<00:20,  1.72s/it]

[I 2025-08-16 12:29:28,653] Trial 37 finished with value: 0.775387408995033 and parameters: {'n_estimators': 651, 'learning_rate': 0.005347643682421839, 'max_depth': 3, 'min_child_weight': 7, 'subsample': 0.7213565945380687, 'colsample_bytree': 0.7956926890881284, 'colsample_bylevel': 0.9534625932908364, 'reg_alpha': 0.23271935735825866, 'reg_lambda': 8.330216343300421, 'gamma': 1.4092738738669996}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  78%|███████▊  | 39/50 [01:15<00:16,  1.48s/it]

[I 2025-08-16 12:29:29,581] Trial 38 finished with value: 0.74503611848223 and parameters: {'n_estimators': 677, 'learning_rate': 0.06534084423030814, 'max_depth': 9, 'min_child_weight': 9, 'subsample': 0.9205213131411657, 'colsample_bytree': 0.9410442791154545, 'colsample_bylevel': 0.7846103717713919, 'reg_alpha': 1.7743954377972282, 'reg_lambda': 7.755532764767725, 'gamma': 4.03417369633632}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  80%|████████  | 40/50 [01:18<00:18,  1.82s/it]

[I 2025-08-16 12:29:32,203] Trial 39 finished with value: 0.7223364036878275 and parameters: {'n_estimators': 1986, 'learning_rate': 0.022909113096185783, 'max_depth': 6, 'min_child_weight': 8, 'subsample': 0.8022410620759053, 'colsample_bytree': 0.9792271976810694, 'colsample_bylevel': 0.9575238255529035, 'reg_alpha': 4.289940273750183, 'reg_lambda': 7.757839610123476, 'gamma': 3.7727143704234116}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  82%|████████▏ | 41/50 [01:19<00:15,  1.67s/it]

[I 2025-08-16 12:29:33,522] Trial 40 finished with value: 0.808408178539838 and parameters: {'n_estimators': 654, 'learning_rate': 0.1396091574749852, 'max_depth': 8, 'min_child_weight': 9, 'subsample': 0.7960148803091835, 'colsample_bytree': 0.9686569685488602, 'colsample_bylevel': 0.8167605036202489, 'reg_alpha': 0.1083765148029836, 'reg_lambda': 9.148437787773373, 'gamma': 0.4564333839306678}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  84%|████████▍ | 42/50 [01:20<00:12,  1.56s/it]

[I 2025-08-16 12:29:34,808] Trial 41 finished with value: 0.7298135177814067 and parameters: {'n_estimators': 979, 'learning_rate': 0.16635132753791704, 'max_depth': 12, 'min_child_weight': 6, 'subsample': 0.8895511636509398, 'colsample_bytree': 0.8345336565934959, 'colsample_bylevel': 0.7879632315094194, 'reg_alpha': 3.2866454536991596, 'reg_lambda': 7.052666104693346, 'gamma': 3.7618726471884}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  86%|████████▌ | 43/50 [01:22<00:12,  1.75s/it]

[I 2025-08-16 12:29:37,009] Trial 42 finished with value: 0.7626400512576264 and parameters: {'n_estimators': 1688, 'learning_rate': 0.09204191360672336, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.7172676280049932, 'colsample_bytree': 0.8648586646971206, 'colsample_bylevel': 0.8324591504120131, 'reg_alpha': 8.877041827582998, 'reg_lambda': 4.158235112968708, 'gamma': 0.5853350821380293}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  88%|████████▊ | 44/50 [01:24<00:09,  1.55s/it]

[I 2025-08-16 12:29:38,090] Trial 43 finished with value: 0.7828030380349731 and parameters: {'n_estimators': 714, 'learning_rate': 0.08297661622436157, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.7252320418344992, 'colsample_bytree': 0.910290739437736, 'colsample_bylevel': 0.721828901909258, 'reg_alpha': 8.218600592903563, 'reg_lambda': 7.3561800444084655, 'gamma': 0.4067439032094988}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  90%|█████████ | 45/50 [01:24<00:06,  1.36s/it]

[I 2025-08-16 12:29:39,005] Trial 44 finished with value: 0.7458412005851535 and parameters: {'n_estimators': 627, 'learning_rate': 0.19038196232522647, 'max_depth': 6, 'min_child_weight': 4, 'subsample': 0.9438398701772508, 'colsample_bytree': 0.9841745732151577, 'colsample_bylevel': 0.9958003191468613, 'reg_alpha': 7.533781852589416, 'reg_lambda': 4.386336269778242, 'gamma': 0.4175035834933438}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  92%|█████████▏| 46/50 [01:27<00:06,  1.70s/it]

[I 2025-08-16 12:29:41,492] Trial 45 finished with value: 0.7831974637681159 and parameters: {'n_estimators': 1666, 'learning_rate': 0.039225375689251656, 'max_depth': 7, 'min_child_weight': 10, 'subsample': 0.7333592446918453, 'colsample_bytree': 0.8477875312872577, 'colsample_bylevel': 0.7034060934302256, 'reg_alpha': 4.6866064199412625, 'reg_lambda': 1.5067294811365362, 'gamma': 0.5940895813403596}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  94%|█████████▍| 47/50 [01:28<00:04,  1.48s/it]

[I 2025-08-16 12:29:42,458] Trial 46 finished with value: 0.5981107609829671 and parameters: {'n_estimators': 676, 'learning_rate': 0.05483323330491335, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.9886517645423626, 'colsample_bytree': 0.8124611738571113, 'colsample_bylevel': 0.7857136258845582, 'reg_alpha': 8.685991281894603, 'reg_lambda': 3.0123625466750736, 'gamma': 4.816112697203057}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  96%|█████████▌| 48/50 [01:29<00:02,  1.28s/it]

[I 2025-08-16 12:29:43,261] Trial 47 finished with value: 0.7178765224195415 and parameters: {'n_estimators': 518, 'learning_rate': 0.17896745778978254, 'max_depth': 3, 'min_child_weight': 9, 'subsample': 0.85831033272589, 'colsample_bytree': 0.9978894388357902, 'colsample_bylevel': 0.7221389694206196, 'reg_alpha': 5.538542844013207, 'reg_lambda': 9.72372282057189, 'gamma': 2.615489220850744}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379:  98%|█████████▊| 49/50 [01:31<00:01,  1.48s/it]

[I 2025-08-16 12:29:45,227] Trial 48 finished with value: 0.7309538794765371 and parameters: {'n_estimators': 1444, 'learning_rate': 0.06510302118153247, 'max_depth': 7, 'min_child_weight': 7, 'subsample': 0.8752942935769301, 'colsample_bytree': 0.9703474031472967, 'colsample_bylevel': 0.7136339141024374, 'reg_alpha': 2.809631895922303, 'reg_lambda': 9.553703356689029, 'gamma': 4.4513189194545815}. Best is trial 23 with value: 0.837378801796285.


Best trial: 23. Best value: 0.837379: 100%|██████████| 50/50 [01:33<00:00,  1.86s/it]
[I 2025-08-16 12:29:47,030] A new study created in memory with name: no-name-f500f908-aecf-4a46-9671-4f4c5e1aadad


[I 2025-08-16 12:29:47,022] Trial 49 finished with value: 0.7508039480846432 and parameters: {'n_estimators': 1183, 'learning_rate': 0.04925605534809107, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8391095214819946, 'colsample_bytree': 0.8060056684078158, 'colsample_bylevel': 0.8750968335552616, 'reg_alpha': 0.7773463696498484, 'reg_lambda': 9.769553268995498, 'gamma': 4.931053722398015}. Best is trial 23 with value: 0.837378801796285.
Best xgboost_v2 AUC: 0.8374
Best xgboost_v2 params: {'n_estimators': 1693, 'learning_rate': 0.031931901530863685, 'max_depth': 8, 'min_child_weight': 5, 'subsample': 0.7585728963394133, 'colsample_bytree': 0.9167356345784516, 'colsample_bylevel': 0.7842317087322567, 'reg_alpha': 0.2431596643145384, 'reg_lambda': 6.80925066316451, 'gamma': 0.8855533970352447}
Optimizing lightgbm_v2 with enhanced parameters...


Best trial: 0. Best value: 0.655805:   2%|▏         | 1/50 [00:00<00:28,  1.71it/s]

[I 2025-08-16 12:29:47,614] Trial 0 finished with value: 0.6558048269488104 and parameters: {'n_estimators': 1062, 'learning_rate': 0.1667521176194013, 'max_depth': 10, 'num_leaves': 312, 'min_child_samples': 24, 'subsample': 0.7467983561008608, 'colsample_bytree': 0.7174250836504598, 'reg_alpha': 8.661761457749352, 'reg_lambda': 6.41003510568888, 'min_gain_to_split': 0.7080725777960455}. Best is trial 0 with value: 0.6558048269488104.


Best trial: 1. Best value: 0.763046:   4%|▍         | 2/50 [00:00<00:20,  2.36it/s]

[I 2025-08-16 12:29:47,924] Trial 1 finished with value: 0.7630456754212879 and parameters: {'n_estimators': 530, 'learning_rate': 0.17898794163735265, 'max_depth': 11, 'num_leaves': 130, 'min_child_samples': 26, 'subsample': 0.7550213529560301, 'colsample_bytree': 0.7912726728878613, 'reg_alpha': 5.247564316322379, 'reg_lambda': 4.887505167779041, 'min_gain_to_split': 0.2912291401980419}. Best is trial 1 with value: 0.7630456754212879.


Best trial: 2. Best value: 0.789319:   6%|▌         | 3/50 [00:02<00:47,  1.01s/it]

[I 2025-08-16 12:29:49,639] Trial 2 finished with value: 0.789318965321721 and parameters: {'n_estimators': 1418, 'learning_rate': 0.008364645453054504, 'max_depth': 5, 'num_leaves': 203, 'min_child_samples': 51, 'subsample': 0.935552788417904, 'colsample_bytree': 0.7599021346475079, 'reg_alpha': 5.142344384136116, 'reg_lambda': 6.331731119758382, 'min_gain_to_split': 0.046450412719997725}. Best is trial 2 with value: 0.789318965321721.


Best trial: 2. Best value: 0.789319:   8%|▊         | 4/50 [00:04<00:59,  1.30s/it]

[I 2025-08-16 12:29:51,376] Trial 3 finished with value: 0.7488505307205552 and parameters: {'n_estimators': 1411, 'learning_rate': 0.009379072229909205, 'max_depth': 3, 'num_leaves': 476, 'min_child_samples': 97, 'subsample': 0.9425192044349383, 'colsample_bytree': 0.7913841307520112, 'reg_alpha': 0.9767211400638387, 'reg_lambda': 7.158097238609412, 'min_gain_to_split': 0.4401524937396013}. Best is trial 2 with value: 0.789318965321721.


Best trial: 2. Best value: 0.789319:  10%|█         | 5/50 [00:04<00:47,  1.06s/it]

[I 2025-08-16 12:29:52,000] Trial 4 finished with value: 0.7637508221632533 and parameters: {'n_estimators': 683, 'learning_rate': 0.031065126086242026, 'max_depth': 3, 'num_leaves': 458, 'min_child_samples': 33, 'subsample': 0.8987566853061946, 'colsample_bytree': 0.7935133228268233, 'reg_alpha': 5.200680211778108, 'reg_lambda': 5.920392514089517, 'min_gain_to_split': 0.18485445552552704}. Best is trial 2 with value: 0.789318965321721.


Best trial: 5. Best value: 0.807965:  12%|█▏        | 6/50 [00:06<00:46,  1.05s/it]

[I 2025-08-16 12:29:53,040] Trial 5 finished with value: 0.8079652735252093 and parameters: {'n_estimators': 1955, 'learning_rate': 0.08725278311502205, 'max_depth': 12, 'num_leaves': 451, 'min_child_samples': 64, 'subsample': 0.976562270506935, 'colsample_bytree': 0.7265477506155759, 'reg_alpha': 1.959828624191452, 'reg_lambda': 1.4070456001948426, 'min_gain_to_split': 0.32533033076326434}. Best is trial 5 with value: 0.8079652735252093.


Best trial: 5. Best value: 0.807965:  14%|█▍        | 7/50 [00:06<00:38,  1.11it/s]

[I 2025-08-16 12:29:53,627] Trial 6 finished with value: 0.6770452374067271 and parameters: {'n_estimators': 1083, 'learning_rate': 0.013604651830782358, 'max_depth': 11, 'num_leaves': 198, 'min_child_samples': 35, 'subsample': 0.8628088249474746, 'colsample_bytree': 0.7422772674924287, 'reg_alpha': 8.021969807540398, 'reg_lambda': 1.6709557931179373, 'min_gain_to_split': 0.9868869366005173}. Best is trial 5 with value: 0.8079652735252093.


Best trial: 7. Best value: 0.821674:  16%|█▌        | 8/50 [00:08<00:53,  1.28s/it]

[I 2025-08-16 12:29:55,736] Trial 7 finished with value: 0.8216735700029483 and parameters: {'n_estimators': 1659, 'learning_rate': 0.01040697346842839, 'max_depth': 3, 'num_leaves': 414, 'min_child_samples': 74, 'subsample': 0.9187021504122962, 'colsample_bytree': 0.9313811040057838, 'reg_alpha': 0.7404465173409036, 'reg_lambda': 4.226191556898454, 'min_gain_to_split': 0.11586905952512971}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  18%|█▊        | 9/50 [00:09<00:45,  1.11s/it]

[I 2025-08-16 12:29:56,464] Trial 8 finished with value: 0.6997097621963666 and parameters: {'n_estimators': 1795, 'learning_rate': 0.04983460221146937, 'max_depth': 6, 'num_leaves': 60, 'min_child_samples': 38, 'subsample': 0.7975549966080241, 'colsample_bytree': 0.9188818535014192, 'reg_alpha': 6.3755747135521315, 'reg_lambda': 8.98491468318694, 'min_gain_to_split': 0.4722149251619493}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  20%|██        | 10/50 [00:09<00:36,  1.10it/s]

[I 2025-08-16 12:29:56,914] Trial 9 finished with value: 0.7619690668163571 and parameters: {'n_estimators': 679, 'learning_rate': 0.0694433945478456, 'max_depth': 10, 'num_leaves': 294, 'min_child_samples': 80, 'subsample': 0.8481386789093173, 'colsample_bytree': 0.8568198488145982, 'reg_alpha': 4.275410183585496, 'reg_lambda': 1.2287721406968566, 'min_gain_to_split': 0.10789142699330445}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  22%|██▏       | 11/50 [00:10<00:28,  1.36it/s]

[I 2025-08-16 12:29:57,269] Trial 10 finished with value: 0.6783168262797397 and parameters: {'n_estimators': 547, 'learning_rate': 0.0523043322856166, 'max_depth': 6, 'num_leaves': 270, 'min_child_samples': 92, 'subsample': 0.7747876687446624, 'colsample_bytree': 0.8231148769106889, 'reg_alpha': 7.555511385430487, 'reg_lambda': 3.059183489424602, 'min_gain_to_split': 0.07697990982879299}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  24%|██▍       | 12/50 [00:11<00:34,  1.09it/s]

[I 2025-08-16 12:29:58,599] Trial 11 finished with value: 0.786333166065637 and parameters: {'n_estimators': 934, 'learning_rate': 0.009062670253081015, 'max_depth': 12, 'num_leaves': 410, 'min_child_samples': 67, 'subsample': 0.9614381770563153, 'colsample_bytree': 0.9411016230697343, 'reg_alpha': 1.8657005888603584, 'reg_lambda': 9.0330309864098, 'min_gain_to_split': 0.5393422419156507}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  26%|██▌       | 13/50 [00:12<00:31,  1.16it/s]

[I 2025-08-16 12:29:59,338] Trial 12 finished with value: 0.674379160429566 and parameters: {'n_estimators': 1711, 'learning_rate': 0.13632076811780444, 'max_depth': 6, 'num_leaves': 82, 'min_child_samples': 30, 'subsample': 0.8281323365878769, 'colsample_bytree': 0.9454044297767479, 'reg_alpha': 8.607305832563434, 'reg_lambda': 1.0625691747807164, 'min_gain_to_split': 0.5107473025775657}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  28%|██▊       | 14/50 [00:12<00:28,  1.28it/s]

[I 2025-08-16 12:29:59,925] Trial 13 finished with value: 0.6349239711619152 and parameters: {'n_estimators': 1126, 'learning_rate': 0.011344883519735885, 'max_depth': 4, 'num_leaves': 189, 'min_child_samples': 95, 'subsample': 0.7969608796062265, 'colsample_bytree': 0.8556371865230098, 'reg_alpha': 7.030189588951778, 'reg_lambda': 4.272666421413646, 'min_gain_to_split': 0.9717820827209607}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  30%|███       | 15/50 [00:14<00:30,  1.13it/s]

[I 2025-08-16 12:30:01,051] Trial 14 finished with value: 0.7551002472159851 and parameters: {'n_estimators': 1944, 'learning_rate': 0.012657278819106912, 'max_depth': 7, 'num_leaves': 172, 'min_child_samples': 35, 'subsample': 0.7110660842063597, 'colsample_bytree': 0.882869300193969, 'reg_alpha': 5.026790232288615, 'reg_lambda': 1.463308761249904, 'min_gain_to_split': 0.27864646423661144}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  32%|███▏      | 16/50 [00:14<00:28,  1.18it/s]

[I 2025-08-16 12:30:01,802] Trial 15 finished with value: 0.636156188337756 and parameters: {'n_estimators': 1863, 'learning_rate': 0.012099363511988979, 'max_depth': 4, 'num_leaves': 261, 'min_child_samples': 99, 'subsample': 0.7726165814534501, 'colsample_bytree': 0.9016406642217636, 'reg_alpha': 7.616196153287175, 'reg_lambda': 3.1387378959315972, 'min_gain_to_split': 0.7282163486118596}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  34%|███▍      | 17/50 [00:15<00:24,  1.34it/s]

[I 2025-08-16 12:30:02,317] Trial 16 finished with value: 0.8179833327663243 and parameters: {'n_estimators': 1052, 'learning_rate': 0.05151834200133766, 'max_depth': 9, 'num_leaves': 282, 'min_child_samples': 18, 'subsample': 0.9505907486767714, 'colsample_bytree': 0.7962340194915207, 'reg_alpha': 1.8651851039985423, 'reg_lambda': 1.3669762739928752, 'min_gain_to_split': 0.5908929431882418}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  36%|███▌      | 18/50 [00:16<00:30,  1.04it/s]

[I 2025-08-16 12:30:03,786] Trial 17 finished with value: 0.7685167466149554 and parameters: {'n_estimators': 1517, 'learning_rate': 0.005315507085608234, 'max_depth': 8, 'num_leaves': 137, 'min_child_samples': 68, 'subsample': 0.7523099287014974, 'colsample_bytree': 0.9072813214307398, 'reg_alpha': 3.867353463005374, 'reg_lambda': 9.43056989863061, 'min_gain_to_split': 0.13752094414599325}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  38%|███▊      | 19/50 [00:17<00:30,  1.03it/s]

[I 2025-08-16 12:30:04,788] Trial 18 finished with value: 0.7474521444285682 and parameters: {'n_estimators': 1011, 'learning_rate': 0.007599086582259272, 'max_depth': 12, 'num_leaves': 443, 'min_child_samples': 33, 'subsample': 0.8979952138102537, 'colsample_bytree': 0.9451666600603648, 'reg_alpha': 5.5520081159946235, 'reg_lambda': 5.766855205204059, 'min_gain_to_split': 0.24185229090045168}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  40%|████      | 20/50 [00:18<00:23,  1.26it/s]

[I 2025-08-16 12:30:05,158] Trial 19 finished with value: 0.6725459490031979 and parameters: {'n_estimators': 639, 'learning_rate': 0.1368873996962019, 'max_depth': 12, 'num_leaves': 328, 'min_child_samples': 40, 'subsample': 0.8047628723837983, 'colsample_bytree': 0.9177867036610718, 'reg_alpha': 8.971102599525771, 'reg_lambda': 8.983777818386056, 'min_gain_to_split': 0.7798755458576239}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  42%|████▏     | 21/50 [00:19<00:26,  1.11it/s]

[I 2025-08-16 12:30:06,306] Trial 20 finished with value: 0.7311643816198317 and parameters: {'n_estimators': 1463, 'learning_rate': 0.006819731493148195, 'max_depth': 4, 'num_leaves': 453, 'min_child_samples': 65, 'subsample': 0.7027591154849888, 'colsample_bytree': 0.7304414628598096, 'reg_alpha': 6.635017691080558, 'reg_lambda': 1.0455542546159682, 'min_gain_to_split': 0.16080805141749865}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  44%|████▍     | 22/50 [00:19<00:21,  1.28it/s]

[I 2025-08-16 12:30:06,816] Trial 21 finished with value: 0.639423365879658 and parameters: {'n_estimators': 1323, 'learning_rate': 0.06418412407751078, 'max_depth': 9, 'num_leaves': 136, 'min_child_samples': 74, 'subsample': 0.77117472624904, 'colsample_bytree': 0.7976199094477803, 'reg_alpha': 7.464914051180242, 'reg_lambda': 6.846696091424932, 'min_gain_to_split': 0.8492234104941779}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 7. Best value: 0.821674:  46%|████▌     | 23/50 [00:20<00:19,  1.35it/s]

[I 2025-08-16 12:30:07,454] Trial 22 finished with value: 0.7555242283005601 and parameters: {'n_estimators': 1487, 'learning_rate': 0.04068501500161551, 'max_depth': 3, 'num_leaves': 203, 'min_child_samples': 34, 'subsample': 0.7731968930137251, 'colsample_bytree': 0.9919031664257336, 'reg_alpha': 3.930977246667604, 'reg_lambda': 9.02841899659402, 'min_gain_to_split': 0.6311386259972629}. Best is trial 7 with value: 0.8216735700029483.


Best trial: 23. Best value: 0.840666:  48%|████▊     | 24/50 [00:21<00:24,  1.07it/s]

[I 2025-08-16 12:30:08,847] Trial 23 finished with value: 0.840665895534236 and parameters: {'n_estimators': 1693, 'learning_rate': 0.031931901530863685, 'max_depth': 8, 'num_leaves': 262, 'min_child_samples': 27, 'subsample': 0.9167356345784516, 'colsample_bytree': 0.7842317087322567, 'reg_alpha': 0.2431596643145384, 'reg_lambda': 6.80925066316451, 'min_gain_to_split': 0.17711067940704894}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  50%|█████     | 25/50 [00:22<00:21,  1.14it/s]

[I 2025-08-16 12:30:09,594] Trial 24 finished with value: 0.6552972828922001 and parameters: {'n_estimators': 1911, 'learning_rate': 0.16874107523050116, 'max_depth': 12, 'num_leaves': 204, 'min_child_samples': 11, 'subsample': 0.9784955687763176, 'colsample_bytree': 0.8284552444951943, 'reg_alpha': 9.666548190436696, 'reg_lambda': 9.672579793803276, 'min_gain_to_split': 0.8530094554673601}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  52%|█████▏    | 26/50 [00:23<00:19,  1.23it/s]

[I 2025-08-16 12:30:10,252] Trial 25 finished with value: 0.7282539945793927 and parameters: {'n_estimators': 941, 'learning_rate': 0.02069758467921053, 'max_depth': 11, 'num_leaves': 179, 'min_child_samples': 25, 'subsample': 0.8670403787375051, 'colsample_bytree': 0.9808464322482343, 'reg_alpha': 6.96029796674973, 'reg_lambda': 6.1305505308042845, 'min_gain_to_split': 0.09717649377076854}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  54%|█████▍    | 27/50 [00:23<00:16,  1.36it/s]

[I 2025-08-16 12:30:10,811] Trial 26 finished with value: 0.6552194254496382 and parameters: {'n_estimators': 1423, 'learning_rate': 0.19279495571883232, 'max_depth': 4, 'num_leaves': 274, 'min_child_samples': 89, 'subsample': 0.9222305853262613, 'colsample_bytree': 0.9091047222985804, 'reg_alpha': 7.024840839871093, 'reg_lambda': 4.2354203609777965, 'min_gain_to_split': 0.29359184426449336}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  56%|█████▌    | 28/50 [00:24<00:16,  1.33it/s]

[I 2025-08-16 12:30:11,605] Trial 27 finished with value: 0.6973393096096709 and parameters: {'n_estimators': 1714, 'learning_rate': 0.09927051213197452, 'max_depth': 11, 'num_leaves': 460, 'min_child_samples': 56, 'subsample': 0.8504548884061598, 'colsample_bytree': 0.9394885536900326, 'reg_alpha': 6.4996393077776515, 'reg_lambda': 7.31770189531933, 'min_gain_to_split': 0.795792669436101}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  58%|█████▊    | 29/50 [00:25<00:16,  1.29it/s]

[I 2025-08-16 12:30:12,426] Trial 28 finished with value: 0.7206421803769476 and parameters: {'n_estimators': 1835, 'learning_rate': 0.017396365448540358, 'max_depth': 6, 'num_leaves': 75, 'min_child_samples': 62, 'subsample': 0.7107826821390226, 'colsample_bytree': 0.8396794054397381, 'reg_alpha': 5.426446347075766, 'reg_lambda': 3.5788712691545594, 'min_gain_to_split': 0.5908332605690108}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  60%|██████    | 30/50 [00:26<00:18,  1.10it/s]

[I 2025-08-16 12:30:13,651] Trial 29 finished with value: 0.791811466625842 and parameters: {'n_estimators': 545, 'learning_rate': 0.0057385747526029875, 'max_depth': 11, 'num_leaves': 200, 'min_child_samples': 21, 'subsample': 0.8566729780164413, 'colsample_bytree': 0.9309980659295832, 'reg_alpha': 2.1582102749684315, 'reg_lambda': 6.606014282371002, 'min_gain_to_split': 0.085347464993768}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  62%|██████▏   | 31/50 [00:27<00:15,  1.26it/s]

[I 2025-08-16 12:30:14,172] Trial 30 finished with value: 0.768674020775215 and parameters: {'n_estimators': 577, 'learning_rate': 0.0355002981725949, 'max_depth': 8, 'num_leaves': 330, 'min_child_samples': 76, 'subsample': 0.9927556238387604, 'colsample_bytree': 0.8548901044903585, 'reg_alpha': 3.2295647294124596, 'reg_lambda': 8.156675752918332, 'min_gain_to_split': 0.2708322512620742}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  64%|██████▍   | 32/50 [00:28<00:17,  1.03it/s]

[I 2025-08-16 12:30:15,562] Trial 31 finished with value: 0.7406663207910912 and parameters: {'n_estimators': 1158, 'learning_rate': 0.006678237100523691, 'max_depth': 3, 'num_leaves': 483, 'min_child_samples': 86, 'subsample': 0.9087922618281093, 'colsample_bytree': 0.8226858833242809, 'reg_alpha': 1.7329432007084578, 'reg_lambda': 2.4079333840397745, 'min_gain_to_split': 0.25024289816459533}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  66%|██████▌   | 33/50 [00:29<00:14,  1.17it/s]

[I 2025-08-16 12:30:16,140] Trial 32 finished with value: 0.6825612795128257 and parameters: {'n_estimators': 1324, 'learning_rate': 0.06979037663300276, 'max_depth': 9, 'num_leaves': 162, 'min_child_samples': 96, 'subsample': 0.9213690750087306, 'colsample_bytree': 0.8663062157534202, 'reg_alpha': 6.117207462343522, 'reg_lambda': 4.776400561850109, 'min_gain_to_split': 0.24773098950115746}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  68%|██████▊   | 34/50 [00:29<00:11,  1.35it/s]

[I 2025-08-16 12:30:16,623] Trial 33 finished with value: 0.7250638239663424 and parameters: {'n_estimators': 1034, 'learning_rate': 0.08186248757017087, 'max_depth': 3, 'num_leaves': 85, 'min_child_samples': 14, 'subsample': 0.712218640695691, 'colsample_bytree': 0.9566381752033022, 'reg_alpha': 7.036578593800237, 'reg_lambda': 5.267564461785927, 'min_gain_to_split': 0.09783416065100148}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  70%|███████   | 35/50 [00:30<00:11,  1.28it/s]

[I 2025-08-16 12:30:17,493] Trial 34 finished with value: 0.8284953987208274 and parameters: {'n_estimators': 1237, 'learning_rate': 0.02867478599222064, 'max_depth': 4, 'num_leaves': 234, 'min_child_samples': 46, 'subsample': 0.884755029415665, 'colsample_bytree': 0.8905280952602931, 'reg_alpha': 0.4530400977204452, 'reg_lambda': 4.371513531638241, 'min_gain_to_split': 0.6258599157142364}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  72%|███████▏  | 36/50 [00:31<00:09,  1.41it/s]

[I 2025-08-16 12:30:18,032] Trial 35 finished with value: 0.7147144187589304 and parameters: {'n_estimators': 1255, 'learning_rate': 0.11779262819281898, 'max_depth': 9, 'num_leaves': 107, 'min_child_samples': 16, 'subsample': 0.8927257834618947, 'colsample_bytree': 0.7079533931624865, 'reg_alpha': 5.8577558127346325, 'reg_lambda': 9.462072172824618, 'min_gain_to_split': 0.575474177875879}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  74%|███████▍  | 37/50 [00:31<00:08,  1.53it/s]

[I 2025-08-16 12:30:18,553] Trial 36 finished with value: 0.6547687949354744 and parameters: {'n_estimators': 1082, 'learning_rate': 0.05364834414206791, 'max_depth': 7, 'num_leaves': 287, 'min_child_samples': 95, 'subsample': 0.8158307913402323, 'colsample_bytree': 0.9883571691471742, 'reg_alpha': 9.053506419560637, 'reg_lambda': 2.762120213103668, 'min_gain_to_split': 0.06936130087516545}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  76%|███████▌  | 38/50 [00:32<00:08,  1.39it/s]

[I 2025-08-16 12:30:19,437] Trial 37 finished with value: 0.791457652922365 and parameters: {'n_estimators': 651, 'learning_rate': 0.005347643682421839, 'max_depth': 3, 'num_leaves': 352, 'min_child_samples': 16, 'subsample': 0.7956926890881284, 'colsample_bytree': 0.9534625932908364, 'reg_alpha': 0.23271935735825866, 'reg_lambda': 8.330216343300421, 'min_gain_to_split': 0.28185477477339993}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  78%|███████▊  | 39/50 [00:32<00:07,  1.56it/s]

[I 2025-08-16 12:30:19,896] Trial 38 finished with value: 0.770502341747749 and parameters: {'n_estimators': 677, 'learning_rate': 0.06534084423030814, 'max_depth': 9, 'num_leaves': 443, 'min_child_samples': 76, 'subsample': 0.9410442791154545, 'colsample_bytree': 0.7846103717713919, 'reg_alpha': 1.7743954377972282, 'reg_lambda': 7.755532764767725, 'min_gain_to_split': 0.806834739267264}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  80%|████████  | 40/50 [00:33<00:07,  1.34it/s]

[I 2025-08-16 12:30:20,888] Trial 39 finished with value: 0.7443622989952597 and parameters: {'n_estimators': 1986, 'learning_rate': 0.022909113096185783, 'max_depth': 6, 'num_leaves': 395, 'min_child_samples': 41, 'subsample': 0.9792271976810694, 'colsample_bytree': 0.9575238255529035, 'reg_alpha': 4.289940273750183, 'reg_lambda': 7.757839610123476, 'min_gain_to_split': 0.7545428740846823}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  82%|████████▏ | 41/50 [00:34<00:06,  1.38it/s]

[I 2025-08-16 12:30:21,555] Trial 40 finished with value: 0.8381780720555216 and parameters: {'n_estimators': 654, 'learning_rate': 0.1396091574749852, 'max_depth': 8, 'num_leaves': 419, 'min_child_samples': 39, 'subsample': 0.9686569685488602, 'colsample_bytree': 0.8167605036202489, 'reg_alpha': 0.1083765148029836, 'reg_lambda': 9.148437787773373, 'min_gain_to_split': 0.09128667678613356}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  84%|████████▍ | 42/50 [00:34<00:05,  1.55it/s]

[I 2025-08-16 12:30:22,025] Trial 41 finished with value: 0.7515672841396205 and parameters: {'n_estimators': 979, 'learning_rate': 0.16635132753791704, 'max_depth': 12, 'num_leaves': 300, 'min_child_samples': 67, 'subsample': 0.8345336565934959, 'colsample_bytree': 0.7879632315094194, 'reg_alpha': 3.2866454536991596, 'reg_lambda': 7.052666104693346, 'min_gain_to_split': 0.75237452943768}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  86%|████████▌ | 43/50 [00:35<00:04,  1.46it/s]

[I 2025-08-16 12:30:22,792] Trial 42 finished with value: 0.7014194719443878 and parameters: {'n_estimators': 1688, 'learning_rate': 0.09204191360672336, 'max_depth': 3, 'num_leaves': 263, 'min_child_samples': 15, 'subsample': 0.8648586646971206, 'colsample_bytree': 0.8324591504120131, 'reg_alpha': 8.877041827582998, 'reg_lambda': 4.158235112968708, 'min_gain_to_split': 0.11706701642760586}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  88%|████████▊ | 44/50 [00:36<00:03,  1.66it/s]

[I 2025-08-16 12:30:23,205] Trial 43 finished with value: 0.7056597789231362 and parameters: {'n_estimators': 714, 'learning_rate': 0.08297661622436157, 'max_depth': 9, 'num_leaves': 78, 'min_child_samples': 17, 'subsample': 0.910290739437736, 'colsample_bytree': 0.721828901909258, 'reg_alpha': 8.218600592903563, 'reg_lambda': 7.3561800444084655, 'min_gain_to_split': 0.08134878064189976}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  90%|█████████ | 45/50 [00:36<00:02,  1.92it/s]

[I 2025-08-16 12:30:23,538] Trial 44 finished with value: 0.6674950670204803 and parameters: {'n_estimators': 627, 'learning_rate': 0.19038196232522647, 'max_depth': 6, 'num_leaves': 205, 'min_child_samples': 83, 'subsample': 0.9841745732151577, 'colsample_bytree': 0.9958003191468613, 'reg_alpha': 7.533781852589416, 'reg_lambda': 4.386336269778242, 'min_gain_to_split': 0.08350071669866876}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  92%|█████████▏| 46/50 [00:37<00:02,  1.60it/s]

[I 2025-08-16 12:30:24,406] Trial 45 finished with value: 0.7744958579982308 and parameters: {'n_estimators': 1666, 'learning_rate': 0.039225375689251656, 'max_depth': 7, 'num_leaves': 456, 'min_child_samples': 20, 'subsample': 0.8477875312872577, 'colsample_bytree': 0.7034060934302256, 'reg_alpha': 4.6866064199412625, 'reg_lambda': 1.5067294811365362, 'min_gain_to_split': 0.11881791626807192}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  94%|█████████▍| 47/50 [00:37<00:01,  1.88it/s]

[I 2025-08-16 12:30:24,715] Trial 46 finished with value: 0.6110794649134744 and parameters: {'n_estimators': 676, 'learning_rate': 0.05483323330491335, 'max_depth': 10, 'num_leaves': 305, 'min_child_samples': 97, 'subsample': 0.8124611738571113, 'colsample_bytree': 0.7857136258845582, 'reg_alpha': 8.685991281894603, 'reg_lambda': 3.0123625466750736, 'min_gain_to_split': 0.9632225394406113}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  96%|█████████▌| 48/50 [00:37<00:00,  2.20it/s]

[I 2025-08-16 12:30:24,992] Trial 47 finished with value: 0.719220156891429 and parameters: {'n_estimators': 518, 'learning_rate': 0.17896745778978254, 'max_depth': 3, 'num_leaves': 449, 'min_child_samples': 58, 'subsample': 0.9978894388357902, 'colsample_bytree': 0.7221389694206196, 'reg_alpha': 5.538542844013207, 'reg_lambda': 9.72372282057189, 'min_gain_to_split': 0.5230978441701488}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666:  98%|█████████▊| 49/50 [00:38<00:00,  2.01it/s]

[I 2025-08-16 12:30:25,590] Trial 48 finished with value: 0.7679814899866185 and parameters: {'n_estimators': 1444, 'learning_rate': 0.06510302118153247, 'max_depth': 7, 'num_leaves': 325, 'min_child_samples': 63, 'subsample': 0.9703474031472967, 'colsample_bytree': 0.7136339141024374, 'reg_alpha': 2.809631895922303, 'reg_lambda': 9.553703356689029, 'min_gain_to_split': 0.8902637838909163}. Best is trial 23 with value: 0.840665895534236.


Best trial: 23. Best value: 0.840666: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s]
[I 2025-08-16 12:30:26,282] A new study created in memory with name: no-name-22cce209-3c4a-4422-b6b4-a02e2a1709eb


[I 2025-08-16 12:30:26,273] Trial 49 finished with value: 0.8040202365562132 and parameters: {'n_estimators': 1183, 'learning_rate': 0.04925605534809107, 'max_depth': 5, 'num_leaves': 119, 'min_child_samples': 52, 'subsample': 0.8060056684078158, 'colsample_bytree': 0.8750968335552616, 'reg_alpha': 0.7773463696498484, 'reg_lambda': 9.769553268995498, 'min_gain_to_split': 0.9862107444796029}. Best is trial 23 with value: 0.840665895534236.
Best lightgbm_v2 AUC: 0.8407
Best lightgbm_v2 params: {'n_estimators': 1693, 'learning_rate': 0.031931901530863685, 'max_depth': 8, 'num_leaves': 262, 'min_child_samples': 27, 'subsample': 0.9167356345784516, 'colsample_bytree': 0.7842317087322567, 'reg_alpha': 0.2431596643145384, 'reg_lambda': 6.80925066316451, 'min_gain_to_split': 0.17711067940704894}
Optimizing catboost_v2 with enhanced parameters...


Best trial: 0. Best value: 0.854494:   2%|▏         | 1/50 [00:49<40:12, 49.24s/it]

[I 2025-08-16 12:31:15,524] Trial 0 finished with value: 0.8544943270735524 and parameters: {'iterations': 1062, 'learning_rate': 0.1667521176194013, 'depth': 9, 'l2_leaf_reg': 6.387926357773329, 'border_count': 66, 'bagging_temperature': 1.5599452033620265, 'random_strength': 0.5808361216819946}. Best is trial 0 with value: 0.8544943270735524.


Best trial: 1. Best value: 0.869919:   4%|▍         | 2/50 [02:47<1:12:02, 90.05s/it]

[I 2025-08-16 12:33:14,134] Trial 1 finished with value: 0.8699189602186387 and parameters: {'iterations': 1800, 'learning_rate': 0.045918988705873284, 'depth': 8, 'l2_leaf_reg': 1.185260448662222, 'border_count': 249, 'bagging_temperature': 8.324426408004218, 'random_strength': 2.1233911067827616}. Best is trial 1 with value: 0.8699189602186387.


Best trial: 1. Best value: 0.869919:   6%|▌         | 3/50 [02:59<42:37, 54.40s/it]  

[I 2025-08-16 12:33:26,124] Trial 2 finished with value: 0.8601735614978114 and parameters: {'iterations': 772, 'learning_rate': 0.009835468046820034, 'depth': 6, 'l2_leaf_reg': 5.72280788469014, 'border_count': 128, 'bagging_temperature': 2.9122914019804194, 'random_strength': 6.118528947223795}. Best is trial 1 with value: 0.8699189602186387.


Best trial: 3. Best value: 0.87069:   8%|▊         | 4/50 [03:12<28:57, 37.77s/it] 

[I 2025-08-16 12:33:38,401] Trial 3 finished with value: 0.8706896673924384 and parameters: {'iterations': 709, 'learning_rate': 0.014689372953975089, 'depth': 6, 'l2_leaf_reg': 5.104629857953324, 'border_count': 207, 'bagging_temperature': 1.9967378215835974, 'random_strength': 5.142344384136116}. Best is trial 3 with value: 0.8706896673924384.


Best trial: 4. Best value: 0.873077:  10%|█         | 5/50 [03:37<25:05, 33.46s/it]

[I 2025-08-16 12:34:04,220] Trial 4 finished with value: 0.8730773428817672 and parameters: {'iterations': 1389, 'learning_rate': 0.005934530307791968, 'depth': 8, 'l2_leaf_reg': 2.5347171131856236, 'border_count': 46, 'bagging_temperature': 9.488855372533333, 'random_strength': 9.656320330745594}. Best is trial 4 with value: 0.8730773428817672.


Best trial: 4. Best value: 0.873077:  12%|█▏        | 6/50 [03:54<20:22, 27.78s/it]

[I 2025-08-16 12:34:20,954] Trial 5 finished with value: 0.85950392370325 and parameters: {'iterations': 1713, 'learning_rate': 0.015380821666156693, 'depth': 4, 'l2_leaf_reg': 7.158097238609412, 'border_count': 130, 'bagging_temperature': 1.2203823484477883, 'random_strength': 4.951769101112702}. Best is trial 4 with value: 0.8730773428817672.


Best trial: 4. Best value: 0.873077:  14%|█▍        | 7/50 [04:02<15:15, 21.30s/it]

[I 2025-08-16 12:34:28,912] Trial 6 finished with value: 0.8595908887301263 and parameters: {'iterations': 551, 'learning_rate': 0.14313829500644557, 'depth': 5, 'l2_leaf_reg': 6.962700559185838, 'border_count': 101, 'bagging_temperature': 5.200680211778108, 'random_strength': 5.4671027934327965}. Best is trial 4 with value: 0.8730773428817672.


Best trial: 4. Best value: 0.873077:  16%|█▌        | 8/50 [05:29<29:30, 42.16s/it]

[I 2025-08-16 12:35:55,758] Trial 7 finished with value: 0.8597221513460797 and parameters: {'iterations': 777, 'learning_rate': 0.17877333612826407, 'depth': 9, 'l2_leaf_reg': 9.455490474077703, 'border_count': 232, 'bagging_temperature': 5.978999788110851, 'random_strength': 9.218742350231167}. Best is trial 4 with value: 0.8730773428817672.


Best trial: 4. Best value: 0.873077:  18%|█▊        | 9/50 [05:35<21:09, 30.96s/it]

[I 2025-08-16 12:36:02,074] Trial 8 finished with value: 0.82315381823955 and parameters: {'iterations': 632, 'learning_rate': 0.010302587393796307, 'depth': 4, 'l2_leaf_reg': 3.927972976869379, 'border_count': 119, 'bagging_temperature': 2.713490317738959, 'random_strength': 8.287375091519294}. Best is trial 4 with value: 0.8730773428817672.


Best trial: 9. Best value: 0.873523:  20%|██        | 10/50 [06:04<20:14, 30.37s/it]

[I 2025-08-16 12:36:31,141] Trial 9 finished with value: 0.8735225868090994 and parameters: {'iterations': 1035, 'learning_rate': 0.014094313993387368, 'depth': 7, 'l2_leaf_reg': 2.2683180247728636, 'border_count': 211, 'bagging_temperature': 0.7455064367977082, 'random_strength': 9.868869366005173}. Best is trial 9 with value: 0.8735225868090994.


Best trial: 9. Best value: 0.873523:  22%|██▏       | 11/50 [06:22<17:10, 26.42s/it]

[I 2025-08-16 12:36:48,592] Trial 10 finished with value: 0.8565073512735024 and parameters: {'iterations': 1659, 'learning_rate': 0.01040697346842839, 'depth': 4, 'l2_leaf_reg': 8.339152856093508, 'border_count': 190, 'bagging_temperature': 7.2900716804098735, 'random_strength': 7.712703466859457}. Best is trial 9 with value: 0.8735225868090994.


Best trial: 9. Best value: 0.873523:  24%|██▍       | 12/50 [06:29<13:00, 20.53s/it]

[I 2025-08-16 12:36:55,661] Trial 11 finished with value: 0.8616205688235693 and parameters: {'iterations': 611, 'learning_rate': 0.018760897310138556, 'depth': 4, 'l2_leaf_reg': 8.767930832880342, 'border_count': 171, 'bagging_temperature': 3.308980248526492, 'random_strength': 0.6355835028602363}. Best is trial 9 with value: 0.8735225868090994.


In [None]:
# 4. Advanced Ensemble with Weighted Voting and Stacking - FIXED VERSION
class UltraAdvancedEnsemble:
    """Ultra-advanced ensemble with multiple techniques"""
    
    def __init__(self, random_state=42):
        self.random_state = random_state
        self.models = {}
        self.weights = {}
        self.meta_model = None
        
    def create_advanced_models(self, optimization_results):
        """Create models with advanced parameters"""
        models = {}
        
        for model_type, result in optimization_results.items():
            params = result['params'].copy()
            params['random_state'] = self.random_state
            
            if model_type == 'xgboost_v2':
                params['eval_metric'] = 'auc'
                params['verbosity'] = 0
                models[model_type] = xgb.XGBClassifier(**params)
            elif model_type == 'lightgbm_v2':
                params['verbosity'] = -1
                params['force_col_wise'] = True
                models[model_type] = lgb.LGBMClassifier(**params)
            elif model_type == 'catboost_v2':
                params['verbose'] = False
                models[model_type] = cb.CatBoostClassifier(**params)
                
        return models
    
    def fit_weighted_ensemble(self, X, y, models):
        """Fit ensemble with optimal weights"""
        from scipy.optimize import minimize
        
        # CRITICAL FIX: Convert to clean numpy arrays immediately
        print("Converting data to numpy arrays...")
        X_array = X.values if hasattr(X, 'values') else X
        y_array = y.values if hasattr(y, 'values') else y
        
        # Ensure clean indices for any DataFrame operations
        if hasattr(X, 'reset_index'):
            X = X.reset_index(drop=True)
        if hasattr(y, 'reset_index'):
            y = y.reset_index(drop=True)
        
        # Train all models
        trained_models = {}
        cv_scores = {}
        
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=self.random_state)
        
        for name, model in models.items():
            print(f"Training {name}...")
            # Use numpy arrays for training to avoid DataFrame issues
            model.fit(X_array, y_array)
            trained_models[name] = model
            
            # Get CV scores for weighting using numpy arrays
            scores = []
            for train_idx, val_idx in cv.split(X_array, y_array):
                X_train_fold = X_array[train_idx]
                y_train_fold = y_array[train_idx]
                X_val_fold = X_array[val_idx]
                y_val_fold = y_array[val_idx]
                
                try:
                    model_clone = clone(model)
                    model_clone.fit(X_train_fold, y_train_fold)
                    y_pred_proba = model_clone.predict_proba(X_val_fold)[:, 1]
                    score = roc_auc_score(y_val_fold, y_pred_proba)
                    scores.append(score)
                except Exception as e:
                    print(f"Error in CV for {name}: {e}")
                    scores.append(0.5)  # Default score
            
            cv_scores[name] = np.mean(scores)
            print(f"{name} CV AUC: {cv_scores[name]:.4f}")
        
        # Optimize weights based on CV performance
        def objective(weights):
            weights = weights / np.sum(weights)  # Normalize
            ensemble_pred = np.zeros(len(y_array))
            
            try:
                for fold, (train_idx, val_idx) in enumerate(cv.split(X_array, y_array)):
                    X_train_fold = X_array[train_idx]
                    y_train_fold = y_array[train_idx]
                    X_val_fold = X_array[val_idx]
                    y_val_fold = y_array[val_idx]
                    
                    fold_preds = []
                    for i, (name, model) in enumerate(models.items()):
                        try:
                            model_clone = clone(model)
                            model_clone.fit(X_train_fold, y_train_fold)
                            pred_proba = model_clone.predict_proba(X_val_fold)[:, 1]
                            fold_preds.append(pred_proba)
                        except Exception as e:
                            print(f"Error in weight optimization for {name}: {e}")
                            fold_preds.append(np.full(len(y_val_fold), 0.5))
                    
                    if fold_preds:
                        weighted_pred = np.average(fold_preds, axis=0, weights=weights)
                        ensemble_pred[val_idx] = weighted_pred
                
                return -roc_auc_score(y_array, ensemble_pred)  # Minimize negative AUC
            except Exception as e:
                print(f"Error in objective function: {e}")
                return 1.0  # High value to minimize
        
        # Initial weights based on CV scores
        if cv_scores:
            initial_weights = np.array([cv_scores[name] for name in models.keys()])
            initial_weights = initial_weights / np.sum(initial_weights)
            
            # Optimize weights with error handling
            try:
                result = minimize(
                    objective, 
                    initial_weights, 
                    method='SLSQP',
                    bounds=[(0.1, 1.0) for _ in range(len(models))],
                    constraints={'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
                )
                optimal_weights = result.x / np.sum(result.x)
            except Exception as e:
                print(f"Weight optimization failed: {e}")
                print("Using equal weights...")
                optimal_weights = np.ones(len(models)) / len(models)
        else:
            print("No CV scores available, using equal weights...")
            optimal_weights = np.ones(len(models)) / len(models)
        
        self.models = trained_models
        self.weights = dict(zip(models.keys(), optimal_weights))
        
        print("\nOptimal weights:")
        for name, weight in self.weights.items():
            print(f"{name}: {weight:.3f}")
        
        return self
    
    def predict_proba_weighted(self, X):
        """Predict with weighted ensemble"""
        # Convert to numpy array to avoid DataFrame issues
        X_array = X.values if hasattr(X, 'values') else X
        
        predictions = []
        
        for name, model in self.models.items():
            try:
                pred_proba = model.predict_proba(X_array)[:, 1]
                predictions.append(pred_proba * self.weights[name])
            except Exception as e:
                print(f"Error predicting with {name}: {e}")
                predictions.append(np.full(X_array.shape[0], 0.5) * self.weights[name])
        
        return np.sum(predictions, axis=0)
    
    def predict_weighted(self, X):
        """Predict classes with weighted ensemble"""
        proba = self.predict_proba_weighted(X)
        return (proba > 0.5).astype(int)

# Create and train ultra-advanced ensemble with error handling
print("Creating ultra-advanced ensemble...")
ultra_ensemble = UltraAdvancedEnsemble(random_state=42)
advanced_models_dict = ultra_ensemble.create_advanced_models(advanced_results)

print("Fitting weighted ensemble...")
ultra_ensemble.fit_weighted_ensemble(X_train_final_v2, Y_train, advanced_models_dict)

print("Making predictions...")
# Make predictions
ultra_pred_proba = ultra_ensemble.predict_proba_weighted(X_test_final_v2)
ultra_pred = ultra_ensemble.predict_weighted(X_test_final_v2)

# Evaluate ultra ensemble
ultra_metrics = evaluator.calculate_metrics(Y_test, ultra_pred, ultra_pred_proba)
print(f"\nUltra Ensemble Results:")
print(f"AUC: {ultra_metrics['auc']:.4f}")
print(f"Accuracy: {ultra_metrics['accuracy']:.4f}")
print(f"F1: {ultra_metrics['f1']:.4f}")
print(f"MCC: {ultra_metrics['mcc']:.4f}")

Creating ultra-advanced ensemble...
Fitting weighted ensemble...
Converting data to numpy arrays...
Training xgboost_v2...
xgboost_v2 CV AUC: 0.8374
Training lightgbm_v2...
lightgbm_v2 CV AUC: 0.8407
Training catboost_v2...
catboost_v2 CV AUC: 0.8761

Optimal weights:
xgboost_v2: 0.328
lightgbm_v2: 0.329
catboost_v2: 0.343
Making predictions...

Ultra Ensemble Results:
AUC: 0.8393
Accuracy: 0.8000
F1: 0.4286
MCC: 0.3849


In [None]:
# 5. Additional Techniques: Pseudo-labeling and Data Augmentation - FIXED VERSION
def pseudo_labeling_enhancement(X_train, y_train, X_test, best_model, confidence_threshold=0.9):
    """Add high-confidence predictions as pseudo-labels"""
    
    # CRITICAL FIX: Convert to numpy arrays to avoid column name issues
    X_train_array = X_train.values if hasattr(X_train, 'values') else X_train
    y_train_array = y_train.values if hasattr(y_train, 'values') else y_train
    X_test_array = X_test.values if hasattr(X_test, 'values') else X_test
    
    # Train model on original data using numpy arrays
    best_model.fit(X_train_array, y_train_array)
    
    # Get predictions on test set
    test_proba = best_model.predict_proba(X_test_array)
    
    # Select high-confidence predictions
    max_proba = np.max(test_proba, axis=1)
    high_conf_mask = max_proba >= confidence_threshold
    
    if np.sum(high_conf_mask) > 0:
        # Add pseudo-labels using numpy arrays
        X_pseudo = X_test_array[high_conf_mask]
        y_pseudo = np.argmax(test_proba[high_conf_mask], axis=1)
        
        # Combine with training data (numpy arrays)
        X_enhanced = np.vstack([X_train_array, X_pseudo])
        y_enhanced = np.concatenate([y_train_array, y_pseudo])
        
        print(f"Added {len(y_pseudo)} pseudo-labels")
        return X_enhanced, y_enhanced
    else:
        print("No high-confidence predictions found")
        return X_train_array, y_train_array

# Also, let's check for and fix duplicate column names in our data
def fix_duplicate_columns(df):
    """Fix duplicate column names by adding suffixes"""
    cols = pd.Series(df.columns)
    for dup in cols[cols.duplicated()].unique():
        cols[cols[cols == dup].index.values[1:]] = [dup + '_' + str(i) for i in range(1, sum(cols == dup))]
    df.columns = cols
    return df

# Fix duplicate columns in our datasets
print("Checking for duplicate columns...")
print(f"X_train_final_v2 duplicate columns: {X_train_final_v2.columns.duplicated().sum()}")
print(f"X_test_final_v2 duplicate columns: {X_test_final_v2.columns.duplicated().sum()}")

if X_train_final_v2.columns.duplicated().sum() > 0:
    print("Fixing duplicate columns...")
    X_train_final_v2 = fix_duplicate_columns(X_train_final_v2)
    X_test_final_v2 = fix_duplicate_columns(X_test_final_v2)
    print("Duplicate columns fixed!")

# Apply pseudo-labeling with best model
best_model_name = max(advanced_results.keys(), key=lambda x: advanced_results[x]['score'])
best_model = advanced_models_dict[best_model_name]

print(f"Using {best_model_name} for pseudo-labeling...")

X_train_pseudo, Y_train_pseudo = pseudo_labeling_enhancement(
    X_train_final_v2, Y_train, X_test_final_v2, best_model, confidence_threshold=0.95
)

# Retrain ultra ensemble with pseudo-labels if any were added
if len(Y_train_pseudo) > len(Y_train):
    print("Retraining with pseudo-labels...")
    
    # Create new ensemble for pseudo-labeled data
    ultra_ensemble_v2 = UltraAdvancedEnsemble(random_state=42)
    
    # For the ensemble, we need to use the original models but retrain them
    # Create fresh models to avoid any state issues
    advanced_models_dict_v2 = ultra_ensemble_v2.create_advanced_models(advanced_results)
    
    # Fit with pseudo-labeled data (using numpy arrays)
    ultra_ensemble_v2.fit_weighted_ensemble(X_train_pseudo, Y_train_pseudo, advanced_models_dict_v2)
    
    # Final predictions
    final_pred_proba = ultra_ensemble_v2.predict_proba_weighted(X_test_final_v2)
    final_pred = ultra_ensemble_v2.predict_weighted(X_test_final_v2)
    
    final_metrics = evaluator.calculate_metrics(Y_test, final_pred, final_pred_proba)
    print(f"\nFinal Enhanced Results:")
    print(f"AUC: {final_metrics['auc']:.4f}")
    print(f"Accuracy: {final_metrics['accuracy']:.4f}")
    print(f"F1: {final_metrics['f1']:.4f}")
    print(f"MCC: {final_metrics['mcc']:.4f}")
else:
    final_pred_proba = ultra_pred_proba
    final_pred = ultra_pred
    final_metrics = ultra_metrics
    print("No pseudo-labels added, using original ensemble results.")

Checking for duplicate columns...
X_train_final_v2 duplicate columns: 9
X_test_final_v2 duplicate columns: 9
Fixing duplicate columns...
Duplicate columns fixed!
Using catboost_v2 for pseudo-labeling...
Added 74 pseudo-labels
Retraining with pseudo-labels...
Converting data to numpy arrays...
Training xgboost_v2...
xgboost_v2 CV AUC: 0.8749
Training lightgbm_v2...
lightgbm_v2 CV AUC: 0.8829
Training catboost_v2...
catboost_v2 CV AUC: 0.9016


In [None]:
# 6. Model Calibration for Better Probability Estimates
from sklearn.calibration import CalibratedClassifierCV

def calibrate_model_predictions(model, X_train, y_train, X_test, method='isotonic'):
    """Calibrate model predictions for better probability estimates"""
    
    # Create calibrated classifier
    calibrated_model = CalibratedClassifierCV(model, method=method, cv=5)
    calibrated_model.fit(X_train, y_train)
    
    # Get calibrated predictions
    calibrated_proba = calibrated_model.predict_proba(X_test)[:, 1]
    calibrated_pred = calibrated_model.predict(X_test)
    
    return calibrated_pred, calibrated_proba

# Calibrate the best individual model
best_model_calibrated = clone(best_model)
cal_pred, cal_pred_proba = calibrate_model_predictions(
    best_model_calibrated, X_train_final_v2, Y_train, X_test_final_v2
)

cal_metrics = evaluator.calculate_metrics(Y_test, cal_pred, cal_pred_proba)
print(f"\nCalibrated Model Results:")
print(f"AUC: {cal_metrics['auc']:.4f}")
print(f"Accuracy: {cal_metrics['accuracy']:.4f}")
print(f"F1: {cal_metrics['f1']:.4f}")
print(f"MCC: {cal_metrics['mcc']:.4f}")

In [None]:
# 7. Final Comparison and Selection
print("\n" + "="*50)
print("FINAL RESULTS COMPARISON")
print("="*50)

all_results = {
    'Original Best': results_df.loc[results_df['Test_auc'].idxmax(), 'Test_auc'],
    'Ultra Ensemble': ultra_metrics['auc'],
    'Calibrated Model': cal_metrics['auc'],
}

if len(Y_train_pseudo) > len(Y_train):
    all_results['Enhanced with Pseudo-labels'] = final_metrics['auc']

for method, auc in sorted(all_results.items(), key=lambda x: x[1], reverse=True):
    print(f"{method:25}: AUC = {auc:.4f}")

# Select best method
best_method = max(all_results.keys(), key=lambda x: all_results[x])
best_auc = all_results[best_method]

print(f"\nBest method: {best_method} with AUC = {best_auc:.4f}")

if best_auc >= 0.9:
    print("🎉 Target AUC of 0.9+ achieved!")
else:
    print(f"Current best AUC: {best_auc:.4f}")
    print("Consider running with more trials or trying additional techniques.")