In [2]:
# ============================================================================
# Travelers UMC 2025 - Baseline Version 2
# Optimal 3-Model Ensemble for Subrogation Prediction
# ============================================================================
# Install missing packages if needed
import subprocess
import sys

def install_package(package):
    try:
        __import__(package)
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"{package} installed successfully!")

# Check and install required packages
packages_to_check = {
    'imbalanced-learn': 'imblearn',
    'optuna': 'optuna',
    'xgboost': 'xgboost'
}

for pip_name, import_name in packages_to_check.items():
    try:
        __import__(import_name)
    except ImportError:
        print(f"Installing {pip_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
        print(f"{pip_name} installed successfully!")

# All Required Libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Data handling
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, classification_report

# Models
import xgboost as xgb
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV

# Imbalance handling
from imblearn.over_sampling import SMOTE

# Hyperparameter optimization
import optuna
from optuna.samplers import TPESampler

# Visualization (optional)
import matplotlib.pyplot as plt
import seaborn as sns

print("\n" + "="*80)
print("All libraries imported successfully!")
print(f"XGBoost version: {xgb.__version__}")
print("="*80)


Installing optuna...
optuna installed successfully!

All libraries imported successfully!
XGBoost version: 3.1.1


In [3]:
# ============================================================================
# STEP 1: Load Engineered Data
# ============================================================================
print("="*80)
print("LOADING ENGINEERED DATA")
print("="*80)

train_df = pd.read_csv('engineered_train.csv')
test_df = pd.read_csv('engineered_test.csv')
test_original = pd.read_csv('Data\\Testing_TriGuard.csv')

print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")

# Drop non-feature columns
cols_to_drop = ['claim_date', 'year_of_born', 'vehicle_made_year', 
                'gender', 'living_status', 'claim_day_of_week', 'accident_site',
                'witness_present_ind', 'channel', 'vehicle_category', 'vehicle_color',
                'accident_type', 'in_network_bodyshop', 'zip_group']

train_df = train_df.drop(columns=[col for col in cols_to_drop if col in train_df.columns], errors='ignore')
test_df = test_df.drop(columns=[col for col in cols_to_drop if col in test_df.columns], errors='ignore')

# Separate features and target
X = train_df.drop(['subrogation'], axis=1, errors='ignore')
y = train_df['subrogation'].astype(int)
X_test = test_df.copy()

# Keep only numeric columns
X = X.select_dtypes(include=[np.number])
X_test = X_test.select_dtypes(include=[np.number])

# Align columns between train and test
X_test = X_test.reindex(columns=X.columns, fill_value=0)

# Handle any remaining NaN/inf values
X = X.replace([np.inf, -np.inf], np.nan).fillna(0)
X_test = X_test.replace([np.inf, -np.inf], np.nan).fillna(0)

print(f"\nFinal feature count: {X.shape[1]}")
print(f"Train samples: {X.shape[0]}")
print(f"Test samples: {X_test.shape[0]}")
print(f"\nClass distribution:\n{y.value_counts(normalize=True)}")


LOADING ENGINEERED DATA
Train shape: (18000, 105)
Test shape: (12000, 104)

Final feature count: 90
Train samples: 18000
Test samples: 12000

Class distribution:
subrogation
0    0.771389
1    0.228611
Name: proportion, dtype: float64


In [4]:
# ============================================================================
# STEP 2: Handle Class Imbalance with SMOTE
# ============================================================================
print("\n" + "="*80)
print("APPLYING SMOTE FOR CLASS BALANCING")
print("="*80)

smote = SMOTE(sampling_strategy=0.5, random_state=42, k_neighbors=5)
X_resampled, y_resampled = smote.fit_resample(X, y)

print(f"Before SMOTE: {X.shape[0]} samples")
print(f"After SMOTE: {X_resampled.shape[0]} samples")
print(f"Class distribution after SMOTE:\n{pd.Series(y_resampled).value_counts(normalize=True)}")



APPLYING SMOTE FOR CLASS BALANCING
Before SMOTE: 18000 samples
After SMOTE: 20827 samples
Class distribution after SMOTE:
subrogation
0    0.666683
1    0.333317
Name: proportion, dtype: float64


In [5]:
# ============================================================================
# STEP 3: Feature Selection (Top Features)
# ============================================================================
print("\n" + "="*80)
print("FEATURE SELECTION")
print("="*80)

selector_model = xgb.XGBClassifier(
    n_estimators=100, 
    max_depth=5, 
    learning_rate=0.1,
    random_state=42, 
    eval_metric='auc',
    tree_method='hist'
)
selector_model.fit(X_resampled, y_resampled)

# Select top 150 features
selector = SelectFromModel(selector_model, max_features=150, threshold=-np.inf)
X_selected = selector.transform(X_resampled)
X_test_selected = selector.transform(X_test)

selected_features = X.columns[selector.get_support()].tolist()
print(f"Selected {len(selected_features)} features from {X.shape[1]} total features")
print(f"\nTop 20 features: {selected_features[:20]}")



FEATURE SELECTION
Selected 90 features from 90 total features

Top 20 features: ['email_or_tel_available', 'safety_rating', 'annual_income', 'high_education_ind', 'address_change_ind', 'zip_code', 'past_num_of_claims', 'liab_prct', 'policy_report_filed_ind', 'claim_est_payout', 'vehicle_price', 'vehicle_weight', 'age_of_DL', 'vehicle_mileage', 'claim_month', 'claim_year', 'driver_age', 'vehicle_age', 'vehicle_price_default', 'annual_income_log']


In [6]:
# ============================================================================
# STEP 4: Hyperparameter Optimization with Optuna
# ============================================================================
print("\n" + "="*80)
print("BAYESIAN HYPERPARAMETER OPTIMIZATION")
print("="*80)

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Model 1: XGBoost Optimization
def objective_xgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 1500),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.15, log=True),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 3),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 8),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 2),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 2),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1, 4),
        'random_state': 42,
        'eval_metric': 'auc',
        'tree_method': 'hist'
    }
    model = xgb.XGBClassifier(**params)
    scores = cross_val_score(model, X_selected, y_resampled, cv=cv, scoring='roc_auc', n_jobs=-1)
    return scores.mean()

# Model 2: ExtraTrees Optimization
def objective_et(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 300, 800),
        'max_depth': trial.suggest_int('max_depth', 10, 25),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', 0.5, 0.7]),
        'bootstrap': True,
        'class_weight': trial.suggest_categorical('class_weight', [None, 'balanced']),
        'random_state': 42,
        'n_jobs': -1
    }
    model = ExtraTreesClassifier(**params)
    scores = cross_val_score(model, X_selected, y_resampled, cv=cv, scoring='roc_auc', n_jobs=-1)
    return scores.mean()

# Model 3: Logistic Regression Optimization
def objective_lr(trial):
    penalty = trial.suggest_categorical('penalty', ['l2', 'elasticnet'])
    C = trial.suggest_float('C', 0.01, 100, log=True)
    class_weight = trial.suggest_categorical('class_weight', [None, 'balanced'])
    
    if penalty == 'elasticnet':
        l1_ratio = trial.suggest_float('l1_ratio', 0.1, 0.9)
        params = {
            'C': C,
            'penalty': penalty,
            'l1_ratio': l1_ratio,
            'solver': 'saga',
            'max_iter': 2000,
            'class_weight': class_weight,
            'random_state': 42
        }
    else:
        params = {
            'C': C,
            'penalty': penalty,
            'solver': 'lbfgs',
            'max_iter': 2000,
            'class_weight': class_weight,
            'random_state': 42
        }
    
    model = LogisticRegression(**params)
    scores = cross_val_score(model, X_selected, y_resampled, cv=cv, scoring='roc_auc', n_jobs=-1)
    return scores.mean()

print("\nOptimizing XGBoost...")
study_xgb = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_xgb.optimize(objective_xgb, n_trials=50, show_progress_bar=True)
best_xgb_params = study_xgb.best_params
print(f"Best XGBoost AUC: {study_xgb.best_value:.5f}")

print("\nOptimizing ExtraTrees...")
study_et = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_et.optimize(objective_et, n_trials=50, show_progress_bar=True)
best_et_params = study_et.best_params
print(f"Best ExtraTrees AUC: {study_et.best_value:.5f}")

print("\nOptimizing Logistic Regression...")
study_lr = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study_lr.optimize(objective_lr, n_trials=30, show_progress_bar=True)
best_lr_params = study_lr.best_params
print(f"Best Logistic Regression AUC: {study_lr.best_value:.5f}")

print("\n" + "="*80)
print("OPTIMIZATION COMPLETE")
print("="*80)


[I 2025-11-02 01:24:27,092] A new study created in memory with name: no-name-04a7fe92-1891-4892-af74-d9fae040b443



BAYESIAN HYPERPARAMETER OPTIMIZATION

Optimizing XGBoost...


Best trial: 0. Best value: 0.898858:   2%|▏         | 1/50 [00:07<05:44,  7.03s/it]

[I 2025-11-02 01:24:34,126] Trial 0 finished with value: 0.8988581176161583 and parameters: {'n_estimators': 874, 'max_depth': 10, 'learning_rate': 0.07259248719561363, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'gamma': 0.46798356100860794, 'min_child_weight': 1, 'reg_alpha': 1.7323522915498704, 'reg_lambda': 1.2022300234864176, 'scale_pos_weight': 3.1242177333881367}. Best is trial 0 with value: 0.8988581176161583.


Best trial: 0. Best value: 0.898858:   4%|▍         | 2/50 [00:11<04:33,  5.71s/it]

[I 2025-11-02 01:24:38,903] Trial 1 finished with value: 0.896233282899583 and parameters: {'n_estimators': 520, 'max_depth': 10, 'learning_rate': 0.09528587217040241, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'gamma': 0.5502135295603015, 'min_child_weight': 3, 'reg_alpha': 1.0495128632644757, 'reg_lambda': 0.8638900372842315, 'scale_pos_weight': 1.8736874205941256}. Best is trial 0 with value: 0.8988581176161583.


Best trial: 2. Best value: 0.902361:   6%|▌         | 3/50 [00:16<03:55,  5.02s/it]

[I 2025-11-02 01:24:43,102] Trial 2 finished with value: 0.9023609308958939 and parameters: {'n_estimators': 1112, 'max_depth': 4, 'learning_rate': 0.022059149678071027, 'subsample': 0.8099085529881075, 'colsample_bytree': 0.8368209952651108, 'gamma': 2.3555278841790406, 'min_child_weight': 2, 'reg_alpha': 1.0284688768272232, 'reg_lambda': 1.184829137724085, 'scale_pos_weight': 1.139351238159993}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:   8%|▊         | 4/50 [00:21<03:58,  5.18s/it]

[I 2025-11-02 01:24:48,515] Trial 3 finished with value: 0.9011976837683081 and parameters: {'n_estimators': 1108, 'max_depth': 5, 'learning_rate': 0.011926324174062874, 'subsample': 0.984665661176, 'colsample_bytree': 0.9896896099223678, 'gamma': 2.4251920443493833, 'min_child_weight': 3, 'reg_alpha': 0.19534422801276774, 'reg_lambda': 1.3684660530243138, 'scale_pos_weight': 2.3204574812188037}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  10%|█         | 5/50 [00:26<03:56,  5.26s/it]

[I 2025-11-02 01:24:53,923] Trial 4 finished with value: 0.9006701025053673 and parameters: {'n_estimators': 622, 'max_depth': 7, 'learning_rate': 0.01097599850212944, 'subsample': 0.9727961206236346, 'colsample_bytree': 0.777633994480005, 'gamma': 1.987566853061946, 'min_child_weight': 3, 'reg_alpha': 1.0401360423556216, 'reg_lambda': 1.0934205586865593, 'scale_pos_weight': 1.554563366576581}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  12%|█▏        | 6/50 [00:30<03:29,  4.77s/it]

[I 2025-11-02 01:24:57,751] Trial 5 finished with value: 0.8957220616054865 and parameters: {'n_estimators': 1470, 'max_depth': 9, 'learning_rate': 0.12733168701231962, 'subsample': 0.9684482051282947, 'colsample_bytree': 0.8793699936433255, 'gamma': 2.7656227050693505, 'min_child_weight': 1, 'reg_alpha': 0.3919657248382904, 'reg_lambda': 0.09045457782107613, 'scale_pos_weight': 1.975990992289793}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  14%|█▍        | 7/50 [00:34<03:12,  4.49s/it]

[I 2025-11-02 01:25:01,653] Trial 6 finished with value: 0.8953807172194532 and parameters: {'n_estimators': 889, 'max_depth': 5, 'learning_rate': 0.09433458442225107, 'subsample': 0.8070259980080767, 'colsample_bytree': 0.7842803529062142, 'gamma': 1.6280882494747453, 'min_child_weight': 2, 'reg_alpha': 1.6043939615080793, 'reg_lambda': 0.14910128735954165, 'scale_pos_weight': 3.960660809801552}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  16%|█▌        | 8/50 [00:39<03:12,  4.59s/it]

[I 2025-11-02 01:25:06,456] Trial 7 finished with value: 0.9019009256091435 and parameters: {'n_estimators': 1273, 'max_depth': 5, 'learning_rate': 0.010150665434429315, 'subsample': 0.9446384285364502, 'colsample_bytree': 0.9120572031542851, 'gamma': 2.1870215041229617, 'min_child_weight': 7, 'reg_alpha': 0.14808930346818072, 'reg_lambda': 0.7169314570885452, 'scale_pos_weight': 1.3476071785753891}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  18%|█▊        | 9/50 [00:49<04:19,  6.32s/it]

[I 2025-11-02 01:25:16,577] Trial 8 finished with value: 0.8997651801284026 and parameters: {'n_estimators': 1363, 'max_depth': 8, 'learning_rate': 0.02450001073565503, 'subsample': 0.7190675050858071, 'colsample_bytree': 0.7932946965146986, 'gamma': 0.9755499660802411, 'min_child_weight': 6, 'reg_alpha': 1.2751149427104262, 'reg_lambda': 1.774425485152653, 'scale_pos_weight': 2.4166447754858478}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  20%|██        | 10/50 [00:52<03:28,  5.20s/it]

[I 2025-11-02 01:25:19,286] Trial 9 finished with value: 0.8988278662492417 and parameters: {'n_estimators': 619, 'max_depth': 8, 'learning_rate': 0.07847885275945195, 'subsample': 0.8683831592708489, 'colsample_bytree': 0.9312901539863683, 'gamma': 1.4813867890931722, 'min_child_weight': 5, 'reg_alpha': 0.8550820367170993, 'reg_lambda': 0.05083825348819038, 'scale_pos_weight': 1.3236742809799134}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  22%|██▏       | 11/50 [00:55<02:56,  4.53s/it]

[I 2025-11-02 01:25:22,288] Trial 10 finished with value: 0.9017002012043196 and parameters: {'n_estimators': 1089, 'max_depth': 4, 'learning_rate': 0.03203999390196693, 'subsample': 0.8040552674219865, 'colsample_bytree': 0.839496718827331, 'gamma': 2.7229943858475534, 'min_child_weight': 8, 'reg_alpha': 0.6507199959425696, 'reg_lambda': 1.8939237527802986, 'scale_pos_weight': 1.0369335086948124}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  24%|██▍       | 12/50 [00:58<02:42,  4.29s/it]

[I 2025-11-02 01:25:26,018] Trial 11 finished with value: 0.9021576922710516 and parameters: {'n_estimators': 1265, 'max_depth': 4, 'learning_rate': 0.020394199090916543, 'subsample': 0.9111388754989113, 'colsample_bytree': 0.9143493935391572, 'gamma': 2.111764779276088, 'min_child_weight': 8, 'reg_alpha': 0.015706629031111807, 'reg_lambda': 0.725956143453564, 'scale_pos_weight': 1.020153697967718}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  26%|██▌       | 13/50 [01:02<02:32,  4.12s/it]

[I 2025-11-02 01:25:29,771] Trial 12 finished with value: 0.9022805908749663 and parameters: {'n_estimators': 1212, 'max_depth': 4, 'learning_rate': 0.02046663278344732, 'subsample': 0.9155537760742339, 'colsample_bytree': 0.8606483548444819, 'gamma': 1.7746154069403142, 'min_child_weight': 5, 'reg_alpha': 1.3725938031395708, 'reg_lambda': 0.5098980383998858, 'scale_pos_weight': 1.008855684663833}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  28%|██▊       | 14/50 [01:07<02:35,  4.33s/it]

[I 2025-11-02 01:25:34,568] Trial 13 finished with value: 0.8978316829229422 and parameters: {'n_estimators': 1033, 'max_depth': 6, 'learning_rate': 0.045543435073691005, 'subsample': 0.8270483459112697, 'colsample_bytree': 0.8414386741642014, 'gamma': 1.4561120073996463, 'min_child_weight': 5, 'reg_alpha': 1.3799535905436386, 'reg_lambda': 0.3705194787681688, 'scale_pos_weight': 3.0305712406932157}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  30%|███       | 15/50 [01:11<02:23,  4.09s/it]

[I 2025-11-02 01:25:38,120] Trial 14 finished with value: 0.9020290157875124 and parameters: {'n_estimators': 1222, 'max_depth': 4, 'learning_rate': 0.021889350752898277, 'subsample': 0.9133084657617713, 'colsample_bytree': 0.8287536870000553, 'gamma': 2.9506543830480942, 'min_child_weight': 4, 'reg_alpha': 1.938539704097436, 'reg_lambda': 1.468407954229782, 'scale_pos_weight': 1.790641954540134}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  32%|███▏      | 16/50 [01:15<02:24,  4.24s/it]

[I 2025-11-02 01:25:42,697] Trial 15 finished with value: 0.9016905010383631 and parameters: {'n_estimators': 896, 'max_depth': 6, 'learning_rate': 0.015048974494395545, 'subsample': 0.8495485438639273, 'colsample_bytree': 0.7014220358959269, 'gamma': 1.1037320392963843, 'min_child_weight': 6, 'reg_alpha': 1.3465435815293199, 'reg_lambda': 0.4539796344894438, 'scale_pos_weight': 1.03417889682232}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  34%|███▍      | 17/50 [01:22<02:44,  5.00s/it]

[I 2025-11-02 01:25:49,452] Trial 16 finished with value: 0.8971438488607223 and parameters: {'n_estimators': 1157, 'max_depth': 6, 'learning_rate': 0.044648907294122, 'subsample': 0.7639850006893366, 'colsample_bytree': 0.875790467670702, 'gamma': 0.07108623451749674, 'min_child_weight': 4, 'reg_alpha': 0.7484946788209734, 'reg_lambda': 0.48836239549778265, 'scale_pos_weight': 2.882259479844903}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  36%|███▌      | 18/50 [01:26<02:33,  4.78s/it]

[I 2025-11-02 01:25:53,738] Trial 17 finished with value: 0.9002691293145426 and parameters: {'n_estimators': 1498, 'max_depth': 4, 'learning_rate': 0.03172653090298434, 'subsample': 0.9003377348752277, 'colsample_bytree': 0.9671384915832449, 'gamma': 1.7285712229369785, 'min_child_weight': 2, 'reg_alpha': 1.183035341991451, 'reg_lambda': 1.5714023655372502, 'scale_pos_weight': 1.482605132018061}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  38%|███▊      | 19/50 [01:30<02:23,  4.64s/it]

[I 2025-11-02 01:25:58,054] Trial 18 finished with value: 0.9012980142366491 and parameters: {'n_estimators': 964, 'max_depth': 5, 'learning_rate': 0.016054577783810843, 'subsample': 0.7729774890291333, 'colsample_bytree': 0.877174533511485, 'gamma': 2.4206626977606254, 'min_child_weight': 5, 'reg_alpha': 1.522747246543489, 'reg_lambda': 0.9073210715005743, 'scale_pos_weight': 3.604681012739113}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  40%|████      | 20/50 [01:35<02:22,  4.74s/it]

[I 2025-11-02 01:26:03,010] Trial 19 finished with value: 0.8989068585340794 and parameters: {'n_estimators': 810, 'max_depth': 7, 'learning_rate': 0.03134910660911017, 'subsample': 0.9364881302341646, 'colsample_bytree': 0.8212506262708488, 'gamma': 1.131734923741272, 'min_child_weight': 6, 'reg_alpha': 0.5559202496926237, 'reg_lambda': 1.2807514190059028, 'scale_pos_weight': 2.144602325496663}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  42%|████▏     | 21/50 [01:42<02:36,  5.41s/it]

[I 2025-11-02 01:26:09,973] Trial 20 finished with value: 0.9013020866200929 and parameters: {'n_estimators': 1185, 'max_depth': 6, 'learning_rate': 0.016184820264413334, 'subsample': 0.7081689197479885, 'colsample_bytree': 0.8641331761693597, 'gamma': 1.8283527325770774, 'min_child_weight': 2, 'reg_alpha': 0.8888486436646618, 'reg_lambda': 0.6648331257996019, 'scale_pos_weight': 1.6358337260830937}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  44%|████▍     | 22/50 [01:46<02:16,  4.88s/it]

[I 2025-11-02 01:26:13,631] Trial 21 finished with value: 0.9022185347592983 and parameters: {'n_estimators': 1372, 'max_depth': 4, 'learning_rate': 0.0215609532954075, 'subsample': 0.9119248069424618, 'colsample_bytree': 0.9112544563736775, 'gamma': 2.1852574888180967, 'min_child_weight': 8, 'reg_alpha': 1.123479112029059, 'reg_lambda': 0.6782910920870073, 'scale_pos_weight': 1.0296822799431136}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  46%|████▌     | 23/50 [01:50<02:04,  4.61s/it]

[I 2025-11-02 01:26:17,606] Trial 22 finished with value: 0.9016911662771319 and parameters: {'n_estimators': 1404, 'max_depth': 4, 'learning_rate': 0.02520352518540367, 'subsample': 0.8470916839461203, 'colsample_bytree': 0.9444481721517656, 'gamma': 2.3700735615998894, 'min_child_weight': 7, 'reg_alpha': 1.0843860419100324, 'reg_lambda': 0.9901469178886743, 'scale_pos_weight': 1.270541334331373}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  48%|████▊     | 24/50 [01:54<01:58,  4.54s/it]

[I 2025-11-02 01:26:21,983] Trial 23 finished with value: 0.9021289995158348 and parameters: {'n_estimators': 1335, 'max_depth': 5, 'learning_rate': 0.018978533714944526, 'subsample': 0.883850534629757, 'colsample_bytree': 0.8956581395865374, 'gamma': 2.6369795656812465, 'min_child_weight': 7, 'reg_alpha': 1.4546071019310858, 'reg_lambda': 0.3026096934709095, 'scale_pos_weight': 1.2313624978822433}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  50%|█████     | 25/50 [01:59<01:50,  4.44s/it]

[I 2025-11-02 01:26:26,186] Trial 24 finished with value: 0.9018075480589193 and parameters: {'n_estimators': 1330, 'max_depth': 4, 'learning_rate': 0.014141350978680073, 'subsample': 0.9295438390777887, 'colsample_bytree': 0.8094172729347169, 'gamma': 1.9443464991318966, 'min_child_weight': 4, 'reg_alpha': 1.764537543303519, 'reg_lambda': 0.6709939066640738, 'scale_pos_weight': 2.6725529547184226}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  52%|█████▏    | 26/50 [02:02<01:37,  4.04s/it]

[I 2025-11-02 01:26:29,303] Trial 25 finished with value: 0.8992774780394235 and parameters: {'n_estimators': 1031, 'max_depth': 5, 'learning_rate': 0.0551421656729341, 'subsample': 0.8254403122025729, 'colsample_bytree': 0.8538474312719675, 'gamma': 2.2653663572715277, 'min_child_weight': 6, 'reg_alpha': 1.2026186262424952, 'reg_lambda': 0.5414418716385926, 'scale_pos_weight': 1.6476704856860334}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  54%|█████▍    | 27/50 [02:05<01:28,  3.86s/it]

[I 2025-11-02 01:26:32,729] Trial 26 finished with value: 0.9012681029229958 and parameters: {'n_estimators': 1129, 'max_depth': 4, 'learning_rate': 0.026495403226932116, 'subsample': 0.9497052896555559, 'colsample_bytree': 0.9022629849536483, 'gamma': 1.3633169775799125, 'min_child_weight': 8, 'reg_alpha': 0.8738408317508108, 'reg_lambda': 1.0583041731036742, 'scale_pos_weight': 1.2235248098179763}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  56%|█████▌    | 28/50 [02:09<01:28,  4.01s/it]

[I 2025-11-02 01:26:37,082] Trial 27 finished with value: 0.9008937556822805 and parameters: {'n_estimators': 1434, 'max_depth': 7, 'learning_rate': 0.0370933539501635, 'subsample': 0.8641174047163592, 'colsample_bytree': 0.9520638460945188, 'gamma': 2.0475771616833005, 'min_child_weight': 3, 'reg_alpha': 1.6317188253440738, 'reg_lambda': 0.24824549113108485, 'scale_pos_weight': 1.0117331251283304}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 2. Best value: 0.902361:  58%|█████▊    | 29/50 [02:13<01:23,  3.98s/it]

[I 2025-11-02 01:26:40,996] Trial 28 finished with value: 0.9011051720595136 and parameters: {'n_estimators': 1272, 'max_depth': 5, 'learning_rate': 0.01813729249352205, 'subsample': 0.9972550529720188, 'colsample_bytree': 0.8558231833769434, 'gamma': 2.51586993401647, 'min_child_weight': 1, 'reg_alpha': 1.1951077543377595, 'reg_lambda': 1.1794724335996793, 'scale_pos_weight': 2.1194039159057683}. Best is trial 2 with value: 0.9023609308958939.


Best trial: 29. Best value: 0.902385:  60%|██████    | 30/50 [02:17<01:19,  3.95s/it]

[I 2025-11-02 01:26:44,893] Trial 29 finished with value: 0.9023847373699987 and parameters: {'n_estimators': 1213, 'max_depth': 4, 'learning_rate': 0.01324003933777381, 'subsample': 0.8837116451321902, 'colsample_bytree': 0.9276747017943526, 'gamma': 2.995490062648261, 'min_child_weight': 7, 'reg_alpha': 1.9054205622745752, 'reg_lambda': 0.5530739078731997, 'scale_pos_weight': 1.462937716379769}. Best is trial 29 with value: 0.9023847373699987.


Best trial: 29. Best value: 0.902385:  62%|██████▏   | 31/50 [02:24<01:32,  4.85s/it]

[I 2025-11-02 01:26:51,820] Trial 30 finished with value: 0.90022007255016 and parameters: {'n_estimators': 940, 'max_depth': 9, 'learning_rate': 0.013611312730526135, 'subsample': 0.8861295790714148, 'colsample_bytree': 0.9988518933355586, 'gamma': 2.8698766953975676, 'min_child_weight': 7, 'reg_alpha': 1.9668844278736723, 'reg_lambda': 0.9057720099817193, 'scale_pos_weight': 1.46671194673158}. Best is trial 29 with value: 0.9023847373699987.


Best trial: 31. Best value: 0.902462:  64%|██████▍   | 32/50 [02:28<01:21,  4.54s/it]

[I 2025-11-02 01:26:55,647] Trial 31 finished with value: 0.9024616226358058 and parameters: {'n_estimators': 1215, 'max_depth': 4, 'learning_rate': 0.013004437275864339, 'subsample': 0.8989031774990877, 'colsample_bytree': 0.9345776046304488, 'gamma': 2.601170812425173, 'min_child_weight': 8, 'reg_alpha': 1.7938078139341376, 'reg_lambda': 0.5617102752434641, 'scale_pos_weight': 1.1977287241158403}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  66%|██████▌   | 33/50 [02:32<01:13,  4.35s/it]

[I 2025-11-02 01:26:59,540] Trial 32 finished with value: 0.9022290769147666 and parameters: {'n_estimators': 1222, 'max_depth': 4, 'learning_rate': 0.012486167153813421, 'subsample': 0.8915848314052669, 'colsample_bytree': 0.9636857433975984, 'gamma': 2.955955023512384, 'min_child_weight': 7, 'reg_alpha': 1.7963824904673662, 'reg_lambda': 0.5650943751896229, 'scale_pos_weight': 1.771748632193889}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  68%|██████▊   | 34/50 [02:36<01:09,  4.37s/it]

[I 2025-11-02 01:27:03,954] Trial 33 finished with value: 0.9022207770637172 and parameters: {'n_estimators': 1065, 'max_depth': 5, 'learning_rate': 0.012016344202869433, 'subsample': 0.832754724174599, 'colsample_bytree': 0.9321386042297639, 'gamma': 2.6142505980686512, 'min_child_weight': 8, 'reg_alpha': 1.8627512887114421, 'reg_lambda': 0.8533824581145212, 'scale_pos_weight': 1.2268774034253742}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  70%|███████   | 35/50 [02:40<01:02,  4.14s/it]

[I 2025-11-02 01:27:07,557] Trial 34 finished with value: 0.9024499017996181 and parameters: {'n_estimators': 1178, 'max_depth': 4, 'learning_rate': 0.01742381913319424, 'subsample': 0.789846853139116, 'colsample_bytree': 0.7557052224531497, 'gamma': 2.543725066381473, 'min_child_weight': 5, 'reg_alpha': 1.6711103638854798, 'reg_lambda': 0.3502215392609723, 'scale_pos_weight': 1.349863567687072}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  72%|███████▏  | 36/50 [02:43<00:54,  3.92s/it]

[I 2025-11-02 01:27:10,962] Trial 35 finished with value: 0.9019862820787757 and parameters: {'n_estimators': 816, 'max_depth': 5, 'learning_rate': 0.016762448259564813, 'subsample': 0.7841016140932097, 'colsample_bytree': 0.7391188830726835, 'gamma': 2.7310812563344915, 'min_child_weight': 6, 'reg_alpha': 1.6665406697375107, 'reg_lambda': 0.21454778862450175, 'scale_pos_weight': 1.908581442184776}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  74%|███████▍  | 37/50 [02:50<00:59,  4.58s/it]

[I 2025-11-02 01:27:17,101] Trial 36 finished with value: 0.9021084421324279 and parameters: {'n_estimators': 1129, 'max_depth': 6, 'learning_rate': 0.010710087185388533, 'subsample': 0.7412236780441611, 'colsample_bytree': 0.7436561855741727, 'gamma': 2.512630343186418, 'min_child_weight': 3, 'reg_alpha': 1.8598367300314345, 'reg_lambda': 0.3223874118353265, 'scale_pos_weight': 1.4476006861682986}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  76%|███████▌  | 38/50 [02:55<00:57,  4.77s/it]

[I 2025-11-02 01:27:22,296] Trial 37 finished with value: 0.9020165836150357 and parameters: {'n_estimators': 1283, 'max_depth': 5, 'learning_rate': 0.013408548003780043, 'subsample': 0.799832311962138, 'colsample_bytree': 0.7273793367062711, 'gamma': 2.8076051511039437, 'min_child_weight': 1, 'reg_alpha': 1.5414340153843313, 'reg_lambda': 0.41755119833837584, 'scale_pos_weight': 1.6584573557997497}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  78%|███████▊  | 39/50 [02:57<00:44,  4.06s/it]

[I 2025-11-02 01:27:24,721] Trial 38 finished with value: 0.8943943345845777 and parameters: {'n_estimators': 989, 'max_depth': 4, 'learning_rate': 0.13687864839696998, 'subsample': 0.7384945943392751, 'colsample_bytree': 0.7644120514890252, 'gamma': 2.985349871359613, 'min_child_weight': 7, 'reg_alpha': 1.6723341384229025, 'reg_lambda': 0.7945836594860458, 'scale_pos_weight': 2.0477005085721576}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  80%|████████  | 40/50 [03:12<01:13,  7.33s/it]

[I 2025-11-02 01:27:39,665] Trial 39 finished with value: 0.8997715758231315 and parameters: {'n_estimators': 1080, 'max_depth': 10, 'learning_rate': 0.01019352716979719, 'subsample': 0.8680859133976012, 'colsample_bytree': 0.9760562115470809, 'gamma': 2.2697159093679318, 'min_child_weight': 2, 'reg_alpha': 0.45035687523077117, 'reg_lambda': 0.15096362228632662, 'scale_pos_weight': 2.247368092577847}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  82%|████████▏ | 41/50 [03:17<00:59,  6.60s/it]

[I 2025-11-02 01:27:44,560] Trial 40 finished with value: 0.901606674341463 and parameters: {'n_estimators': 1165, 'max_depth': 5, 'learning_rate': 0.011900110916282045, 'subsample': 0.789904612556925, 'colsample_bytree': 0.8056084293155411, 'gamma': 2.583210582775715, 'min_child_weight': 3, 'reg_alpha': 1.9864986623103191, 'reg_lambda': 1.1829155589761358, 'scale_pos_weight': 3.2989627819244607}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  84%|████████▍ | 42/50 [03:20<00:44,  5.59s/it]

[I 2025-11-02 01:27:47,798] Trial 41 finished with value: 0.9022452622061357 and parameters: {'n_estimators': 1211, 'max_depth': 4, 'learning_rate': 0.0181400862035274, 'subsample': 0.9598066774377352, 'colsample_bytree': 0.7740648204033984, 'gamma': 2.3850186765915735, 'min_child_weight': 5, 'reg_alpha': 1.7762171442878851, 'reg_lambda': 0.5954697472063812, 'scale_pos_weight': 1.1833999432895081}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  86%|████████▌ | 43/50 [03:24<00:35,  5.03s/it]

[I 2025-11-02 01:27:51,528] Trial 42 finished with value: 0.9015978680714237 and parameters: {'n_estimators': 1317, 'max_depth': 4, 'learning_rate': 0.027195669282074408, 'subsample': 0.9261404701902635, 'colsample_bytree': 0.9241847838219762, 'gamma': 1.8564371758404008, 'min_child_weight': 4, 'reg_alpha': 1.4866685739407144, 'reg_lambda': 0.479350184679181, 'scale_pos_weight': 1.3986046383056683}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  88%|████████▊ | 44/50 [03:28<00:28,  4.70s/it]

[I 2025-11-02 01:27:55,464] Trial 43 finished with value: 0.9016975283699811 and parameters: {'n_estimators': 1233, 'max_depth': 4, 'learning_rate': 0.022960666298224856, 'subsample': 0.8125492046039354, 'colsample_bytree': 0.8928121464386359, 'gamma': 0.7133843367490774, 'min_child_weight': 5, 'reg_alpha': 1.8665482971274303, 'reg_lambda': 0.025899036253090313, 'scale_pos_weight': 1.1666798241150782}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  90%|█████████ | 45/50 [03:31<00:21,  4.29s/it]

[I 2025-11-02 01:27:58,782] Trial 44 finished with value: 0.90245535124375 and parameters: {'n_estimators': 1119, 'max_depth': 4, 'learning_rate': 0.019056461784876026, 'subsample': 0.8732188985160834, 'colsample_bytree': 0.7938438952570411, 'gamma': 2.7820648472229004, 'min_child_weight': 6, 'reg_alpha': 1.3876483788158747, 'reg_lambda': 1.4318595707515844, 'scale_pos_weight': 1.5594244609248895}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  92%|█████████▏| 46/50 [03:35<00:16,  4.24s/it]

[I 2025-11-02 01:28:02,926] Trial 45 finished with value: 0.9022690874143624 and parameters: {'n_estimators': 1048, 'max_depth': 5, 'learning_rate': 0.014871939153450028, 'subsample': 0.8676300305113733, 'colsample_bytree': 0.7913207426299632, 'gamma': 2.8039144671542697, 'min_child_weight': 6, 'reg_alpha': 1.7155943443304873, 'reg_lambda': 1.5886531968899358, 'scale_pos_weight': 1.5873035653695908}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  94%|█████████▍| 47/50 [03:42<00:14,  4.91s/it]

[I 2025-11-02 01:28:09,384] Trial 46 finished with value: 0.900843237656391 and parameters: {'n_estimators': 1104, 'max_depth': 9, 'learning_rate': 0.017908538396747945, 'subsample': 0.8429616622010094, 'colsample_bytree': 0.7588172455426361, 'gamma': 2.684362691815164, 'min_child_weight': 6, 'reg_alpha': 1.5665178875095724, 'reg_lambda': 1.3981283870821448, 'scale_pos_weight': 1.770339748211422}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 31. Best value: 0.902462:  96%|█████████▌| 48/50 [03:45<00:08,  4.50s/it]

[I 2025-11-02 01:28:12,917] Trial 47 finished with value: 0.9024171865314514 and parameters: {'n_estimators': 1161, 'max_depth': 4, 'learning_rate': 0.01295570170645865, 'subsample': 0.8135531423706579, 'colsample_bytree': 0.7218411072944528, 'gamma': 2.855090035345301, 'min_child_weight': 7, 'reg_alpha': 1.4316958170181218, 'reg_lambda': 1.7362588228980311, 'scale_pos_weight': 1.3663467329934775}. Best is trial 31 with value: 0.9024616226358058.


Best trial: 48. Best value: 0.902639:  98%|█████████▊| 49/50 [03:49<00:04,  4.21s/it]

[I 2025-11-02 01:28:16,465] Trial 48 finished with value: 0.9026389459370605 and parameters: {'n_estimators': 1170, 'max_depth': 4, 'learning_rate': 0.011838767757643854, 'subsample': 0.8593982167178645, 'colsample_bytree': 0.7108975034601005, 'gamma': 2.827547621382358, 'min_child_weight': 7, 'reg_alpha': 1.3030642388497118, 'reg_lambda': 1.9802619016393284, 'scale_pos_weight': 1.373001409156999}. Best is trial 48 with value: 0.9026389459370605.


Best trial: 48. Best value: 0.902639: 100%|██████████| 50/50 [03:53<00:00,  4.67s/it]
[I 2025-11-02 01:28:20,619] A new study created in memory with name: no-name-0e751eae-918a-47c0-9b3b-e687f340fd91


[I 2025-11-02 01:28:20,616] Trial 49 finished with value: 0.9002124784506513 and parameters: {'n_estimators': 504, 'max_depth': 8, 'learning_rate': 0.011854248243604384, 'subsample': 0.8554327736842552, 'colsample_bytree': 0.7022390747417447, 'gamma': 2.517535033830394, 'min_child_weight': 8, 'reg_alpha': 1.3037972978330816, 'reg_lambda': 1.9631655877969876, 'scale_pos_weight': 1.317306056791044}. Best is trial 48 with value: 0.9026389459370605.
Best XGBoost AUC: 0.90264

Optimizing ExtraTrees...


Best trial: 0. Best value: 0.893995:   2%|▏         | 1/50 [00:05<04:08,  5.08s/it]

[I 2025-11-02 01:28:25,695] Trial 0 finished with value: 0.8939946938885239 and parameters: {'n_estimators': 487, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 0.7, 'class_weight': 'balanced'}. Best is trial 0 with value: 0.8939946938885239.


Best trial: 1. Best value: 0.894616:   4%|▍         | 2/50 [00:08<03:18,  4.13s/it]

[I 2025-11-02 01:28:29,169] Trial 1 finished with value: 0.8946158027240887 and parameters: {'n_estimators': 310, 'max_depth': 25, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 0.7, 'class_weight': None}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:   6%|▌         | 3/50 [00:09<02:15,  2.88s/it]

[I 2025-11-02 01:28:30,564] Trial 2 finished with value: 0.8865588152646217 and parameters: {'n_estimators': 606, 'max_depth': 12, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'class_weight': None}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:   8%|▊         | 4/50 [00:11<01:50,  2.41s/it]

[I 2025-11-02 01:28:32,247] Trial 3 finished with value: 0.8872253926918683 and parameters: {'n_estimators': 604, 'max_depth': 12, 'min_samples_split': 2, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:  10%|█         | 5/50 [00:12<01:25,  1.89s/it]

[I 2025-11-02 01:28:33,214] Trial 4 finished with value: 0.8886908407800759 and parameters: {'n_estimators': 361, 'max_depth': 17, 'min_samples_split': 2, 'min_samples_leaf': 5, 'max_features': 'log2', 'class_weight': None}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:  12%|█▏        | 6/50 [00:14<01:23,  1.91s/it]

[I 2025-11-02 01:28:35,160] Trial 5 finished with value: 0.8881533322256331 and parameters: {'n_estimators': 785, 'max_depth': 22, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_features': 'log2', 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:  14%|█▍        | 7/50 [00:19<01:58,  2.77s/it]

[I 2025-11-02 01:28:39,693] Trial 6 finished with value: 0.8923187823044018 and parameters: {'n_estimators': 494, 'max_depth': 14, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 0.7, 'class_weight': 'balanced'}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 1. Best value: 0.894616:  16%|█▌        | 8/50 [00:23<02:20,  3.35s/it]

[I 2025-11-02 01:28:44,288] Trial 7 finished with value: 0.8924016408039517 and parameters: {'n_estimators': 686, 'max_depth': 13, 'min_samples_split': 2, 'min_samples_leaf': 5, 'max_features': 0.5, 'class_weight': None}. Best is trial 1 with value: 0.8946158027240887.


Best trial: 8. Best value: 0.896178:  18%|█▊        | 9/50 [00:30<02:56,  4.29s/it]

[I 2025-11-02 01:28:50,661] Trial 8 finished with value: 0.8961781749017634 and parameters: {'n_estimators': 732, 'max_depth': 19, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 8. Best value: 0.896178:  20%|██        | 10/50 [00:31<02:12,  3.32s/it]

[I 2025-11-02 01:28:51,800] Trial 9 finished with value: 0.893318247968395 and parameters: {'n_estimators': 359, 'max_depth': 21, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 8. Best value: 0.896178:  22%|██▏       | 11/50 [00:37<02:50,  4.36s/it]

[I 2025-11-02 01:28:58,516] Trial 10 finished with value: 0.8960961258171404 and parameters: {'n_estimators': 790, 'max_depth': 18, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 8. Best value: 0.896178:  24%|██▍       | 12/50 [00:44<03:13,  5.09s/it]

[I 2025-11-02 01:29:05,271] Trial 11 finished with value: 0.896100168726562 and parameters: {'n_estimators': 794, 'max_depth': 18, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 8. Best value: 0.896178:  26%|██▌       | 13/50 [00:50<03:16,  5.31s/it]

[I 2025-11-02 01:29:11,106] Trial 12 finished with value: 0.895673581041429 and parameters: {'n_estimators': 709, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 8. Best value: 0.896178:  28%|██▊       | 14/50 [00:56<03:22,  5.64s/it]

[I 2025-11-02 01:29:17,487] Trial 13 finished with value: 0.8961545146210504 and parameters: {'n_estimators': 712, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 8 with value: 0.8961781749017634.


Best trial: 14. Best value: 0.896404:  30%|███       | 15/50 [01:02<03:21,  5.77s/it]

[I 2025-11-02 01:29:23,557] Trial 14 finished with value: 0.8964042703133455 and parameters: {'n_estimators': 686, 'max_depth': 21, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  32%|███▏      | 16/50 [01:07<03:06,  5.50s/it]

[I 2025-11-02 01:29:28,434] Trial 15 finished with value: 0.8946683099289536 and parameters: {'n_estimators': 625, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  34%|███▍      | 17/50 [01:13<03:02,  5.53s/it]

[I 2025-11-02 01:29:34,045] Trial 16 finished with value: 0.8961058058294402 and parameters: {'n_estimators': 665, 'max_depth': 20, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  36%|███▌      | 18/50 [01:17<02:44,  5.16s/it]

[I 2025-11-02 01:29:38,323] Trial 17 finished with value: 0.8952306485353809 and parameters: {'n_estimators': 535, 'max_depth': 15, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  38%|███▊      | 19/50 [01:19<02:11,  4.26s/it]

[I 2025-11-02 01:29:40,483] Trial 18 finished with value: 0.8922499308016076 and parameters: {'n_estimators': 742, 'max_depth': 23, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  40%|████      | 20/50 [01:23<02:03,  4.10s/it]

[I 2025-11-02 01:29:44,221] Trial 19 finished with value: 0.8900671955110167 and parameters: {'n_estimators': 653, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  42%|████▏     | 21/50 [01:28<02:02,  4.23s/it]

[I 2025-11-02 01:29:48,759] Trial 20 finished with value: 0.8951190241372228 and parameters: {'n_estimators': 565, 'max_depth': 19, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  44%|████▍     | 22/50 [01:34<02:19,  4.97s/it]

[I 2025-11-02 01:29:55,463] Trial 21 finished with value: 0.8961537562159807 and parameters: {'n_estimators': 734, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  46%|████▌     | 23/50 [01:41<02:23,  5.33s/it]

[I 2025-11-02 01:30:01,620] Trial 22 finished with value: 0.895684837615071 and parameters: {'n_estimators': 737, 'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 14. Best value: 0.896404:  48%|████▊     | 24/50 [01:47<02:27,  5.66s/it]

[I 2025-11-02 01:30:08,063] Trial 23 finished with value: 0.8963924963193204 and parameters: {'n_estimators': 710, 'max_depth': 21, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 0.5, 'class_weight': None}. Best is trial 14 with value: 0.8964042703133455.


Best trial: 24. Best value: 0.896413:  50%|█████     | 25/50 [01:52<02:19,  5.59s/it]

[I 2025-11-02 01:30:13,495] Trial 24 finished with value: 0.8964132057797165 and parameters: {'n_estimators': 638, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 0.5, 'class_weight': None}. Best is trial 24 with value: 0.8964132057797165.


Best trial: 25. Best value: 0.896503:  52%|█████▏    | 26/50 [01:58<02:13,  5.57s/it]

[I 2025-11-02 01:30:19,001] Trial 25 finished with value: 0.896502717768961 and parameters: {'n_estimators': 651, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 0.5, 'class_weight': None}. Best is trial 25 with value: 0.896502717768961.


Best trial: 25. Best value: 0.896503:  54%|█████▍    | 27/50 [01:59<01:39,  4.34s/it]

[I 2025-11-02 01:30:20,472] Trial 26 finished with value: 0.8949649454918751 and parameters: {'n_estimators': 550, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2', 'class_weight': 'balanced'}. Best is trial 25 with value: 0.896502717768961.


Best trial: 27. Best value: 0.896649:  56%|█████▌    | 28/50 [02:01<01:19,  3.63s/it]

[I 2025-11-02 01:30:22,438] Trial 27 finished with value: 0.8966486551028667 and parameters: {'n_estimators': 633, 'max_depth': 23, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 27 with value: 0.8966486551028667.


Best trial: 27. Best value: 0.896649:  58%|█████▊    | 29/50 [02:03<01:05,  3.12s/it]

[I 2025-11-02 01:30:24,372] Trial 28 finished with value: 0.8953164256729025 and parameters: {'n_estimators': 630, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 27 with value: 0.8966486551028667.


Best trial: 27. Best value: 0.896649:  60%|██████    | 30/50 [02:05<00:52,  2.62s/it]

[I 2025-11-02 01:30:25,822] Trial 29 finished with value: 0.8934574399537716 and parameters: {'n_estimators': 458, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 27 with value: 0.8966486551028667.


Best trial: 27. Best value: 0.896649:  62%|██████▏   | 31/50 [02:11<01:10,  3.72s/it]

[I 2025-11-02 01:30:32,123] Trial 30 finished with value: 0.8954717813764571 and parameters: {'n_estimators': 583, 'max_depth': 23, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 0.7, 'class_weight': None}. Best is trial 27 with value: 0.8966486551028667.


Best trial: 31. Best value: 0.897351:  64%|██████▍   | 32/50 [02:13<00:58,  3.27s/it]

[I 2025-11-02 01:30:34,325] Trial 31 finished with value: 0.8973506208194 and parameters: {'n_estimators': 648, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 31 with value: 0.8973506208194.


Best trial: 32. Best value: 0.897358:  66%|██████▌   | 33/50 [02:15<00:50,  2.94s/it]

[I 2025-11-02 01:30:36,515] Trial 32 finished with value: 0.8973577744148902 and parameters: {'n_estimators': 651, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  68%|██████▊   | 34/50 [02:17<00:42,  2.65s/it]

[I 2025-11-02 01:30:38,473] Trial 33 finished with value: 0.8968006395983867 and parameters: {'n_estimators': 593, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  70%|███████   | 35/50 [02:19<00:35,  2.37s/it]

[I 2025-11-02 01:30:40,200] Trial 34 finished with value: 0.8956280588436355 and parameters: {'n_estimators': 510, 'max_depth': 25, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  72%|███████▏  | 36/50 [02:21<00:31,  2.26s/it]

[I 2025-11-02 01:30:42,214] Trial 35 finished with value: 0.8967765802042973 and parameters: {'n_estimators': 601, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  74%|███████▍  | 37/50 [02:23<00:28,  2.18s/it]

[I 2025-11-02 01:30:44,194] Trial 36 finished with value: 0.8967765802042973 and parameters: {'n_estimators': 601, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  76%|███████▌  | 38/50 [02:24<00:23,  1.94s/it]

[I 2025-11-02 01:30:45,586] Trial 37 finished with value: 0.8944815696215507 and parameters: {'n_estimators': 432, 'max_depth': 22, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  78%|███████▊  | 39/50 [02:26<00:21,  1.94s/it]

[I 2025-11-02 01:30:47,516] Trial 38 finished with value: 0.8921306427840987 and parameters: {'n_estimators': 578, 'max_depth': 24, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'class_weight': 'balanced'}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  80%|████████  | 40/50 [02:28<00:18,  1.89s/it]

[I 2025-11-02 01:30:49,288] Trial 39 finished with value: 0.896340912689854 and parameters: {'n_estimators': 530, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  82%|████████▏ | 41/50 [02:30<00:17,  1.92s/it]

[I 2025-11-02 01:30:51,289] Trial 40 finished with value: 0.8968672606760253 and parameters: {'n_estimators': 605, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  84%|████████▍ | 42/50 [02:32<00:15,  1.92s/it]

[I 2025-11-02 01:30:53,196] Trial 41 finished with value: 0.8968849484140844 and parameters: {'n_estimators': 603, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 32. Best value: 0.897358:  86%|████████▌ | 43/50 [02:34<00:14,  2.00s/it]

[I 2025-11-02 01:30:55,391] Trial 42 finished with value: 0.896863228525182 and parameters: {'n_estimators': 684, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 32 with value: 0.8973577744148902.


Best trial: 43. Best value: 0.897371:  88%|████████▊ | 44/50 [02:36<00:12,  2.05s/it]

[I 2025-11-02 01:30:57,569] Trial 43 finished with value: 0.8973705607239081 and parameters: {'n_estimators': 671, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 43 with value: 0.8973705607239081.


Best trial: 43. Best value: 0.897371:  90%|█████████ | 45/50 [02:44<00:18,  3.64s/it]

[I 2025-11-02 01:31:04,921] Trial 44 finished with value: 0.8959527269847667 and parameters: {'n_estimators': 674, 'max_depth': 21, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 0.7, 'class_weight': None}. Best is trial 43 with value: 0.8973705607239081.


Best trial: 43. Best value: 0.897371:  92%|█████████▏| 46/50 [02:46<00:12,  3.12s/it]

[I 2025-11-02 01:31:06,813] Trial 45 finished with value: 0.895171099504702 and parameters: {'n_estimators': 613, 'max_depth': 19, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 43 with value: 0.8973705607239081.


Best trial: 43. Best value: 0.897371:  94%|█████████▍| 47/50 [02:48<00:08,  2.79s/it]

[I 2025-11-02 01:31:08,829] Trial 46 finished with value: 0.8930012070430218 and parameters: {'n_estimators': 767, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'log2', 'class_weight': 'balanced'}. Best is trial 43 with value: 0.8973705607239081.


Best trial: 43. Best value: 0.897371:  96%|█████████▌| 48/50 [02:49<00:04,  2.48s/it]

[I 2025-11-02 01:31:10,576] Trial 47 finished with value: 0.8966279403565718 and parameters: {'n_estimators': 556, 'max_depth': 20, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 43 with value: 0.8973705607239081.


Best trial: 48. Best value: 0.897431:  98%|█████████▊| 49/50 [02:52<00:02,  2.38s/it]

[I 2025-11-02 01:31:12,728] Trial 48 finished with value: 0.8974312278996244 and parameters: {'n_estimators': 694, 'max_depth': 21, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 48 with value: 0.8974312278996244.


Best trial: 48. Best value: 0.897431: 100%|██████████| 50/50 [02:54<00:00,  3.48s/it]
[I 2025-11-02 01:31:14,833] A new study created in memory with name: no-name-a530d65f-d725-4bea-8208-782a8885ab30


[I 2025-11-02 01:31:14,830] Trial 49 finished with value: 0.89527408924709 and parameters: {'n_estimators': 691, 'max_depth': 21, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'class_weight': None}. Best is trial 48 with value: 0.8974312278996244.
Best ExtraTrees AUC: 0.89743

Optimizing Logistic Regression...


Best trial: 0. Best value: 0.623805:   3%|▎         | 1/30 [00:22<10:42, 22.15s/it]

[I 2025-11-02 01:31:36,984] Trial 0 finished with value: 0.6238046730514661 and parameters: {'penalty': 'elasticnet', 'C': 8.471801418819979, 'class_weight': None, 'l1_ratio': 0.22479561626896213}. Best is trial 0 with value: 0.6238046730514661.


Best trial: 0. Best value: 0.623805:   7%|▋         | 2/30 [00:46<11:00, 23.59s/it]

[I 2025-11-02 01:32:01,580] Trial 1 finished with value: 0.6238046729767538 and parameters: {'penalty': 'elasticnet', 'C': 2.5378155082656657, 'class_weight': None, 'l1_ratio': 0.8759278817295955}. Best is trial 0 with value: 0.6238046730514661.


Best trial: 2. Best value: 0.804081:  10%|█         | 3/30 [00:50<06:33, 14.56s/it]

[I 2025-11-02 01:32:05,392] Trial 2 finished with value: 0.8040810056601547 and parameters: {'penalty': 'l2', 'C': 0.053370327626039576, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  13%|█▎        | 4/30 [00:54<04:29, 10.35s/it]

[I 2025-11-02 01:32:09,301] Trial 3 finished with value: 0.8031728306062993 and parameters: {'penalty': 'l2', 'C': 0.14618962793704965, 'class_weight': None}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  17%|█▋        | 5/30 [01:16<06:02, 14.50s/it]

[I 2025-11-02 01:32:31,159] Trial 4 finished with value: 0.6238046211263825 and parameters: {'penalty': 'elasticnet', 'C': 0.6672367170464207, 'class_weight': None, 'l1_ratio': 0.5113875507308893}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  20%|██        | 6/30 [01:20<04:20, 10.85s/it]

[I 2025-11-02 01:32:34,924] Trial 5 finished with value: 0.7909829069117338 and parameters: {'penalty': 'l2', 'C': 2.69264691008618, 'class_weight': None}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  23%|██▎       | 7/30 [01:41<05:29, 14.31s/it]

[I 2025-11-02 01:32:56,339] Trial 6 finished with value: 0.6238046730514661 and parameters: {'penalty': 'elasticnet', 'C': 17.12337597316399, 'class_weight': None, 'l1_ratio': 0.6473864212097256}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  27%|██▋       | 8/30 [01:45<04:01, 11.00s/it]

[I 2025-11-02 01:33:00,252] Trial 7 finished with value: 0.7956411041056602 and parameters: {'penalty': 'l2', 'C': 0.9565499215943827, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  30%|███       | 9/30 [02:06<04:57, 14.16s/it]

[I 2025-11-02 01:33:21,383] Trial 8 finished with value: 0.6238997619627904 and parameters: {'penalty': 'elasticnet', 'C': 0.17654048052495078, 'class_weight': 'balanced', 'l1_ratio': 0.24788356442042164}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  33%|███▎      | 10/30 [02:10<03:39, 10.96s/it]

[I 2025-11-02 01:33:25,177] Trial 9 finished with value: 0.7989816251257293 and parameters: {'penalty': 'l2', 'C': 57.27904470799624, 'class_weight': None}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  37%|███▋      | 11/30 [02:14<02:48,  8.87s/it]

[I 2025-11-02 01:33:29,297] Trial 10 finished with value: 0.7984241819339992 and parameters: {'penalty': 'l2', 'C': 0.010567900894501657, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 2. Best value: 0.804081:  40%|████      | 12/30 [02:18<02:12,  7.37s/it]

[I 2025-11-02 01:33:33,244] Trial 11 finished with value: 0.7996781207775218 and parameters: {'penalty': 'l2', 'C': 0.04171259042190411, 'class_weight': 'balanced'}. Best is trial 2 with value: 0.8040810056601547.


Best trial: 12. Best value: 0.805381:  43%|████▎     | 13/30 [02:22<01:46,  6.29s/it]

[I 2025-11-02 01:33:37,045] Trial 12 finished with value: 0.8053809218117539 and parameters: {'penalty': 'l2', 'C': 0.11609406701497008, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  47%|████▋     | 14/30 [02:26<01:30,  5.63s/it]

[I 2025-11-02 01:33:41,133] Trial 13 finished with value: 0.7930096358662664 and parameters: {'penalty': 'l2', 'C': 0.047988763373152984, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  50%|█████     | 15/30 [02:30<01:16,  5.07s/it]

[I 2025-11-02 01:33:44,927] Trial 14 finished with value: 0.8019714925822168 and parameters: {'penalty': 'l2', 'C': 0.21620010846943202, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  53%|█████▎    | 16/30 [02:33<01:06,  4.72s/it]

[I 2025-11-02 01:33:48,831] Trial 15 finished with value: 0.8049322443742275 and parameters: {'penalty': 'l2', 'C': 0.01108107754857325, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  57%|█████▋    | 17/30 [02:37<00:58,  4.49s/it]

[I 2025-11-02 01:33:52,773] Trial 16 finished with value: 0.8008623456762327 and parameters: {'penalty': 'l2', 'C': 0.01034090192099258, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  60%|██████    | 18/30 [02:41<00:51,  4.30s/it]

[I 2025-11-02 01:33:56,626] Trial 17 finished with value: 0.8039554875665196 and parameters: {'penalty': 'l2', 'C': 0.029906387116757097, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  63%|██████▎   | 19/30 [02:45<00:46,  4.20s/it]

[I 2025-11-02 01:34:00,587] Trial 18 finished with value: 0.795776810162188 and parameters: {'penalty': 'l2', 'C': 0.37425304567214623, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  67%|██████▋   | 20/30 [02:49<00:41,  4.11s/it]

[I 2025-11-02 01:34:04,485] Trial 19 finished with value: 0.8041452606350097 and parameters: {'penalty': 'l2', 'C': 0.020774416831623982, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  70%|███████   | 21/30 [02:53<00:36,  4.09s/it]

[I 2025-11-02 01:34:08,550] Trial 20 finished with value: 0.8044178159964523 and parameters: {'penalty': 'l2', 'C': 0.07892357904221876, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  73%|███████▎  | 22/30 [02:57<00:32,  4.10s/it]

[I 2025-11-02 01:34:12,667] Trial 21 finished with value: 0.7999766563875845 and parameters: {'penalty': 'l2', 'C': 0.09312609920168025, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  77%|███████▋  | 23/30 [03:01<00:28,  4.01s/it]

[I 2025-11-02 01:34:16,476] Trial 22 finished with value: 0.7988494864718686 and parameters: {'penalty': 'l2', 'C': 0.08040523443525752, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  80%|████████  | 24/30 [03:05<00:23,  3.98s/it]

[I 2025-11-02 01:34:20,385] Trial 23 finished with value: 0.8035298866821119 and parameters: {'penalty': 'l2', 'C': 0.017680601553800107, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  83%|████████▎ | 25/30 [03:09<00:19,  3.99s/it]

[I 2025-11-02 01:34:24,399] Trial 24 finished with value: 0.7951291593531961 and parameters: {'penalty': 'l2', 'C': 0.3980364072244555, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  87%|████████▋ | 26/30 [03:13<00:15,  3.95s/it]

[I 2025-11-02 01:34:28,258] Trial 25 finished with value: 0.7959821408075564 and parameters: {'penalty': 'l2', 'C': 2.080962230659839, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  90%|█████████ | 27/30 [03:33<00:26,  8.89s/it]

[I 2025-11-02 01:34:48,673] Trial 26 finished with value: 0.6238966492594884 and parameters: {'penalty': 'elasticnet', 'C': 0.09048836563202602, 'class_weight': 'balanced', 'l1_ratio': 0.8835404734181955}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  93%|█████████▎| 28/30 [03:37<00:14,  7.38s/it]

[I 2025-11-02 01:34:52,536] Trial 27 finished with value: 0.8008627985825015 and parameters: {'penalty': 'l2', 'C': 0.023190146464774648, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381:  97%|█████████▋| 29/30 [03:41<00:06,  6.33s/it]

[I 2025-11-02 01:34:56,403] Trial 28 finished with value: 0.7926758237739497 and parameters: {'penalty': 'l2', 'C': 0.33968762817977727, 'class_weight': 'balanced'}. Best is trial 12 with value: 0.8053809218117539.


Best trial: 12. Best value: 0.805381: 100%|██████████| 30/30 [04:02<00:00,  8.08s/it]

[I 2025-11-02 01:35:17,335] Trial 29 finished with value: 0.6239008510073673 and parameters: {'penalty': 'elasticnet', 'C': 10.092271935851178, 'class_weight': 'balanced', 'l1_ratio': 0.12177522111295758}. Best is trial 12 with value: 0.8053809218117539.
Best Logistic Regression AUC: 0.80538

OPTIMIZATION COMPLETE





In [7]:
# ============================================================================
# STEP 5: Train Base Models with Optimized Hyperparameters
# ============================================================================
print("\n" + "="*80)
print("TRAINING BASE MODELS")
print("="*80)

# Model 1: XGBoost
print("\n[1/3] Training XGBoost...")
xgb_model = xgb.XGBClassifier(**best_xgb_params)
xgb_model.fit(X_selected, y_resampled)
xgb_preds_train = xgb_model.predict_proba(X_selected)[:, 1]
xgb_preds_test = xgb_model.predict_proba(X_test_selected)[:, 1]
xgb_auc = roc_auc_score(y_resampled, xgb_preds_train)
print(f"XGBoost Train AUC: {xgb_auc:.5f}")

# Model 2: ExtraTrees
print("\n[2/3] Training ExtraTrees...")
et_model = ExtraTreesClassifier(**best_et_params)
et_model.fit(X_selected, y_resampled)
et_preds_train = et_model.predict_proba(X_selected)[:, 1]
et_preds_test = et_model.predict_proba(X_test_selected)[:, 1]
et_auc = roc_auc_score(y_resampled, et_preds_train)
print(f"ExtraTrees Train AUC: {et_auc:.5f}")

# Model 3: Logistic Regression (with calibration)
print("\n[3/3] Training Logistic Regression (with calibration)...")
lr_base = LogisticRegression(**best_lr_params)
lr_model = CalibratedClassifierCV(lr_base, cv=5, method='isotonic')
lr_model.fit(X_selected, y_resampled)
lr_preds_train = lr_model.predict_proba(X_selected)[:, 1]
lr_preds_test = lr_model.predict_proba(X_test_selected)[:, 1]
lr_auc = roc_auc_score(y_resampled, lr_preds_train)
print(f"Logistic Regression Train AUC: {lr_auc:.5f}")

print("\n" + "="*80)
print("BASE MODEL TRAINING COMPLETE")
print("="*80)



TRAINING BASE MODELS

[1/3] Training XGBoost...
XGBoost Train AUC: 0.92665

[2/3] Training ExtraTrees...
ExtraTrees Train AUC: 1.00000

[3/3] Training Logistic Regression (with calibration)...
Logistic Regression Train AUC: 0.62811

BASE MODEL TRAINING COMPLETE


In [8]:
# ============================================================================
# STEP 6: Stacking Ensemble with Meta-Learner
# ============================================================================
print("\n" + "="*80)
print("STACKING ENSEMBLE - TRAINING META-LEARNER")
print("="*80)

# Stack predictions as new features
meta_train = np.column_stack([xgb_preds_train, et_preds_train, lr_preds_train])
meta_test = np.column_stack([xgb_preds_test, et_preds_test, lr_preds_test])

# Train meta-learner (Logistic Regression with regularization)
meta_model = LogisticRegression(C=0.5, penalty='l2', random_state=42, max_iter=1000)
meta_model.fit(meta_train, y_resampled)
meta_preds_train = meta_model.predict_proba(meta_train)[:, 1]
meta_preds_test = meta_model.predict_proba(meta_test)[:, 1]

stacking_auc = roc_auc_score(y_resampled, meta_preds_train)
print(f"Stacking Ensemble Train AUC: {stacking_auc:.5f}")
print(f"\nMeta-learner coefficients:")
print(f"  XGBoost weight: {meta_model.coef_[0][0]:.4f}")
print(f"  ExtraTrees weight: {meta_model.coef_[0][1]:.4f}")
print(f"  Logistic Regression weight: {meta_model.coef_[0][2]:.4f}")



STACKING ENSEMBLE - TRAINING META-LEARNER
Stacking Ensemble Train AUC: 1.00000

Meta-learner coefficients:
  XGBoost weight: -1.5257
  ExtraTrees weight: 16.5152
  Logistic Regression weight: -1.5019


In [9]:
# ============================================================================
# STEP 7: Weighted Averaging Ensemble (Based on CV Performance)
# ============================================================================
print("\n" + "="*80)
print("WEIGHTED AVERAGING ENSEMBLE")
print("="*80)

# Calculate weights based on individual model performance
weights_sum = xgb_auc + et_auc + lr_auc
weights = [xgb_auc/weights_sum, et_auc/weights_sum, lr_auc/weights_sum]

weighted_preds_test = (weights[0] * xgb_preds_test + 
                        weights[1] * et_preds_test + 
                        weights[2] * lr_preds_test)

weighted_preds_train = (weights[0] * xgb_preds_train + 
                        weights[1] * et_preds_train + 
                        weights[2] * lr_preds_train)

weighted_auc = roc_auc_score(y_resampled, weighted_preds_train)
print(f"Weighted Ensemble Train AUC: {weighted_auc:.5f}")
print(f"\nModel weights:")
print(f"  XGBoost: {weights[0]:.4f} ({weights[0]*100:.1f}%)")
print(f"  ExtraTrees: {weights[1]:.4f} ({weights[1]*100:.1f}%)")
print(f"  Logistic Regression: {weights[2]:.4f} ({weights[2]*100:.1f}%)")



WEIGHTED AVERAGING ENSEMBLE
Weighted Ensemble Train AUC: 0.99143

Model weights:
  XGBoost: 0.3627 (36.3%)
  ExtraTrees: 0.3914 (39.1%)
  Logistic Regression: 0.2459 (24.6%)


In [10]:
# ============================================================================
# STEP 8: Final Ensemble (Blend Stacking + Weighted)
# ============================================================================
print("\n" + "="*80)
print("FINAL ENSEMBLE PREDICTIONS")
print("="*80)

# Optimize blending ratio using cross-validation
print("\nOptimizing blending ratio...")
blend_ratios = np.linspace(0.3, 0.8, 11)
best_blend_ratio = 0.65
best_blend_auc = 0

for ratio in blend_ratios:
    blended_preds = ratio * meta_preds_train + (1 - ratio) * weighted_preds_train
    blended_auc = roc_auc_score(y_resampled, blended_preds)
    if blended_auc > best_blend_auc:
        best_blend_auc = blended_auc
        best_blend_ratio = ratio

print(f"Best blending ratio: {best_blend_ratio:.3f} (stacking : weighted)")
print(f"Best blended AUC: {best_blend_auc:.5f}")

# Final predictions
final_preds_test = best_blend_ratio * meta_preds_test + (1 - best_blend_ratio) * weighted_preds_test
final_preds_train = best_blend_ratio * meta_preds_train + (1 - best_blend_ratio) * weighted_preds_train

final_auc = roc_auc_score(y_resampled, final_preds_train)
print(f"\nFinal Ensemble Train AUC: {final_auc:.5f}")
print(f"Prediction range: [{final_preds_test.min():.4f}, {final_preds_test.max():.4f}]")
print(f"Mean prediction: {final_preds_test.mean():.4f}")



FINAL ENSEMBLE PREDICTIONS

Optimizing blending ratio...
Best blending ratio: 0.400 (stacking : weighted)
Best blended AUC: 1.00000

Final Ensemble Train AUC: 1.00000
Prediction range: [0.0695, 0.8806]
Mean prediction: 0.3939


In [11]:
# ============================================================================
# STEP 9: Cross-Validation Evaluation on Original Unbalanced Data
# ============================================================================
print("\n" + "="*80)
print("CROSS-VALIDATION EVALUATION")
print("="*80)

# Evaluate on original unbalanced data using 5-fold CV
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

def cv_predictions(model_class, model_params, X_data, y_data):
    """Get out-of-fold predictions"""
    oof_preds = np.zeros(len(y_data))
    for train_idx, val_idx in cv.split(X_data, y_data):
        X_train_fold = X_data.iloc[train_idx].values if isinstance(X_data, pd.DataFrame) else X_data[train_idx]
        X_val_fold = X_data.iloc[val_idx].values if isinstance(X_data, pd.DataFrame) else X_data[val_idx]
        y_train_fold, y_val_fold = y_data.iloc[train_idx], y_data.iloc[val_idx]
        
        # Create selector for this fold
        selector_fold = xgb.XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1, 
                                        random_state=42, eval_metric='auc', tree_method='hist')
        selector_fold.fit(X_train_fold, y_train_fold)
        selector_transform = SelectFromModel(selector_fold, max_features=150, threshold=-np.inf)
        
        # Apply SMOTE to training fold
        smote_fold = SMOTE(sampling_strategy=0.5, random_state=42, k_neighbors=5)
        X_train_smote, y_train_smote = smote_fold.fit_resample(X_train_fold, y_train_fold)
        
        # Transform with fold-specific selector
        X_train_selected = selector_transform.transform(X_train_smote)
        X_val_selected = selector_transform.transform(X_val_fold)
        
        # Train and predict
        model = model_class(**model_params)
        model.fit(X_train_selected, y_train_smote)
        oof_preds[val_idx] = model.predict_proba(X_val_selected)[:, 1]
    
    return oof_preds

print("\nComputing out-of-fold predictions for ensemble...")
print("(This may take a few minutes)")

# Get OOF predictions for each model (pass class and params instead of fitted model)
print("Computing OOF for XGBoost...")
xgb_oof = cv_predictions(xgb.XGBClassifier, best_xgb_params, X, y)

print("Computing OOF for ExtraTrees...")
et_oof = cv_predictions(ExtraTreesClassifier, best_et_params, X, y)

print("Computing OOF for Logistic Regression...")
lr_oof = cv_predictions(LogisticRegression, best_lr_params, X, y)

# Stack and blend OOF predictions
meta_oof = np.column_stack([xgb_oof, et_oof, lr_oof])
meta_oof_preds = meta_model.predict_proba(meta_oof)[:, 1]

weighted_oof = weights[0] * xgb_oof + weights[1] * et_oof + weights[2] * lr_oof
final_oof = best_blend_ratio * meta_oof_preds + (1 - best_blend_ratio) * weighted_oof

# Calculate metrics
threshold = 0.5
final_oof_binary = (final_oof >= threshold).astype(int)

cv_auc = roc_auc_score(y, final_oof)
cv_f1 = f1_score(y, final_oof_binary)
cv_precision = precision_score(y, final_oof_binary)
cv_recall = recall_score(y, final_oof_binary)

print(f"\nCross-Validated Metrics (5-fold):")
print(f"  AUC: {cv_auc:.5f}")
print(f"  F1 Score: {cv_f1:.5f}")
print(f"  Precision: {cv_precision:.5f}")
print(f"  Recall: {cv_recall:.5f}")



CROSS-VALIDATION EVALUATION

Computing out-of-fold predictions for ensemble...
(This may take a few minutes)
Computing OOF for XGBoost...
Computing OOF for ExtraTrees...
Computing OOF for Logistic Regression...

Cross-Validated Metrics (5-fold):
  AUC: 0.83094
  F1 Score: 0.52235
  Precision: 0.60933
  Recall: 0.45711


In [12]:
# ============================================================================
# STEP 10: Generate Submission File
# ============================================================================
print("\n" + "="*80)
print("GENERATING SUBMISSION FILE")
print("="*80)

submission = pd.DataFrame({
    'claim_number': test_original['claim_number'],
    'subrogation': final_preds_test
})

submission.to_csv('baseline_v2_submission.csv', index=False)
print("Submission file saved: baseline_v2_submission.csv")
print(f"\nSubmission statistics:")
print(f"  Shape: {submission.shape}")
print(f"  Prediction range: [{submission['subrogation'].min():.4f}, {submission['subrogation'].max():.4f}]")
print(f"  Mean prediction: {submission['subrogation'].mean():.4f}")
print(f"\nFirst 10 predictions:")
print(submission.head(10))
print(f"\nLast 10 predictions:")
print(submission.tail(10))

# Also save binary version (threshold = 0.5)
submission_binary = submission.copy()
submission_binary['subrogation'] = (submission_binary['subrogation'] >= 0.5).astype(int)
submission_binary.to_csv('baseline_v2_submission_binary.csv', index=False)
print(f"\nBinary submission saved: baseline_v2_submission_binary.csv")



GENERATING SUBMISSION FILE
Submission file saved: baseline_v2_submission.csv

Submission statistics:
  Shape: (12000, 2)
  Prediction range: [0.0695, 0.8806]
  Mean prediction: 0.3939

First 10 predictions:
   claim_number  subrogation
0       3126034     0.272111
1       7380142     0.440979
2       4655051     0.216919
3       6728725     0.787726
4       9848460     0.318639
5       7164028     0.111893
6       8693447     0.584767
7       3185825     0.337260
8       4469488     0.327042
9       6027831     0.731881

Last 10 predictions:
       claim_number  subrogation
11990       8550044     0.834583
11991       6752685     0.151940
11992       8890691     0.379889
11993       2253734     0.645585
11994       3618383     0.752167
11995       3760534     0.183342
11996       9934539     0.614633
11997       1682245     0.137872
11998       6431119     0.592145
11999       7730901     0.284031

Binary submission saved: baseline_v2_submission_binary.csv


In [13]:
# ============================================================================
# STEP 11: Feature Importance Analysis
# ============================================================================
print("\n" + "="*80)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*80)

# Combine feature importances from tree models
feature_importance = pd.DataFrame({
    'feature': selected_features,
    'xgb_importance': xgb_model.feature_importances_,
    'et_importance': et_model.feature_importances_
})

# Average importance across tree models
feature_importance['avg_importance'] = feature_importance[['xgb_importance', 'et_importance']].mean(axis=1)
feature_importance = feature_importance.sort_values('avg_importance', ascending=False)

print("\nTop 30 Most Important Features:")
print(feature_importance.head(30).to_string(index=False))

feature_importance.to_csv('baseline_v2_feature_importance.csv', index=False)
print(f"\nFeature importance saved: baseline_v2_feature_importance.csv")



FEATURE IMPORTANCE ANALYSIS

Top 30 Most Important Features:
                                       feature  xgb_importance  et_importance  avg_importance
                             accident_type_enc        0.087821       0.063116        0.075468
           liab_prct_x_witness_present_ind_enc        0.088661       0.024553        0.056607
                                liab_prct_rank        0.054810       0.054489        0.054650
                        accident_site_enc_freq        0.062112       0.032898        0.047505
              liab_prct_plus_accident_type_enc        0.056270       0.029919        0.043094
                            multi_vehicle_flag        0.036124       0.041363        0.038744
                  accident_site_enc_target_enc        0.048219       0.028423        0.038321
                            high_education_ind        0.025423       0.028839        0.027131
                                     liab_prct        0.023382       0.030099        0.02674

In [14]:
# ============================================================================
# SUMMARY
# ============================================================================
print("\n" + "="*80)
print("BASELINE VERSION 2 - COMPLETE!")
print("="*80)
print("\nMODELS USED:")
print("✓ 1. XGBoost (Gradient Boosting)")
print("✓ 2. ExtraTrees (Bagging)")
print("✓ 3. Logistic Regression with Calibration (Linear)")
print("\nTECHNIQUES APPLIED:")
print("✓ SMOTE for class imbalance (50% minority)")
print("✓ Feature selection (top 150 features)")
print("✓ Bayesian hyperparameter optimization (Optuna)")
print("✓ Stacking with Logistic Regression meta-learner")
print("✓ Weighted averaging based on performance")
print("✓ Optimized blending ratio")
print("✓ 5-fold stratified cross-validation")
print("\nOUTPUT FILES:")
print("- baseline_v2_submission.csv (probabilities)")
print("- baseline_v2_submission_binary.csv (binary predictions)")
print("- baseline_v2_feature_importance.csv (feature analysis)")
print("\nMODEL DIVERSITY:")
print("✓ Algorithmic diversity: Boosting (XGB) + Bagging (ET) + Linear (LR)")
print("✓ Complementary strengths for robust ensemble")
print("="*80)



BASELINE VERSION 2 - COMPLETE!

MODELS USED:
✓ 1. XGBoost (Gradient Boosting)
✓ 2. ExtraTrees (Bagging)
✓ 3. Logistic Regression with Calibration (Linear)

TECHNIQUES APPLIED:
✓ SMOTE for class imbalance (50% minority)
✓ Feature selection (top 150 features)
✓ Bayesian hyperparameter optimization (Optuna)
✓ Stacking with Logistic Regression meta-learner
✓ Weighted averaging based on performance
✓ Optimized blending ratio
✓ 5-fold stratified cross-validation

OUTPUT FILES:
- baseline_v2_submission.csv (probabilities)
- baseline_v2_submission_binary.csv (binary predictions)
- baseline_v2_feature_importance.csv (feature analysis)

MODEL DIVERSITY:
✓ Algorithmic diversity: Boosting (XGB) + Bagging (ET) + Linear (LR)
✓ Complementary strengths for robust ensemble
