# Imports

In [20]:
# General

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter tuning
import optuna

# Models
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier

# Data processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Metrics
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score

# Data preparation

In [21]:
# Data input

try:
    data = pd.read_csv('../data/train.csv')
    data_to_predict = pd.read_csv('../data/test.csv')
    data_ccrisk = pd.read_csv('../data/credit_risk_dataset.csv')

except:
    data = pd.read_csv('/kaggle/input/playground-series-s4e10/train.csv')
    data_to_predict = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')
    data_ccrisk = pd.read_csv('/kaggle/input/loan-approval-prediction/credit_risk_dataset.csv')


data_ccrisk.dropna(inplace=True)
data_ccrisk.drop_duplicates(inplace=True)

X_old = data.drop(['loan_status'], axis=1)
y_old = data['loan_status']

# Merge the dataframes
data_no_id = data.drop(['id'], axis=1)
merged_data = pd.concat([data_no_id, data_ccrisk], ignore_index=True)

X = merged_data.drop(['loan_status'], axis=1)
y = merged_data['loan_status']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, shuffle=True, test_size=0.2, random_state=2024)

In [14]:
from imblearn.over_sampling import SMOTENC

categorical_columns = X.select_dtypes(include=['object']).columns

sm = SMOTENC(sampling_strategy=0.7, categorical_features=[X_train.columns.get_loc(col) for col in categorical_columns], random_state=2024)

X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

In [18]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import ColumnTransformer

# Data processing
categorical_columns = X.select_dtypes(include=['object']).columns

categorical_ordinal = ['loan_grade']
categorical_onehot = categorical_columns.drop(categorical_ordinal)

log_columns = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt']
log_columns = log_columns

numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns
numerical_columns = numerical_columns.drop(log_columns)

def log_transform():
	return FunctionTransformer(np.log1p, validate=True)

preprocessor = ColumnTransformer(
	transformers=[
		('ordinal', OrdinalEncoder(), categorical_ordinal),
		('onehot', OneHotEncoder(), categorical_onehot),
		('scaler', StandardScaler(), numerical_columns),
		('log', log_transform(), log_columns)
	], remainder='passthrough'
	)

preprocessor.fit(X_train)

X_train_prep = preprocessor.transform(X_train_res)
X_test_prep = preprocessor.transform(X_test)

# Models

## XGBoost

In [4]:
def xgb_objective(trial):
    # Hyperparameter search space
    param = {
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 0.5),
        'lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 3.0),
        'eval_metric': 'auc',  # AUC is the metric
        'objective': 'binary:logistic'  # Use binary logistic, probability outputs
    }

    # Initialize the model
    model = xgb.XGBClassifier(**param)

    # Fit the model
    model.fit(X_train_prep, y_train, 
              eval_set=[(X_test_prep, y_test)],  
              verbose=False)

    # Predict proba for validation set
    y_pred_prob = model.predict_proba(X_test_prep)[:, 1]

    auc = roc_auc_score(y_test, y_pred_prob)
 
    return auc

# Study object to run the optimization. I want to maximize AUC
xgb_study = optuna.create_study(direction='maximize')
xgb_study.optimize(xgb_objective, n_trials=50)

print(f"Best trial: {xgb_study.best_trial.params}")

[I 2024-10-10 16:25:13,530] A new study created in memory with name: no-name-5002604b-a04c-4839-872b-ccbe860dd18c
[I 2024-10-10 16:25:16,439] Trial 0 finished with value: 0.9561452864666558 and parameters: {'max_depth': 6, 'learning_rate': 0.06030562429393147, 'n_estimators': 261, 'subsample': 0.8579357374324157, 'colsample_bytree': 0.9136377678517776, 'gamma': 0.4457896422552342, 'lambda': 4.754404709438046e-06, 'alpha': 3.295307639605494, 'scale_pos_weight': 2.4347591546713394}. Best is trial 0 with value: 0.9561452864666558.
[I 2024-10-10 16:25:22,020] Trial 1 finished with value: 0.9482924522618721 and parameters: {'max_depth': 6, 'learning_rate': 0.015119517117753988, 'n_estimators': 363, 'subsample': 0.7590898344676097, 'colsample_bytree': 0.9883691715767334, 'gamma': 0.3777298310435124, 'lambda': 0.002486519370281084, 'alpha': 0.00030518551380154105, 'scale_pos_weight': 1.6551231083987032}. Best is trial 0 with value: 0.9561452864666558.
[I 2024-10-10 16:25:25,635] Trial 2 finis

Best trial: {'max_depth': 5, 'learning_rate': 0.1308367345871002, 'n_estimators': 313, 'subsample': 0.8864952727733624, 'colsample_bytree': 0.5020797617972026, 'gamma': 0.4062806308038399, 'lambda': 3.0890707327408745e-08, 'alpha': 0.018470671128964587, 'scale_pos_weight': 2.0013827993878412}


In [5]:
xgb_best_params = xgb_study.best_trial.params
xgb_best_score = xgb_study.best_trial.value
xgb_best_score

0.9576051262733373

## CatBoostClassifier

In [6]:
from catboost import CatBoostClassifier

def cat_objective(trial):

    # Define the hyperparameter search space

    param = {
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'iterations': trial.suggest_int('iterations', 200, 1000),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 1.0),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 3.0),
        'eval_metric': 'AUC',  
        'logging_level': 'Silent',  # Disable CatBoost output
        'task_type': 'CPU',  # Try using GPU
        'use_best_model': True
    }

    # Initialize the CatBoost model with the trial's parameters
    model = CatBoostClassifier(**param)

    # Fit the model
    model.fit(X_train_prep, y_train,
              eval_set=[(X_test_prep, y_test)],
              early_stopping_rounds=20,  # Early stopping to prevent overfitting
              verbose=False)

    # Get predictions and calculate AUC score
    y_pred_prob = model.predict_proba(X_test_prep)[:, 1]  # Probabilities for class 1

    auc = roc_auc_score(y_test, y_pred_prob)

    return auc
    
cat_study = optuna.create_study(direction='maximize')  # We want to maximize AUC
cat_study.optimize(cat_objective, n_trials=50)

# Output the best trial
print(f"Best trial: {cat_study.best_trial.params}")

[I 2024-10-10 16:27:46,926] A new study created in memory with name: no-name-19ac5b1a-9e77-4d18-856e-8f21d2fad7a0
[I 2024-10-10 16:27:50,527] Trial 0 finished with value: 0.9473316300250798 and parameters: {'depth': 8, 'learning_rate': 0.10693357271169861, 'iterations': 297, 'l2_leaf_reg': 2.8046216498735753e-08, 'border_count': 64, 'bagging_temperature': 0.31679719505416903, 'random_strength': 0.41718515158972147, 'scale_pos_weight': 1.7219320628517139}. Best is trial 0 with value: 0.9473316300250798.
[I 2024-10-10 16:28:00,383] Trial 1 finished with value: 0.9534438049408316 and parameters: {'depth': 7, 'learning_rate': 0.0628238398297528, 'iterations': 605, 'l2_leaf_reg': 0.1628819998650926, 'border_count': 93, 'bagging_temperature': 0.47815406070449173, 'random_strength': 0.8769080384707851, 'scale_pos_weight': 1.455663879587693}. Best is trial 1 with value: 0.9534438049408316.
[I 2024-10-10 16:28:06,203] Trial 2 finished with value: 0.9418586574617045 and parameters: {'depth': 6, 

Best trial: {'depth': 4, 'learning_rate': 0.158296890656293, 'iterations': 835, 'l2_leaf_reg': 8.321387311341882, 'border_count': 220, 'bagging_temperature': 0.6876763301098845, 'random_strength': 0.2829120896454952, 'scale_pos_weight': 2.2754863604783795}


In [7]:
cat_best_params = cat_study.best_trial.params
cat_best_score = cat_study.best_trial.value
cat_best_score

0.9601213839193278

## LightGBM

In [8]:
# Objective function for LightGBM

def lgb_objective(trial):
    param = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 10.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
    }

    lgb_model = lgb.LGBMClassifier(**param)
  

    # Train the model
    lgb_model.fit(X_train_prep, y_train,
                  eval_set=[(X_test_prep, y_test)],
                  eval_metric='auc'
                  )

    # Predict and evaluate AUC
    y_pred_prob = lgb_model.predict_proba(X_test_prep)[:, 1]
    auc = roc_auc_score(y_test, y_pred_prob)

    return auc

# Optimize the objective function
lgb_study = optuna.create_study(direction='maximize')
lgb_study.optimize(lgb_objective, n_trials=50)

# Best parameters and AUC
print("Best LGBM Params: ", lgb_study.best_trial.params)
print("Best AUC for LGBM: ", lgb_study.best_value)

[I 2024-10-10 16:31:45,315] A new study created in memory with name: no-name-3efa0588-b03e-4815-aeb9-60f7b835dc5e


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:31:55,145] Trial 0 finished with value: 0.9589626809165129 and parameters: {'learning_rate': 0.024696604869611023, 'num_leaves': 199, 'max_depth': 14, 'min_child_samples': 15, 'min_child_weight': 1.5177216525219952, 'subsample': 0.8052189535554071, 'colsample_bytree': 0.5537786414328838, 'n_estimators': 268}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:32:12,599] Trial 1 finished with value: 0.9525834208383163 and parameters: {'learning_rate': 0.15631814969837637, 'num_leaves': 179, 'max_depth': 11, 'min_child_samples': 90, 'min_child_weight': 6.451664745054859, 'subsample': 0.6383111129688093, 'colsample_bytree': 0.8952455601389373, 'n_estimators': 816}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016848 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:32:20,875] Trial 2 finished with value: 0.9586838031828386 and parameters: {'learning_rate': 0.07286056716652187, 'num_leaves': 149, 'max_depth': 10, 'min_child_samples': 58, 'min_child_weight': 2.855969386416546, 'subsample': 0.5385812956364816, 'colsample_bytree': 0.7957990738887843, 'n_estimators': 415}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016774 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:32:30,718] Trial 3 finished with value: 0.9524713203720504 and parameters: {'learning_rate': 0.2457225305845174, 'num_leaves': 55, 'max_depth': 10, 'min_child_samples': 22, 'min_child_weight': 1.9250121353863898, 'subsample': 0.6738488672402028, 'colsample_bytree': 0.7192577161711017, 'n_estimators': 671}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:32:44,524] Trial 4 finished with value: 0.9522777611808257 and parameters: {'learning_rate': 0.19496653947609627, 'num_leaves': 84, 'max_depth': 13, 'min_child_samples': 10, 'min_child_weight': 2.026271758561759, 'subsample': 0.9839505635940979, 'colsample_bytree': 0.5976750966977569, 'n_estimators': 790}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016753 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:32:54,520] Trial 5 finished with value: 0.9572376266564774 and parameters: {'learning_rate': 0.2722435648324439, 'num_leaves': 174, 'max_depth': 5, 'min_child_samples': 83, 'min_child_weight': 7.505087206094436, 'subsample': 0.9072566539461602, 'colsample_bytree': 0.9323193794827227, 'n_estimators': 751}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016663 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:03,679] Trial 6 finished with value: 0.9519442303258709 and parameters: {'learning_rate': 0.2306532146091382, 'num_leaves': 201, 'max_depth': 15, 'min_child_samples': 11, 'min_child_weight': 4.460396448080087, 'subsample': 0.9290739564342656, 'colsample_bytree': 0.6361460758677823, 'n_estimators': 378}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016902 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:15,888] Trial 7 finished with value: 0.9495496042905636 and parameters: {'learning_rate': 0.2733779127618724, 'num_leaves': 133, 'max_depth': 6, 'min_child_samples': 20, 'min_child_weight': 1.1492924096638608, 'subsample': 0.6434899945135696, 'colsample_bytree': 0.9733245114343562, 'n_estimators': 772}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:18,103] Trial 8 finished with value: 0.9560645736573471 and parameters: {'learning_rate': 0.08540373524843523, 'num_leaves': 269, 'max_depth': 6, 'min_child_samples': 15, 'min_child_weight': 9.743449904431841, 'subsample': 0.5869687649140002, 'colsample_bytree': 0.56707100861557, 'n_estimators': 140}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016450 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:26,675] Trial 9 finished with value: 0.9555275854288906 and parameters: {'learning_rate': 0.28768235476930293, 'num_leaves': 211, 'max_depth': 5, 'min_child_samples': 46, 'min_child_weight': 3.1021107938462356, 'subsample': 0.9806536676208665, 'colsample_bytree': 0.5446620397052678, 'n_estimators': 720}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:30,228] Trial 10 finished with value: 0.9542641937565864 and parameters: {'learning_rate': 0.022410246367151363, 'num_leaves': 263, 'max_depth': 15, 'min_child_samples': 42, 'min_child_weight': 0.44569736592654596, 'subsample': 0.807695093177624, 'colsample_bytree': 0.7041685633760203, 'n_estimators': 115}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016955 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:38,140] Trial 11 finished with value: 0.9566323812260751 and parameters: {'learning_rate': 0.020773340615254987, 'num_leaves': 120, 'max_depth': 8, 'min_child_samples': 69, 'min_child_weight': 3.909354667566099, 'subsample': 0.5231982527234952, 'colsample_bytree': 0.8105009879211272, 'n_estimators': 396}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:33:46,586] Trial 12 finished with value: 0.9563484478418837 and parameters: {'learning_rate': 0.08985941036699727, 'num_leaves': 227, 'max_depth': 12, 'min_child_samples': 62, 'min_child_weight': 2.8136721712402317, 'subsample': 0.7793421593133574, 'colsample_bytree': 0.8131881266869068, 'n_estimators': 369}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014003 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:03,258] Trial 13 finished with value: 0.9570195588081793 and parameters: {'learning_rate': 0.07337246333392128, 'num_leaves': 125, 'max_depth': 8, 'min_child_samples': 35, 'min_child_weight': 0.16525853013894332, 'subsample': 0.8400740368660072, 'colsample_bytree': 0.5046816211567697, 'n_estimators': 974}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016969 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:08,309] Trial 14 finished with value: 0.9518405954102367 and parameters: {'learning_rate': 0.05431725268493975, 'num_leaves': 300, 'max_depth': 3, 'min_child_samples': 60, 'min_child_weight': 5.678875258571935, 'subsample': 0.708385933120094, 'colsample_bytree': 0.80364394006694, 'n_estimators': 466}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016667 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:13,601] Trial 15 finished with value: 0.9562836834195692 and parameters: {'learning_rate': 0.1268197270521819, 'num_leaves': 158, 'max_depth': 13, 'min_child_samples': 30, 'min_child_weight': 3.232169844134674, 'subsample': 0.5097783554588665, 'colsample_bytree': 0.6498812126035454, 'n_estimators': 243}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017748 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:23,963] Trial 16 finished with value: 0.9555575167743404 and parameters: {'learning_rate': 0.12704394419375695, 'num_leaves': 79, 'max_depth': 9, 'min_child_samples': 75, 'min_child_weight': 1.756706435936598, 'subsample': 0.7210743230525557, 'colsample_bytree': 0.8560326813266085, 'n_estimators': 581}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017375 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:28,373] Trial 17 finished with value: 0.9576738333926593 and parameters: {'learning_rate': 0.04116831336590335, 'num_leaves': 32, 'max_depth': 13, 'min_child_samples': 100, 'min_child_weight': 5.3147763500655145, 'subsample': 0.8781934325054608, 'colsample_bytree': 0.7542450990876145, 'n_estimators': 253}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017239 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:38,932] Trial 18 finished with value: 0.9569139229442137 and parameters: {'learning_rate': 0.11550979042051304, 'num_leaves': 231, 'max_depth': 10, 'min_child_samples': 53, 'min_child_weight': 3.892405866734734, 'subsample': 0.5732640424359493, 'colsample_bytree': 0.6593214274414818, 'n_estimators': 542}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016960 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:45,920] Trial 19 finished with value: 0.9555982816566128 and parameters: {'learning_rate': 0.014225192807598049, 'num_leaves': 152, 'max_depth': 15, 'min_child_samples': 35, 'min_child_weight': 1.02834789729504, 'subsample': 0.7482916488928121, 'colsample_bytree': 0.7530781123969792, 'n_estimators': 264}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:34:55,879] Trial 20 finished with value: 0.9556882059322032 and parameters: {'learning_rate': 0.16541857299353346, 'num_leaves': 248, 'max_depth': 12, 'min_child_samples': 5, 'min_child_weight': 8.244597407757611, 'subsample': 0.8408500055603889, 'colsample_bytree': 0.8658281061328438, 'n_estimators': 469}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016920 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:00,466] Trial 21 finished with value: 0.9586910373806511 and parameters: {'learning_rate': 0.04785805523502665, 'num_leaves': 51, 'max_depth': 14, 'min_child_samples': 99, 'min_child_weight': 5.626806242420393, 'subsample': 0.8828525954503725, 'colsample_bytree': 0.762823377629425, 'n_estimators': 239}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:05,031] Trial 22 finished with value: 0.9585443406361396 and parameters: {'learning_rate': 0.05610215831134874, 'num_leaves': 23, 'max_depth': 14, 'min_child_samples': 99, 'min_child_weight': 6.261284474053203, 'subsample': 0.8015499321422183, 'colsample_bytree': 0.7755563381172013, 'n_estimators': 305}. Best is trial 0 with value: 0.9589626809165129.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:08,641] Trial 23 finished with value: 0.9597123498246838 and parameters: {'learning_rate': 0.10268826360793096, 'num_leaves': 96, 'max_depth': 11, 'min_child_samples': 75, 'min_child_weight': 4.678439048523547, 'subsample': 0.8654596643805321, 'colsample_bytree': 0.6838121102001031, 'n_estimators': 177}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016754 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:12,478] Trial 24 finished with value: 0.9587109166247216 and parameters: {'learning_rate': 0.1106624311982421, 'num_leaves': 97, 'max_depth': 14, 'min_child_samples': 85, 'min_child_weight': 7.0472100008986835, 'subsample': 0.8681771587441925, 'colsample_bytree': 0.6975665684229633, 'n_estimators': 180}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:16,343] Trial 25 finished with value: 0.9574049012009856 and parameters: {'learning_rate': 0.18608822302261402, 'num_leaves': 101, 'max_depth': 12, 'min_child_samples': 80, 'min_child_weight': 7.541754731998644, 'subsample': 0.8659505153792695, 'colsample_bytree': 0.6890785826888272, 'n_estimators': 188}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016507 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:20,034] Trial 26 finished with value: 0.9590235855213368 and parameters: {'learning_rate': 0.11084279639979486, 'num_leaves': 104, 'max_depth': 14, 'min_child_samples': 89, 'min_child_weight': 9.865369274846064, 'subsample': 0.9343146700867179, 'colsample_bytree': 0.6025847438272431, 'n_estimators': 172}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:25,440] Trial 27 finished with value: 0.959662326116406 and parameters: {'learning_rate': 0.10330182982221921, 'num_leaves': 58, 'max_depth': 11, 'min_child_samples': 72, 'min_child_weight': 9.701846884251905, 'subsample': 0.9308889778265481, 'colsample_bytree': 0.616698310546685, 'n_estimators': 319}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:30,731] Trial 28 finished with value: 0.9588649067667007 and parameters: {'learning_rate': 0.1411768790397545, 'num_leaves': 60, 'max_depth': 11, 'min_child_samples': 71, 'min_child_weight': 9.920184591902538, 'subsample': 0.9451420201906113, 'colsample_bytree': 0.6040021690837023, 'n_estimators': 312}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016233 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:34,419] Trial 29 finished with value: 0.9594629653590142 and parameters: {'learning_rate': 0.10474060489331474, 'num_leaves': 104, 'max_depth': 11, 'min_child_samples': 90, 'min_child_weight': 8.713169516730407, 'subsample': 0.9406291341942323, 'colsample_bytree': 0.6052672785598459, 'n_estimators': 188}. Best is trial 23 with value: 0.9597123498246838.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:39,443] Trial 30 finished with value: 0.9604750071371102 and parameters: {'learning_rate': 0.09326407775073504, 'num_leaves': 77, 'max_depth': 8, 'min_child_samples': 67, 'min_child_weight': 8.849174764839312, 'subsample': 0.9523505516036563, 'colsample_bytree': 0.5249387355118129, 'n_estimators': 320}. Best is trial 30 with value: 0.9604750071371102.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:44,467] Trial 31 finished with value: 0.9606949338545682 and parameters: {'learning_rate': 0.09831411564981576, 'num_leaves': 73, 'max_depth': 8, 'min_child_samples': 69, 'min_child_weight': 8.931263456893246, 'subsample': 0.9610712545651485, 'colsample_bytree': 0.5132219354918022, 'n_estimators': 326}. Best is trial 31 with value: 0.9606949338545682.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:49,644] Trial 32 finished with value: 0.9601698802765126 and parameters: {'learning_rate': 0.09386974858737984, 'num_leaves': 69, 'max_depth': 8, 'min_child_samples': 65, 'min_child_weight': 8.95106494538632, 'subsample': 0.9990286570440879, 'colsample_bytree': 0.5110166972808171, 'n_estimators': 331}. Best is trial 31 with value: 0.9606949338545682.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013961 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:35:56,694] Trial 33 finished with value: 0.9588713240092772 and parameters: {'learning_rate': 0.141978729921141, 'num_leaves': 87, 'max_depth': 8, 'min_child_samples': 64, 'min_child_weight': 8.605676127922585, 'subsample': 0.9907140462286355, 'colsample_bytree': 0.5084454578458655, 'n_estimators': 473}. Best is trial 31 with value: 0.9606949338545682.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:01,807] Trial 34 finished with value: 0.9601558381184019 and parameters: {'learning_rate': 0.07495637311413861, 'num_leaves': 73, 'max_depth': 7, 'min_child_samples': 54, 'min_child_weight': 9.023570607155811, 'subsample': 0.9613653777434101, 'colsample_bytree': 0.5466114269854228, 'n_estimators': 340}. Best is trial 31 with value: 0.9606949338545682.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015955 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:09,755] Trial 35 finished with value: 0.9602484974180426 and parameters: {'learning_rate': 0.08249769737766409, 'num_leaves': 69, 'max_depth': 7, 'min_child_samples': 52, 'min_child_weight': 9.123005242921767, 'subsample': 0.9638328834244902, 'colsample_bytree': 0.5477585517943178, 'n_estimators': 562}. Best is trial 31 with value: 0.9606949338545682.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016231 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:17,842] Trial 36 finished with value: 0.9607131910280983 and parameters: {'learning_rate': 0.08747324431067474, 'num_leaves': 36, 'max_depth': 9, 'min_child_samples': 47, 'min_child_weight': 7.775139447622631, 'subsample': 0.9974900027757904, 'colsample_bytree': 0.5665140281470507, 'n_estimators': 578}. Best is trial 36 with value: 0.9607131910280983.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015994 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:26,749] Trial 37 finished with value: 0.9606258478574556 and parameters: {'learning_rate': 0.06659088683748238, 'num_leaves': 37, 'max_depth': 9, 'min_child_samples': 43, 'min_child_weight': 7.871043793650589, 'subsample': 0.9052614259486612, 'colsample_bytree': 0.5688353475743771, 'n_estimators': 622}. Best is trial 36 with value: 0.9607131910280983.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016517 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:36,063] Trial 38 finished with value: 0.9604767594468914 and parameters: {'learning_rate': 0.0397174719289882, 'num_leaves': 40, 'max_depth': 9, 'min_child_samples': 46, 'min_child_weight': 7.514684725818922, 'subsample': 0.9163362179186622, 'colsample_bytree': 0.5770931161706776, 'n_estimators': 612}. Best is trial 36 with value: 0.9607131910280983.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:45,818] Trial 39 finished with value: 0.9606246283445673 and parameters: {'learning_rate': 0.03561846615920527, 'num_leaves': 38, 'max_depth': 9, 'min_child_samples': 46, 'min_child_weight': 7.884785819555958, 'subsample': 0.9072136432822686, 'colsample_bytree': 0.5772233449846986, 'n_estimators': 637}. Best is trial 36 with value: 0.9607131910280983.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:36:55,046] Trial 40 finished with value: 0.9605246282877357 and parameters: {'learning_rate': 0.06245801290337209, 'num_leaves': 34, 'max_depth': 10, 'min_child_samples': 27, 'min_child_weight': 8.036430516988396, 'subsample': 0.9160049244362193, 'colsample_bytree': 0.5713762005658848, 'n_estimators': 651}. Best is trial 36 with value: 0.9607131910280983.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:03,455] Trial 41 finished with value: 0.9607718460460408 and parameters: {'learning_rate': 0.057993792786835606, 'num_leaves': 22, 'max_depth': 10, 'min_child_samples': 24, 'min_child_weight': 8.055964388781751, 'subsample': 0.8934632557796529, 'colsample_bytree': 0.5751991695464493, 'n_estimators': 663}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:12,739] Trial 42 finished with value: 0.9593432044574215 and parameters: {'learning_rate': 0.030718587459658706, 'num_leaves': 21, 'max_depth': 9, 'min_child_samples': 40, 'min_child_weight': 6.501302142135425, 'subsample': 0.9007182041130768, 'colsample_bytree': 0.5788093944770998, 'n_estimators': 693}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016560 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:26,128] Trial 43 finished with value: 0.9594312343440594 and parameters: {'learning_rate': 0.06427331179727105, 'num_leaves': 43, 'max_depth': 9, 'min_child_samples': 20, 'min_child_weight': 7.105223394658465, 'subsample': 0.971155564413388, 'colsample_bytree': 0.6325496856727507, 'n_estimators': 882}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:35,550] Trial 44 finished with value: 0.9601621132818072 and parameters: {'learning_rate': 0.03242803411737463, 'num_leaves': 48, 'max_depth': 7, 'min_child_samples': 48, 'min_child_weight': 8.090442969579248, 'subsample': 0.8437259515347707, 'colsample_bytree': 0.5343376935290427, 'n_estimators': 633}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015944 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:42,617] Trial 45 finished with value: 0.9604651918343501 and parameters: {'learning_rate': 0.06930775232090058, 'num_leaves': 27, 'max_depth': 10, 'min_child_samples': 41, 'min_child_weight': 6.865245138450635, 'subsample': 0.8872828779277838, 'colsample_bytree': 0.5793615435355488, 'n_estimators': 530}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:37:54,458] Trial 46 finished with value: 0.9541514302541874 and parameters: {'learning_rate': 0.22445721305537653, 'num_leaves': 57, 'max_depth': 9, 'min_child_samples': 57, 'min_child_weight': 7.796608633219739, 'subsample': 0.9031991419905858, 'colsample_bytree': 0.5621686851795243, 'n_estimators': 734}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017133 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:38:02,570] Trial 47 finished with value: 0.9521774888055821 and parameters: {'learning_rate': 0.01356521316555774, 'num_leaves': 185, 'max_depth': 6, 'min_child_samples': 34, 'min_child_weight': 9.326373308539642, 'subsample': 0.9744144711466844, 'colsample_bytree': 0.6280512416632724, 'n_estimators': 510}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:38:13,616] Trial 48 finished with value: 0.9605655352491796 and parameters: {'learning_rate': 0.04481877749363102, 'num_leaves': 41, 'max_depth': 10, 'min_child_samples': 25, 'min_child_weight': 8.384800606776349, 'subsample': 0.8238838054771483, 'colsample_bytree': 0.6618236732036606, 'n_estimators': 696}. Best is trial 41 with value: 0.9607718460460408.


[LightGBM] [Info] Number of positive: 11630, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016670 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 957
[LightGBM] [Info] Number of data points in the train set: 69716, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166820 -> initscore=-1.608337
[LightGBM] [Info] Start training from score -1.608337


[I 2024-10-10 16:38:25,246] Trial 49 finished with value: 0.9598055774410105 and parameters: {'learning_rate': 0.08413190680954888, 'num_leaves': 34, 'max_depth': 7, 'min_child_samples': 15, 'min_child_weight': 6.02421715131323, 'subsample': 0.9851410054159023, 'colsample_bytree': 0.7203328375578026, 'n_estimators': 832}. Best is trial 41 with value: 0.9607718460460408.


Best LGBM Params:  {'learning_rate': 0.057993792786835606, 'num_leaves': 22, 'max_depth': 10, 'min_child_samples': 24, 'min_child_weight': 8.055964388781751, 'subsample': 0.8934632557796529, 'colsample_bytree': 0.5751991695464493, 'n_estimators': 663}
Best AUC for LGBM:  0.9607718460460408


In [9]:
lgb_best_params = lgb_study.best_trial.params
lgb_best_score = lgb_study.best_value
lgb_best_score

0.9607718460460408

## ExtraTreesClassifier

In [10]:
from sklearn.ensemble import ExtraTreesClassifier

def extratrees_objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_features': trial.suggest_float('max_features', 0.1, 1.0)
    }

    model = ExtraTreesClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train, cv=3, scoring='roc_auc').mean()

    return score

extratrees_study = optuna.create_study(direction='maximize')
extratrees_study.optimize(extratrees_objective, n_trials=50, timeout=600)

print('Best parameters for ExtraTrees:', extratrees_study.best_params)

[I 2024-10-10 16:38:25,275] A new study created in memory with name: no-name-efe562d5-5e14-4df6-97e6-6a258837e8f8
[I 2024-10-10 16:39:53,817] Trial 0 finished with value: 0.9172517971342113 and parameters: {'n_estimators': 429, 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 7, 'max_features': 0.8777728723925546}. Best is trial 0 with value: 0.9172517971342113.
[I 2024-10-10 16:40:34,750] Trial 1 finished with value: 0.9241795543118106 and parameters: {'n_estimators': 195, 'max_depth': 11, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 0.6914270119569254}. Best is trial 1 with value: 0.9241795543118106.
[I 2024-10-10 16:42:17,032] Trial 2 finished with value: 0.9173721257311008 and parameters: {'n_estimators': 494, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 0.8769846365934916}. Best is trial 1 with value: 0.9241795543118106.
[I 2024-10-10 16:42:55,583] Trial 3 finished with value: 0.92768826170796 and parameters: {'n_estim

Best parameters for ExtraTrees: {'n_estimators': 111, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 0.6964044567483005}


In [11]:
extratrees_best_params = extratrees_study.best_params
extratrees_best_score = extratrees_study.best_value
extratrees_best_score

0.9298212453316017

## LogisticRegression

In [12]:
from sklearn.linear_model import LogisticRegression

def logreg_objective(trial):
    
    param = {
        'penalty': 'elasticnet',
        'solver' : 'saga',
        'C': trial.suggest_float('C', 1e-4, 1e2, log=True),
        'l1_ratio': trial.suggest_float('l1_ratio', 0, 1)
    }

    model = LogisticRegression(**param, max_iter=1000)
    score = cross_val_score(model, X_train_prep, y_train, cv=3, scoring='roc_auc').mean()
    return score

logreg_study = optuna.create_study(direction='maximize')
logreg_study.optimize(logreg_objective, n_trials=50, timeout=600)
print('Best parameters for Logistic Regression:', logreg_study.best_params)

[I 2024-10-10 16:49:00,550] A new study created in memory with name: no-name-21c330be-5aeb-4301-ad32-05887eb98dd8
[I 2024-10-10 16:49:34,377] Trial 0 finished with value: 0.8813751785037436 and parameters: {'C': 11.04021839611546, 'l1_ratio': 0.8953861870293954}. Best is trial 0 with value: 0.8813751785037436.
[I 2024-10-10 16:50:52,225] Trial 1 finished with value: 0.8813958011061516 and parameters: {'C': 0.09893212711010874, 'l1_ratio': 0.5708240957665429}. Best is trial 1 with value: 0.8813958011061516.
[I 2024-10-10 16:50:56,191] Trial 2 finished with value: 0.850624799353969 and parameters: {'C': 0.00019311590052084702, 'l1_ratio': 0.37393686867331666}. Best is trial 1 with value: 0.8813958011061516.
[I 2024-10-10 16:52:13,466] Trial 3 finished with value: 0.8813971204333081 and parameters: {'C': 0.08007473512842489, 'l1_ratio': 0.6198924618047804}. Best is trial 3 with value: 0.8813971204333081.
[I 2024-10-10 16:52:48,381] Trial 4 finished with value: 0.8813743790998988 and param

Best parameters for Logistic Regression: {'C': 0.14669352446022293, 'l1_ratio': 0.8696458211366759}


In [13]:
logreg_best_params = logreg_study.best_params
logreg_best_params['penalty'] = 'elasticnet'
logreg_best_score = logreg_study.best_value
logreg_best_score

0.8814172446287084

## HistGradientBoostingClassifier

In [14]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

def histgb_objective(trial):
    param = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'max_iter': trial.suggest_int('max_iter', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 10, 50),
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 50),
        'l2_regularization': trial.suggest_float('l2_regularization', 1e-5, 1.0, log=True)
    }

    model = HistGradientBoostingClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train, cv=3, scoring='roc_auc').mean()

    return score

histgb_study = optuna.create_study(direction='maximize')
histgb_study.optimize(histgb_objective, n_trials=50, timeout=600)
print('Best parameters for HistGradientBoosting:', histgb_study.best_params)

[I 2024-10-10 16:59:47,999] A new study created in memory with name: no-name-76096296-9fc6-4729-b545-af9e03768e86
[I 2024-10-10 17:00:01,050] Trial 0 finished with value: 0.9503980141162014 and parameters: {'learning_rate': 0.04210518383779263, 'max_iter': 203, 'max_depth': 20, 'min_samples_leaf': 21, 'max_leaf_nodes': 46, 'l2_regularization': 0.003142649536690639}. Best is trial 0 with value: 0.9503980141162014.
[I 2024-10-10 17:00:07,495] Trial 1 finished with value: 0.9219105467224997 and parameters: {'learning_rate': 0.01261549185537781, 'max_iter': 169, 'max_depth': 15, 'min_samples_leaf': 23, 'max_leaf_nodes': 10, 'l2_regularization': 0.00037662483891966815}. Best is trial 0 with value: 0.9503980141162014.
[I 2024-10-10 17:00:22,947] Trial 2 finished with value: 0.951255295153732 and parameters: {'learning_rate': 0.037654885480700374, 'max_iter': 360, 'max_depth': 9, 'min_samples_leaf': 28, 'max_leaf_nodes': 28, 'l2_regularization': 0.09323554708804963}. Best is trial 2 with valu

Best parameters for HistGradientBoosting: {'learning_rate': 0.08411918936593765, 'max_iter': 412, 'max_depth': 12, 'min_samples_leaf': 47, 'max_leaf_nodes': 33, 'l2_regularization': 0.00027459557899173984}


In [15]:
histgb_best_params = histgb_study.best_trial.params
histgb_best_score = histgb_study.best_trial.value
histgb_best_score

0.9519987809872763

## KNN

In [16]:
from sklearn.neighbors import KNeighborsClassifier

def knn_objective(trial):
    param = {
        'n_neighbors': trial.suggest_int('n_neighbors', 3, 15),
        'leaf_size': trial.suggest_int('leaf_size', 20, 50),
        'p': trial.suggest_categorical('p', [1, 2]),  # Minkowski distance parameter
        'weights': trial.suggest_categorical('weights', ['uniform', 'distance'])
    }

    model = KNeighborsClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train, cv=3, scoring='roc_auc').mean()

    return score

knn_study = optuna.create_study(direction='maximize')
knn_study.optimize(knn_objective, n_trials=50, timeout=600)
print('Best parameters for KNN:', knn_study.best_params)
print('Best score for KNN:', knn_study.best_value)

[I 2024-10-10 17:09:56,661] A new study created in memory with name: no-name-58880836-f3e9-4c90-8c31-39eee2c723e8
[I 2024-10-10 17:11:32,631] Trial 0 finished with value: 0.8964486446269743 and parameters: {'n_neighbors': 11, 'leaf_size': 36, 'p': 1, 'weights': 'distance'}. Best is trial 0 with value: 0.8964486446269743.
[I 2024-10-10 17:13:09,227] Trial 1 finished with value: 0.8983835060031757 and parameters: {'n_neighbors': 13, 'leaf_size': 27, 'p': 1, 'weights': 'uniform'}. Best is trial 1 with value: 0.8983835060031757.
[I 2024-10-10 17:14:45,619] Trial 2 finished with value: 0.8983835060031757 and parameters: {'n_neighbors': 13, 'leaf_size': 31, 'p': 1, 'weights': 'uniform'}. Best is trial 1 with value: 0.8983835060031757.
[I 2024-10-10 17:16:24,187] Trial 3 finished with value: 0.8990068582143662 and parameters: {'n_neighbors': 14, 'leaf_size': 25, 'p': 1, 'weights': 'uniform'}. Best is trial 3 with value: 0.8990068582143662.
[I 2024-10-10 17:18:00,244] Trial 4 finished with val

Best parameters for KNN: {'n_neighbors': 14, 'leaf_size': 40, 'p': 1, 'weights': 'distance'}
Best score for KNN: 0.9003336325395043


In [17]:
knn_best_params = knn_study.best_trial.params
knn_best_score = knn_study.best_trial.value
knn_best_score

0.9003336325395043

# Stacking

In [18]:
from sklearn.ensemble import StackingClassifier

# Init base models

xgb_model = xgb.XGBClassifier(**xgb_best_params)
cat_model = CatBoostClassifier(**cat_best_params)
lgb_model = lgb.LGBMClassifier(**lgb_best_params)
extratrees_model = ExtraTreesClassifier(**extratrees_best_params)
logreg_model = LogisticRegression(**logreg_best_params)
histgb_model = HistGradientBoostingClassifier(**histgb_best_params)
knn_model = KNeighborsClassifier(**knn_best_params)

# Base and meta models
base_estimators = [
	('xgb', xgb_model),
	('cat', cat_model),
	('lgb', lgb_model),
	('extratrees', extratrees_model),
	('logreg', logreg_model),
	('histgb', histgb_model),
	('knn', knn_model)
]

meta_model = LogisticRegression()

# Stack

stack_model = StackingClassifier(estimators=base_estimators, final_estimator=meta_model, cv=5, n_jobs=-1)

stack_model.fit(X_train_prep, y_train)

0:	learn: 0.5668079	total: 11.2ms	remaining: 9.37s
1:	learn: 0.4847346	total: 21.7ms	remaining: 9.04s
2:	learn: 0.4346061	total: 32.4ms	remaining: 8.98s
3:	learn: 0.3999273	total: 42.4ms	remaining: 8.81s
4:	learn: 0.3774917	total: 52.6ms	remaining: 8.74s
5:	learn: 0.3583983	total: 63.9ms	remaining: 8.82s
6:	learn: 0.3463471	total: 74.2ms	remaining: 8.78s
7:	learn: 0.3357052	total: 84.9ms	remaining: 8.78s
8:	learn: 0.3281523	total: 94.7ms	remaining: 8.69s
9:	learn: 0.3205315	total: 105ms	remaining: 8.65s
10:	learn: 0.3155539	total: 116ms	remaining: 8.68s
11:	learn: 0.3114349	total: 127ms	remaining: 8.72s
12:	learn: 0.3064047	total: 137ms	remaining: 8.68s
13:	learn: 0.3032817	total: 147ms	remaining: 8.61s
14:	learn: 0.3005172	total: 158ms	remaining: 8.65s
15:	learn: 0.2976801	total: 169ms	remaining: 8.64s
16:	learn: 0.2952662	total: 180ms	remaining: 8.68s
17:	learn: 0.2936171	total: 191ms	remaining: 8.68s
18:	learn: 0.2907942	total: 202ms	remaining: 8.67s
19:	learn: 0.2883578	total: 215m

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0:	learn: 0.5669406	total: 9.16ms	remaining: 7.64s
1:	learn: 0.4846002	total: 18.5ms	remaining: 7.69s
2:	learn: 0.4346343	total: 27.2ms	remaining: 7.54s
3:	learn: 0.4002863	total: 35.8ms	remaining: 7.45s
4:	learn: 0.3781079	total: 46.3ms	remaining: 7.68s
5:	learn: 0.3609909	total: 54.5ms	remaining: 7.52s
6:	learn: 0.3498306	total: 62.8ms	remaining: 7.43s
7:	learn: 0.3401244	total: 72.5ms	remaining: 7.5s
8:	learn: 0.3331976	total: 80.7ms	remaining: 7.4s
9:	learn: 0.3263725	total: 89.3ms	remaining: 7.37s
10:	learn: 0.3207866	total: 97.8ms	remaining: 7.33s
11:	learn: 0.3161695	total: 107ms	remaining: 7.32s
12:	learn: 0.3127329	total: 115ms	remaining: 7.26s
13:	learn: 0.3094197	total: 124ms	remaining: 7.27s
14:	learn: 0.3045218	total: 133ms	remaining: 7.27s
15:	learn: 0.3022130	total: 142ms	remaining: 7.26s
16:	learn: 0.2996484	total: 151ms	remaining: 7.25s
17:	learn: 0.2967367	total: 160ms	remaining: 7.25s
18:	learn: 0.2947495	total: 169ms	remaining: 7.25s
19:	learn: 0.2931558	total: 178m

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [19]:
print('Stacking Classifier ROC AUC:', roc_auc_score(y_test, stack_model.predict_proba(X_test_prep)[:, 1]))

Stacking Classifier ROC AUC: 0.9614965682196932


In [19]:
# Choose different base models

base_estimators = [
	('xgb', xgb_model),
	('cat', cat_model),
	('lgb', lgb_model),
	('histgb', histgb_model),
	# ('knn', knn_model)

]

meta_model = lgb.LGBMClassifier()

# Stack
stack_model_2 = StackingClassifier(estimators=base_estimators, final_estimator=meta_model, cv=3, n_jobs=-1)

# stack_model_2.fit(X_train_prep, y_train)

# print('Stacking Classifier ROC AUC:', roc_auc_score(y_test, stack_model_2.predict_proba(X_test_prep)[:, 1]))

NameError: name 'xgb_model' is not defined

# Submission

In [21]:
X_train_all = preprocessor.transform(X)

stack_model_2 = StackingClassifier(estimators=base_estimators, final_estimator=meta_model, cv=5, n_jobs=-1)

stack_model_2.fit(X_train_all, y)

0:	learn: 0.5665053	total: 13.5ms	remaining: 11.2s
1:	learn: 0.4848151	total: 26.2ms	remaining: 10.9s
2:	learn: 0.4347886	total: 39.5ms	remaining: 10.9s
3:	learn: 0.4028477	total: 52ms	remaining: 10.8s
4:	learn: 0.3815115	total: 64.8ms	remaining: 10.8s
5:	learn: 0.3636601	total: 77.5ms	remaining: 10.7s
6:	learn: 0.3504883	total: 89.9ms	remaining: 10.6s
7:	learn: 0.3420471	total: 102ms	remaining: 10.6s
8:	learn: 0.3345370	total: 115ms	remaining: 10.6s
9:	learn: 0.3269051	total: 128ms	remaining: 10.5s
10:	learn: 0.3207508	total: 141ms	remaining: 10.5s
11:	learn: 0.3161075	total: 153ms	remaining: 10.5s
12:	learn: 0.3125321	total: 167ms	remaining: 10.5s
13:	learn: 0.3092873	total: 179ms	remaining: 10.5s
14:	learn: 0.3040977	total: 192ms	remaining: 10.5s
15:	learn: 0.3013252	total: 205ms	remaining: 10.5s
16:	learn: 0.2966019	total: 220ms	remaining: 10.6s
17:	learn: 0.2946561	total: 234ms	remaining: 10.6s
18:	learn: 0.2918070	total: 249ms	remaining: 10.7s
19:	learn: 0.2902920	total: 261ms	re

In [23]:
import joblib

try:
    joblib.dump(stack_model_2, '../models/stack_model_v2.pkl')
except:
    joblib.dump(stack_model_2, 'stack_model_v2.pkl')
    
X_to_pred = preprocessor.transform(data_to_predict)
y_pred_submit = stack_model_2.predict_proba(X_to_pred)[:, 1]

submission = pd.DataFrame({'id': data_to_predict['id'], 'loan_status': y_pred_submit})
try:
    submission.to_csv('../submissions/stack_model_v2.csv', index=False)
except:
    submission.to_csv('stack_model_v2.csv', index=False)