# Imports

In [69]:
# General

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter tuning
import optuna

# Models
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier

# Data processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Metrics
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score

# Data preparation

In [70]:
# Data input

try:
    data = pd.read_csv('../data/train.csv')
    data_to_predict = pd.read_csv('../data/test.csv')
    data_ccrisk = pd.read_csv('../data/credit_risk_dataset.csv')

except:
    data = pd.read_csv('/kaggle/input/playground-series-s4e10/train.csv')
    data_to_predict = pd.read_csv('/kaggle/input/playground-series-s4e10/test.csv')
    data_ccrisk = pd.read_csv('/kaggle/input/loan-approval-prediction/credit_risk_dataset.csv')


data_ccrisk.dropna(inplace=True)
data_ccrisk.drop_duplicates(inplace=True)

X_old = data.drop(['loan_status'], axis=1)
y_old = data['loan_status']

# Merge the dataframes
data_no_id = data.drop(['id'], axis=1)
merged_data = pd.concat([data_no_id, data_ccrisk], ignore_index=True)

X = merged_data.drop(['loan_status'], axis=1)
y = merged_data['loan_status']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, shuffle=True, test_size=0.2, random_state=2024)

In [73]:
from imblearn.over_sampling import SMOTENC

categorical_columns = X.select_dtypes(include=['object']).columns

sm = SMOTENC(sampling_strategy=0.3, categorical_features=[X_train.columns.get_loc(col) for col in categorical_columns], random_state=2024)

X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

In [74]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import ColumnTransformer

# Data processing
categorical_columns = X.select_dtypes(include=['object']).columns

categorical_ordinal = ['loan_grade']
categorical_onehot = categorical_columns.drop(categorical_ordinal)

log_columns = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt']
log_columns = log_columns

numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns
numerical_columns = numerical_columns.drop(log_columns)

def log_transform():
	return FunctionTransformer(np.log1p, validate=True)

preprocessor = ColumnTransformer(
	transformers=[
		('ordinal', OrdinalEncoder(), categorical_ordinal),
		('onehot', OneHotEncoder(), categorical_onehot),
		('scaler', StandardScaler(), numerical_columns),
		('log', log_transform(), log_columns)
	], remainder='passthrough'
	)

preprocessor.fit(X_train_res)

X_train_prep = preprocessor.transform(X_train_res)
X_test_prep = preprocessor.transform(X_test)

# Models

## XGBoost

In [77]:
def xgb_objective(trial):
    # Hyperparameter search space
    param = {
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 0.5),
        'lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 3.0),
        'eval_metric': 'auc',  # AUC is the metric
        'objective': 'binary:logistic'  # Use binary logistic, probability outputs
    }

    # Initialize the model
    model = xgb.XGBClassifier(**param)

    # Fit the model
    model.fit(X_train_prep, y_train_res,
              eval_set=[(X_test_prep, y_test)],  
              verbose=False)

    # Predict proba for validation set
    y_pred_prob = model.predict_proba(X_test_prep)[:, 1]

    auc = roc_auc_score(y_test, y_pred_prob)
 
    return auc

# Study object to run the optimization. I want to maximize AUC
xgb_study = optuna.create_study(direction='maximize')
xgb_study.optimize(xgb_objective, n_trials=50)

print(f"Best trial: {xgb_study.best_trial.params}")

[I 2024-10-12 19:30:40,044] A new study created in memory with name: no-name-a419244e-6b70-446a-abca-d2e02890931a


[I 2024-10-12 19:30:48,466] Trial 0 finished with value: 0.9509918617997795 and parameters: {'max_depth': 6, 'learning_rate': 0.2753359156029285, 'n_estimators': 470, 'subsample': 0.8930516628715627, 'colsample_bytree': 0.549635675574224, 'gamma': 0.3869428290930283, 'lambda': 6.663872006442553e-07, 'alpha': 0.08758235756615641, 'scale_pos_weight': 1.6035776672634539}. Best is trial 0 with value: 0.9509918617997795.
[I 2024-10-12 19:30:53,035] Trial 1 finished with value: 0.936974283385811 and parameters: {'max_depth': 3, 'learning_rate': 0.03228938470800746, 'n_estimators': 310, 'subsample': 0.7238808570425561, 'colsample_bytree': 0.7849660409677419, 'gamma': 0.468314165373791, 'lambda': 0.0013806290894327314, 'alpha': 0.004836910624548605, 'scale_pos_weight': 1.599625055404122}. Best is trial 0 with value: 0.9509918617997795.
[I 2024-10-12 19:30:58,394] Trial 2 finished with value: 0.9565239866581974 and parameters: {'max_depth': 8, 'learning_rate': 0.08196270349822926, 'n_estimators

Best trial: {'max_depth': 6, 'learning_rate': 0.08712961838528847, 'n_estimators': 435, 'subsample': 0.9168468853937433, 'colsample_bytree': 0.7801433420045394, 'gamma': 0.21389666989069075, 'lambda': 7.568525836977652e-05, 'alpha': 0.0005838263233074763, 'scale_pos_weight': 1.005561624866418}


In [78]:
xgb_best_params = xgb_study.best_trial.params
xgb_best_score = xgb_study.best_trial.value
xgb_best_score

0.9578972528897483

prev: 0.9576051262733373

## CatBoostClassifier

In [80]:
from catboost import CatBoostClassifier

def cat_objective(trial):

    # Define the hyperparameter search space

    param = {
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'iterations': trial.suggest_int('iterations', 200, 1000),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 0.0, 1.0),
        'scale_pos_weight': trial.suggest_float('scale_pos_weight', 1.0, 3.0),
        'eval_metric': 'AUC',  
        'logging_level': 'Silent',  # Disable CatBoost output
        'task_type': 'CPU',  # Try using GPU
        'use_best_model': True
    }

    # Initialize the CatBoost model with the trial's parameters
    model = CatBoostClassifier(**param)

    # Fit the model
    model.fit(X_train_prep, y_train_res,
              eval_set=[(X_test_prep, y_test)],
              early_stopping_rounds=20,  # Early stopping to prevent overfitting
              verbose=False)

    # Get predictions and calculate AUC score
    y_pred_prob = model.predict_proba(X_test_prep)[:, 1]  # Probabilities for class 1

    auc = roc_auc_score(y_test, y_pred_prob)

    return auc
    
cat_study = optuna.create_study(direction='maximize')  # We want to maximize AUC
cat_study.optimize(cat_objective, n_trials=50)

# Output the best trial
print(f"Best trial: {cat_study.best_trial.params}")

[I 2024-10-12 19:40:42,509] A new study created in memory with name: no-name-d2997b63-b960-42da-8418-d17026932fee
[I 2024-10-12 19:40:49,576] Trial 0 finished with value: 0.9516252389061267 and parameters: {'depth': 7, 'learning_rate': 0.14923111169375491, 'iterations': 220, 'l2_leaf_reg': 9.175977789900557e-06, 'border_count': 235, 'bagging_temperature': 0.2819519131072058, 'random_strength': 0.38014451113574343, 'scale_pos_weight': 2.0650612435134765}. Best is trial 0 with value: 0.9516252389061267.
[I 2024-10-12 19:41:04,769] Trial 1 finished with value: 0.9449467364130162 and parameters: {'depth': 7, 'learning_rate': 0.05129892236922186, 'iterations': 853, 'l2_leaf_reg': 8.211830203332446e-06, 'border_count': 37, 'bagging_temperature': 0.976816357191858, 'random_strength': 0.17116699653918355, 'scale_pos_weight': 2.9830174834750443}. Best is trial 0 with value: 0.9516252389061267.
[I 2024-10-12 19:41:13,082] Trial 2 finished with value: 0.9482456963745374 and parameters: {'depth': 

Best trial: {'depth': 4, 'learning_rate': 0.29883221631968115, 'iterations': 490, 'l2_leaf_reg': 0.3645825533719849, 'border_count': 231, 'bagging_temperature': 0.7809357127963482, 'random_strength': 0.6205304341507932, 'scale_pos_weight': 1.9590564128414854}


In [81]:
cat_best_params = cat_study.best_trial.params
cat_best_score = cat_study.best_trial.value
cat_best_score

0.9585876155837739

prev: 0.9601213839193278

## LightGBM

In [82]:
# Objective function for LightGBM

def lgb_objective(trial):
    param = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 20, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 10.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
    }

    lgb_model = lgb.LGBMClassifier(**param)
  

    # Train the model
    lgb_model.fit(X_train_prep, y_train_res,
                  eval_set=[(X_test_prep, y_test)],
                  eval_metric='auc'
                  )

    # Predict and evaluate AUC
    y_pred_prob = lgb_model.predict_proba(X_test_prep)[:, 1]
    auc = roc_auc_score(y_test, y_pred_prob)

    return auc

# Optimize the objective function
lgb_study = optuna.create_study(direction='maximize')
lgb_study.optimize(lgb_objective, n_trials=50)

# Best parameters and AUC
print("Best LGBM Params: ", lgb_study.best_trial.params)
print("Best AUC for LGBM: ", lgb_study.best_value)

[I 2024-10-12 19:47:59,692] A new study created in memory with name: no-name-2288c113-9bce-4922-9ca4-d5444d67e626


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:08,093] Trial 0 finished with value: 0.9532816689263568 and parameters: {'learning_rate': 0.14660732960628925, 'num_leaves': 122, 'max_depth': 15, 'min_child_samples': 67, 'min_child_weight': 1.9934250980471022, 'subsample': 0.595440920984583, 'colsample_bytree': 0.5226370318101002, 'n_estimators': 695}. Best is trial 0 with value: 0.9532816689263568.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:11,215] Trial 1 finished with value: 0.9598445900135015 and parameters: {'learning_rate': 0.21616636469118897, 'num_leaves': 88, 'max_depth': 3, 'min_child_samples': 42, 'min_child_weight': 4.3746928508639265, 'subsample': 0.6870420272315677, 'colsample_bytree': 0.7644315655489906, 'n_estimators': 538}. Best is trial 1 with value: 0.9598445900135015.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:16,498] Trial 2 finished with value: 0.9604328451429857 and parameters: {'learning_rate': 0.11968014237688865, 'num_leaves': 89, 'max_depth': 4, 'min_child_samples': 85, 'min_child_weight': 3.5413493409988774, 'subsample': 0.8341827260887544, 'colsample_bytree': 0.8128581709520988, 'n_estimators': 829}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:25,198] Trial 3 finished with value: 0.95311453646102 and parameters: {'learning_rate': 0.20333670891129133, 'num_leaves': 240, 'max_depth': 11, 'min_child_samples': 44, 'min_child_weight': 9.113123153210339, 'subsample': 0.5707408981661324, 'colsample_bytree': 0.6735561005209829, 'n_estimators': 738}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005345 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:29,714] Trial 4 finished with value: 0.9557175452811056 and parameters: {'learning_rate': 0.1874151477533191, 'num_leaves': 146, 'max_depth': 8, 'min_child_samples': 99, 'min_child_weight': 3.9645995293705556, 'subsample': 0.8023430390440767, 'colsample_bytree': 0.7583576297084353, 'n_estimators': 466}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003052 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:35,297] Trial 5 finished with value: 0.9519216870973357 and parameters: {'learning_rate': 0.25321090725034445, 'num_leaves': 81, 'max_depth': 10, 'min_child_samples': 46, 'min_child_weight': 1.824211665233154, 'subsample': 0.7914244066440297, 'colsample_bytree': 0.8727504548522098, 'n_estimators': 543}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:37,547] Trial 6 finished with value: 0.9577860996179586 and parameters: {'learning_rate': 0.06207729615160702, 'num_leaves': 219, 'max_depth': 8, 'min_child_samples': 96, 'min_child_weight': 3.6856734701275284, 'subsample': 0.7809981674911444, 'colsample_bytree': 0.7731087147817586, 'n_estimators': 198}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:39,270] Trial 7 finished with value: 0.9597126695028194 and parameters: {'learning_rate': 0.1795478073884005, 'num_leaves': 195, 'max_depth': 7, 'min_child_samples': 8, 'min_child_weight': 3.4676179151279127, 'subsample': 0.7069035828922743, 'colsample_bytree': 0.6539393197267953, 'n_estimators': 179}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002878 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:46,558] Trial 8 finished with value: 0.9514481608987891 and parameters: {'learning_rate': 0.29858807690174055, 'num_leaves': 183, 'max_depth': 12, 'min_child_samples': 9, 'min_child_weight': 8.288676213167886, 'subsample': 0.8881587716228716, 'colsample_bytree': 0.7715990471463263, 'n_estimators': 569}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007878 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:49,058] Trial 9 finished with value: 0.95622792918471 and parameters: {'learning_rate': 0.13085161255622174, 'num_leaves': 269, 'max_depth': 3, 'min_child_samples': 87, 'min_child_weight': 2.3766393632308067, 'subsample': 0.611393591615223, 'colsample_bytree': 0.6591588791284837, 'n_estimators': 414}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007671 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:48:56,998] Trial 10 finished with value: 0.9593806304791649 and parameters: {'learning_rate': 0.026573993190084175, 'num_leaves': 33, 'max_depth': 6, 'min_child_samples': 71, 'min_child_weight': 0.07421608006341174, 'subsample': 0.9846396982974885, 'colsample_bytree': 0.9847454683776178, 'n_estimators': 975}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005764 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:02,345] Trial 11 finished with value: 0.9588595787977716 and parameters: {'learning_rate': 0.09688227980133908, 'num_leaves': 75, 'max_depth': 3, 'min_child_samples': 30, 'min_child_weight': 6.562993759416261, 'subsample': 0.6863652188148863, 'colsample_bytree': 0.8658742474321569, 'n_estimators': 956}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005409 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:08,214] Trial 12 finished with value: 0.9573229215190652 and parameters: {'learning_rate': 0.24163741070411277, 'num_leaves': 105, 'max_depth': 5, 'min_child_samples': 65, 'min_child_weight': 5.8755320108850535, 'subsample': 0.8886781105130725, 'colsample_bytree': 0.8639693370373462, 'n_estimators': 819}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003558 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:10,978] Trial 13 finished with value: 0.958629457899763 and parameters: {'learning_rate': 0.10496446467756873, 'num_leaves': 36, 'max_depth': 5, 'min_child_samples': 31, 'min_child_weight': 5.281634461468192, 'subsample': 0.511858008173744, 'colsample_bytree': 0.9521826784512022, 'n_estimators': 348}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006973 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:14,632] Trial 14 finished with value: 0.960008004740519 and parameters: {'learning_rate': 0.21927290668486676, 'num_leaves': 72, 'max_depth': 3, 'min_child_samples': 81, 'min_child_weight': 6.97512068137703, 'subsample': 0.8697342398157587, 'colsample_bytree': 0.547843830643193, 'n_estimators': 643}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:20,234] Trial 15 finished with value: 0.9599906237218794 and parameters: {'learning_rate': 0.15798213693173546, 'num_leaves': 142, 'max_depth': 5, 'min_child_samples': 81, 'min_child_weight': 7.254844488799735, 'subsample': 0.8713649459503615, 'colsample_bytree': 0.5306564614718892, 'n_estimators': 796}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:24,467] Trial 16 finished with value: 0.9588778122914398 and parameters: {'learning_rate': 0.27227093316974466, 'num_leaves': 54, 'max_depth': 4, 'min_child_samples': 83, 'min_child_weight': 7.60406013880848, 'subsample': 0.9403552058423377, 'colsample_bytree': 0.5942126021750185, 'n_estimators': 655}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008556 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:31,096] Trial 17 finished with value: 0.9602670861096392 and parameters: {'learning_rate': 0.07999335927064621, 'num_leaves': 20, 'max_depth': 14, 'min_child_samples': 58, 'min_child_weight': 9.139621979429412, 'subsample': 0.8495950086822445, 'colsample_bytree': 0.5885461222045375, 'n_estimators': 888}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005825 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:39,427] Trial 18 finished with value: 0.9585755151743397 and parameters: {'learning_rate': 0.015121112850685708, 'num_leaves': 38, 'max_depth': 15, 'min_child_samples': 57, 'min_child_weight': 9.688013028211568, 'subsample': 0.8305920186885138, 'colsample_bytree': 0.7063512174548601, 'n_estimators': 873}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:45,494] Trial 19 finished with value: 0.9592762970076041 and parameters: {'learning_rate': 0.07011453979989957, 'num_leaves': 20, 'max_depth': 13, 'min_child_samples': 56, 'min_child_weight': 0.541153853575751, 'subsample': 0.7375757403916396, 'colsample_bytree': 0.8237865651874083, 'n_estimators': 885}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010279 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:49:58,097] Trial 20 finished with value: 0.9572849153407049 and parameters: {'learning_rate': 0.06029903387421163, 'num_leaves': 114, 'max_depth': 13, 'min_child_samples': 71, 'min_child_weight': 5.245808766723103, 'subsample': 0.9453283124347691, 'colsample_bytree': 0.6034303422834348, 'n_estimators': 988}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:05,253] Trial 21 finished with value: 0.9571843469671875 and parameters: {'learning_rate': 0.11070721898162574, 'num_leaves': 59, 'max_depth': 9, 'min_child_samples': 89, 'min_child_weight': 8.863720581510599, 'subsample': 0.8443058545940185, 'colsample_bytree': 0.5948273531013109, 'n_estimators': 761}. Best is trial 2 with value: 0.9604328451429857.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004636 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:10,197] Trial 22 finished with value: 0.9609191584669619 and parameters: {'learning_rate': 0.08183082759159926, 'num_leaves': 64, 'max_depth': 6, 'min_child_samples': 72, 'min_child_weight': 6.58015466678275, 'subsample': 0.9264934033935981, 'colsample_bytree': 0.507869285171017, 'n_estimators': 632}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004632 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:17,596] Trial 23 finished with value: 0.9602618765400198 and parameters: {'learning_rate': 0.08447291829834881, 'num_leaves': 98, 'max_depth': 7, 'min_child_samples': 75, 'min_child_weight': 9.982868200407882, 'subsample': 0.9408058639398523, 'colsample_bytree': 0.5137786521838017, 'n_estimators': 875}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:23,802] Trial 24 finished with value: 0.960523823172431 and parameters: {'learning_rate': 0.040196370136255405, 'num_leaves': 55, 'max_depth': 10, 'min_child_samples': 61, 'min_child_weight': 6.29882555112451, 'subsample': 0.9793825964056134, 'colsample_bytree': 0.5751891466854018, 'n_estimators': 617}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005408 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:31,601] Trial 25 finished with value: 0.9598465554420395 and parameters: {'learning_rate': 0.03424845623015611, 'num_leaves': 128, 'max_depth': 10, 'min_child_samples': 62, 'min_child_weight': 5.916322633339458, 'subsample': 0.9916918890036458, 'colsample_bytree': 0.8071329855200181, 'n_estimators': 607}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002960 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:35,843] Trial 26 finished with value: 0.959353706476177 and parameters: {'learning_rate': 0.044324473123954364, 'num_leaves': 170, 'max_depth': 6, 'min_child_samples': 77, 'min_child_weight': 6.171371761606662, 'subsample': 0.9156051203528192, 'colsample_bytree': 0.7154002987774657, 'n_estimators': 484}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:38,603] Trial 27 finished with value: 0.9589845137491909 and parameters: {'learning_rate': 0.11928440320346786, 'num_leaves': 55, 'max_depth': 8, 'min_child_samples': 94, 'min_child_weight': 8.006770258316513, 'subsample': 0.9625248712620327, 'colsample_bytree': 0.5539620070594213, 'n_estimators': 302}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003505 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:44,426] Trial 28 finished with value: 0.9579457492469331 and parameters: {'learning_rate': 0.1387545221649601, 'num_leaves': 62, 'max_depth': 6, 'min_child_samples': 48, 'min_child_weight': 2.9498011783878963, 'subsample': 0.9100184815991301, 'colsample_bytree': 0.9255123109771165, 'n_estimators': 725}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008984 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:53,001] Trial 29 finished with value: 0.9599081230829968 and parameters: {'learning_rate': 0.048521122241884304, 'num_leaves': 300, 'max_depth': 10, 'min_child_samples': 67, 'min_child_weight': 4.728742701003569, 'subsample': 0.9923952114051227, 'colsample_bytree': 0.5012253365236276, 'n_estimators': 685}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005216 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:50:56,840] Trial 30 finished with value: 0.9595405879463441 and parameters: {'learning_rate': 0.09052548674956958, 'num_leaves': 123, 'max_depth': 4, 'min_child_samples': 36, 'min_child_weight': 1.3755898686566606, 'subsample': 0.9126469605288727, 'colsample_bytree': 0.6284681650560613, 'n_estimators': 606}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010593 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:02,616] Trial 31 finished with value: 0.9603784525001863 and parameters: {'learning_rate': 0.0809501612537179, 'num_leaves': 21, 'max_depth': 14, 'min_child_samples': 61, 'min_child_weight': 6.7797886385111195, 'subsample': 0.8376829043903384, 'colsample_bytree': 0.571157063767029, 'n_estimators': 831}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:13,222] Trial 32 finished with value: 0.9585995502341748 and parameters: {'learning_rate': 0.010953095220362724, 'num_leaves': 90, 'max_depth': 15, 'min_child_samples': 51, 'min_child_weight': 6.571454173206597, 'subsample': 0.8106173918353472, 'colsample_bytree': 0.5615375919918362, 'n_estimators': 805}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005500 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:19,306] Trial 33 finished with value: 0.9565858147776357 and parameters: {'learning_rate': 0.15862782509142667, 'num_leaves': 40, 'max_depth': 12, 'min_child_samples': 73, 'min_child_weight': 4.474412846207755, 'subsample': 0.7665757218111031, 'colsample_bytree': 0.7171071925167343, 'n_estimators': 716}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005123 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:26,201] Trial 34 finished with value: 0.9572372951384107 and parameters: {'learning_rate': 0.12350342733946627, 'num_leaves': 47, 'max_depth': 11, 'min_child_samples': 61, 'min_child_weight': 5.500033032738357, 'subsample': 0.8278152012075801, 'colsample_bytree': 0.6287010411921856, 'n_estimators': 773}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:29,428] Trial 35 finished with value: 0.9579159718205854 and parameters: {'learning_rate': 0.07157789230249631, 'num_leaves': 71, 'max_depth': 4, 'min_child_samples': 89, 'min_child_weight': 6.816025292522282, 'subsample': 0.9639946853112861, 'colsample_bytree': 0.5643889127329417, 'n_estimators': 489}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:36,422] Trial 36 finished with value: 0.9602672281888108 and parameters: {'learning_rate': 0.05151946329384437, 'num_leaves': 87, 'max_depth': 9, 'min_child_samples': 69, 'min_child_weight': 8.07696977059213, 'subsample': 0.7428827028167301, 'colsample_bytree': 0.5278441224299498, 'n_estimators': 681}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009324 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:44,155] Trial 37 finished with value: 0.9586139475902141 and parameters: {'learning_rate': 0.1010491540015657, 'num_leaves': 143, 'max_depth': 7, 'min_child_samples': 53, 'min_child_weight': 4.170034307540612, 'subsample': 0.8629832881114072, 'colsample_bytree': 0.807354490623645, 'n_estimators': 827}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004666 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:47,992] Trial 38 finished with value: 0.9593743316358977 and parameters: {'learning_rate': 0.14509424087953632, 'num_leaves': 21, 'max_depth': 14, 'min_child_samples': 42, 'min_child_weight': 3.1436516378311907, 'subsample': 0.8007243555443573, 'colsample_bytree': 0.6904666843159316, 'n_estimators': 545}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:51:57,859] Trial 39 finished with value: 0.9598295296213288 and parameters: {'learning_rate': 0.03134030366836396, 'num_leaves': 70, 'max_depth': 11, 'min_child_samples': 16, 'min_child_weight': 7.4534845373952425, 'subsample': 0.6572468344439175, 'colsample_bytree': 0.7331596641633793, 'n_estimators': 918}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:02,403] Trial 40 finished with value: 0.9603838988684247 and parameters: {'learning_rate': 0.07624255007445743, 'num_leaves': 106, 'max_depth': 9, 'min_child_samples': 79, 'min_child_weight': 4.836351405851758, 'subsample': 0.9018648534453518, 'colsample_bytree': 0.6159958414133191, 'n_estimators': 423}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:06,503] Trial 41 finished with value: 0.9605148011450442 and parameters: {'learning_rate': 0.07571823166153391, 'num_leaves': 102, 'max_depth': 8, 'min_child_samples': 78, 'min_child_weight': 4.6977772734204, 'subsample': 0.9029458466759348, 'colsample_bytree': 0.6339121454865866, 'n_estimators': 415}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010359 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:10,974] Trial 42 finished with value: 0.9603572827036416 and parameters: {'learning_rate': 0.058023689734578554, 'num_leaves': 107, 'max_depth': 9, 'min_child_samples': 77, 'min_child_weight': 3.8457167113215167, 'subsample': 0.8966192545317991, 'colsample_bytree': 0.628514172881576, 'n_estimators': 409}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:13,559] Trial 43 finished with value: 0.9590797304739174 and parameters: {'learning_rate': 0.1125040855332061, 'num_leaves': 89, 'max_depth': 8, 'min_child_samples': 85, 'min_child_weight': 4.924655651465339, 'subsample': 0.92482013760574, 'colsample_bytree': 0.6388008552919747, 'n_estimators': 241}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:18,286] Trial 44 finished with value: 0.9597170265974104 and parameters: {'learning_rate': 0.07073467612354901, 'num_leaves': 129, 'max_depth': 9, 'min_child_samples': 95, 'min_child_weight': 2.5488231893325053, 'subsample': 0.968122532689343, 'colsample_bytree': 0.6576019628291905, 'n_estimators': 426}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004011 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:21,519] Trial 45 finished with value: 0.9592160791187758 and parameters: {'learning_rate': 0.0933114239867707, 'num_leaves': 212, 'max_depth': 7, 'min_child_samples': 99, 'min_child_weight': 5.619915031311465, 'subsample': 0.8819746105394844, 'colsample_bytree': 0.6820461848133258, 'n_estimators': 354}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:23,055] Trial 46 finished with value: 0.9583511484827745 and parameters: {'learning_rate': 0.17241413713666473, 'num_leaves': 98, 'max_depth': 8, 'min_child_samples': 92, 'min_child_weight': 4.507096558547017, 'subsample': 0.9475435485331537, 'colsample_bytree': 0.8899391601524685, 'n_estimators': 133}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010577 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:28,452] Trial 47 finished with value: 0.9565413913566988 and parameters: {'learning_rate': 0.130968279924937, 'num_leaves': 167, 'max_depth': 10, 'min_child_samples': 79, 'min_child_weight': 3.5608649659387415, 'subsample': 0.9263449397257549, 'colsample_bytree': 0.5383165908622122, 'n_estimators': 447}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:32,889] Trial 48 finished with value: 0.9591129533201724 and parameters: {'learning_rate': 0.040054877681874164, 'num_leaves': 80, 'max_depth': 6, 'min_child_samples': 65, 'min_child_weight': 5.915853976004982, 'subsample': 0.8971753640297574, 'colsample_bytree': 0.7533253483685995, 'n_estimators': 511}. Best is trial 22 with value: 0.9609191584669619.


[LightGBM] [Info] Number of positive: 17425, number of negative: 58086
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1328
[LightGBM] [Info] Number of data points in the train set: 75511, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.230761 -> initscore=-1.204019
[LightGBM] [Info] Start training from score -1.204019


[I 2024-10-12 19:52:37,419] Trial 49 finished with value: 0.9511333844944644 and parameters: {'learning_rate': 0.01937644423109847, 'num_leaves': 117, 'max_depth': 5, 'min_child_samples': 84, 'min_child_weight': 5.074722678393634, 'subsample': 0.9747327043737702, 'colsample_bytree': 0.6123245916882544, 'n_estimators': 575}. Best is trial 22 with value: 0.9609191584669619.


Best LGBM Params:  {'learning_rate': 0.08183082759159926, 'num_leaves': 64, 'max_depth': 6, 'min_child_samples': 72, 'min_child_weight': 6.58015466678275, 'subsample': 0.9264934033935981, 'colsample_bytree': 0.507869285171017, 'n_estimators': 632}
Best AUC for LGBM:  0.9609191584669619


In [83]:
lgb_best_params = lgb_study.best_trial.params
lgb_best_score = lgb_study.best_value
lgb_best_score

0.9609191584669619

prev: 0.9607718460460408

## ExtraTreesClassifier

In [84]:
from sklearn.ensemble import ExtraTreesClassifier

def extratrees_objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_features': trial.suggest_float('max_features', 0.1, 1.0)
    }

    model = ExtraTreesClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train_res, cv=3, scoring='roc_auc').mean()

    return score

extratrees_study = optuna.create_study(direction='maximize')
extratrees_study.optimize(extratrees_objective, n_trials=50, timeout=600)

print('Best parameters for ExtraTrees:', extratrees_study.best_params)

[I 2024-10-12 19:52:37,455] A new study created in memory with name: no-name-3cf88c50-ee8b-4551-ace4-0e9fdd49d23f
[I 2024-10-12 19:53:39,265] Trial 0 finished with value: 0.9371687502088122 and parameters: {'n_estimators': 400, 'max_depth': 17, 'min_samples_split': 4, 'min_samples_leaf': 7, 'max_features': 0.3867833531287491}. Best is trial 0 with value: 0.9371687502088122.
[I 2024-10-12 19:54:22,151] Trial 1 finished with value: 0.9432823286929178 and parameters: {'n_estimators': 193, 'max_depth': 18, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_features': 0.4548813453061207}. Best is trial 1 with value: 0.9432823286929178.
[I 2024-10-12 19:54:42,847] Trial 2 finished with value: 0.9108602505621208 and parameters: {'n_estimators': 145, 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 0.7864539322777361}. Best is trial 1 with value: 0.9432823286929178.
[I 2024-10-12 19:56:03,913] Trial 3 finished with value: 0.9385142682873622 and parameters: {'n_es

Best parameters for ExtraTrees: {'n_estimators': 373, 'max_depth': 17, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 0.6716623956987676}


In [85]:
extratrees_best_params = extratrees_study.best_params
extratrees_best_score = extratrees_study.best_value
extratrees_best_score

0.9454722065252454

Prev: 0.9298212453316017

## LogisticRegression

In [86]:
from sklearn.linear_model import LogisticRegression

def logreg_objective(trial):
    
    param = {
        'penalty': 'elasticnet',
        'solver' : 'saga',
        'C': trial.suggest_float('C', 1e-4, 1e2, log=True),
        'l1_ratio': trial.suggest_float('l1_ratio', 0, 1)
    }

    model = LogisticRegression(**param, max_iter=1000)
    score = cross_val_score(model, X_train_prep, y_train_res, cv=3, scoring='roc_auc').mean()
    return score

logreg_study = optuna.create_study(direction='maximize')
logreg_study.optimize(logreg_objective, n_trials=50, timeout=600)
print('Best parameters for Logistic Regression:', logreg_study.best_params)

[I 2024-10-12 20:03:44,828] A new study created in memory with name: no-name-6c70cb0f-4583-452c-8958-082284b5c602
[I 2024-10-12 20:04:07,519] Trial 0 finished with value: 0.8949578149433436 and parameters: {'C': 60.539495455962374, 'l1_ratio': 0.9390717209437502}. Best is trial 0 with value: 0.8949578149433436.
[I 2024-10-12 20:04:27,531] Trial 1 finished with value: 0.8949582417887815 and parameters: {'C': 20.40603404997764, 'l1_ratio': 0.977805112586921}. Best is trial 1 with value: 0.8949582417887815.
[I 2024-10-12 20:04:37,758] Trial 2 finished with value: 0.8796284810536301 and parameters: {'C': 0.0007736395458854829, 'l1_ratio': 0.3291803063019507}. Best is trial 1 with value: 0.8949582417887815.
[I 2024-10-12 20:05:01,707] Trial 3 finished with value: 0.8949579779890876 and parameters: {'C': 60.74454912443483, 'l1_ratio': 0.24245475272025174}. Best is trial 1 with value: 0.8949582417887815.
[I 2024-10-12 20:06:17,723] Trial 4 finished with value: 0.8949748230772966 and parameter

Best parameters for Logistic Regression: {'C': 0.034635673794836994, 'l1_ratio': 0.667050998518569}


In [87]:
logreg_best_params = logreg_study.best_params
logreg_best_params['penalty'] = 'elasticnet'
logreg_best_params['solver'] = 'saga'
logreg_best_score = logreg_study.best_value
logreg_best_score

0.8951398196537403

prev: 0.8814172446287084

## HistGradientBoostingClassifier

In [88]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

def histgb_objective(trial):
    param = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'max_iter': trial.suggest_int('max_iter', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 10, 50),
        'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 10, 50),
        'l2_regularization': trial.suggest_float('l2_regularization', 1e-5, 1.0, log=True)
    }

    model = HistGradientBoostingClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train_res, cv=3, scoring='roc_auc').mean()

    return score

histgb_study = optuna.create_study(direction='maximize')
histgb_study.optimize(histgb_objective, n_trials=50, timeout=600)
print('Best parameters for HistGradientBoosting:', histgb_study.best_params)

[I 2024-10-12 20:14:31,639] A new study created in memory with name: no-name-a52446d3-06ca-4ec4-995b-c11e8809ba86
[I 2024-10-12 20:14:58,204] Trial 0 finished with value: 0.9527105434383119 and parameters: {'learning_rate': 0.015891334732469724, 'max_iter': 392, 'max_depth': 9, 'min_samples_leaf': 31, 'max_leaf_nodes': 21, 'l2_regularization': 0.07851274814154947}. Best is trial 0 with value: 0.9527105434383119.
[I 2024-10-12 20:15:23,420] Trial 1 finished with value: 0.9594885638898828 and parameters: {'learning_rate': 0.038004830266586756, 'max_iter': 410, 'max_depth': 17, 'min_samples_leaf': 12, 'max_leaf_nodes': 44, 'l2_regularization': 0.02165795937237716}. Best is trial 1 with value: 0.9594885638898828.
[I 2024-10-12 20:15:37,556] Trial 2 finished with value: 0.9562189548392435 and parameters: {'learning_rate': 0.036554249284055394, 'max_iter': 217, 'max_depth': 10, 'min_samples_leaf': 28, 'max_leaf_nodes': 27, 'l2_regularization': 5.864025123950696e-05}. Best is trial 1 with val

Best parameters for HistGradientBoosting: {'learning_rate': 0.09941268072767732, 'max_iter': 451, 'max_depth': 20, 'min_samples_leaf': 19, 'max_leaf_nodes': 34, 'l2_regularization': 0.00045378231647806653}


In [89]:
histgb_best_params = histgb_study.best_trial.params
histgb_best_score = histgb_study.best_trial.value
histgb_best_score

0.9601709868096956

Prev: 0.9519987809872763

## KNN

In [90]:
from sklearn.neighbors import KNeighborsClassifier

def knn_objective(trial):
    param = {
        'n_neighbors': trial.suggest_int('n_neighbors', 3, 15),
        'leaf_size': trial.suggest_int('leaf_size', 20, 50),
        'p': trial.suggest_categorical('p', [1, 2]),  # Minkowski distance parameter
        'weights': trial.suggest_categorical('weights', ['uniform', 'distance'])
    }

    model = KNeighborsClassifier(**param)
    score = cross_val_score(model, X_train_prep, y_train_res, cv=3, scoring='roc_auc').mean()

    return score

knn_study = optuna.create_study(direction='maximize')
knn_study.optimize(knn_objective, n_trials=50, timeout=600)
print('Best parameters for KNN:', knn_study.best_params)
print('Best score for KNN:', knn_study.best_value)

[I 2024-10-12 20:24:38,186] A new study created in memory with name: no-name-34d1daae-4bb1-494a-a072-e1c104fe4712
[I 2024-10-12 20:25:36,573] Trial 0 finished with value: 0.9133490596201589 and parameters: {'n_neighbors': 11, 'leaf_size': 46, 'p': 1, 'weights': 'uniform'}. Best is trial 0 with value: 0.9133490596201589.
[I 2024-10-12 20:26:30,773] Trial 1 finished with value: 0.910168120147794 and parameters: {'n_neighbors': 8, 'leaf_size': 35, 'p': 1, 'weights': 'distance'}. Best is trial 0 with value: 0.9133490596201589.
[I 2024-10-12 20:26:46,738] Trial 2 finished with value: 0.9020045107791387 and parameters: {'n_neighbors': 8, 'leaf_size': 28, 'p': 2, 'weights': 'uniform'}. Best is trial 0 with value: 0.9133490596201589.
[I 2024-10-12 20:27:42,534] Trial 3 finished with value: 0.910168120147794 and parameters: {'n_neighbors': 8, 'leaf_size': 38, 'p': 1, 'weights': 'distance'}. Best is trial 0 with value: 0.9133490596201589.
[I 2024-10-12 20:27:59,012] Trial 4 finished with value: 

Best parameters for KNN: {'n_neighbors': 13, 'leaf_size': 24, 'p': 1, 'weights': 'distance'}
Best score for KNN: 0.9182720698807708


In [91]:
knn_best_params = knn_study.best_trial.params
knn_best_score = knn_study.best_trial.value
knn_best_score

0.9182720698807708

Prev: 0.9003336325395043

# Stacking

In [92]:
from sklearn.ensemble import StackingClassifier

# Init base models

xgb_model = xgb.XGBClassifier(**xgb_best_params)
cat_model = CatBoostClassifier(**cat_best_params)
lgb_model = lgb.LGBMClassifier(**lgb_best_params)
extratrees_model = ExtraTreesClassifier(**extratrees_best_params)
logreg_model = LogisticRegression(**logreg_best_params)
histgb_model = HistGradientBoostingClassifier(**histgb_best_params)
knn_model = KNeighborsClassifier(**knn_best_params)

# Base and meta models
base_estimators = [
	('xgb', xgb_model),
	('cat', cat_model),
	('lgb', lgb_model),
	('extratrees', extratrees_model),
	('logreg', logreg_model),
	('histgb', histgb_model),
	('knn', knn_model)
]

meta_model = LogisticRegression()

# Stack

stack_model = StackingClassifier(estimators=base_estimators, final_estimator=meta_model, cv=5, n_jobs=-1)

stack_model.fit(X_train_prep, y_train_res)

In [93]:
print('Stacking Classifier ROC AUC:', roc_auc_score(y_test, stack_model.predict_proba(X_test_prep)[:, 1]))

Stacking Classifier ROC AUC: 0.9607975623760714


Prev: 0.9614965682196932

In [None]:
# Choose different base models

base_estimators = [
	('xgb', xgb_model),
	('cat', cat_model),
	('lgb', lgb_model),
	('histgb', histgb_model),
	('knn', knn_model)

]

meta_model = lgb.LGBMClassifier()

# Stack
stack_model_2 = StackingClassifier(estimators=base_estimators, final_estimator=meta_model, cv=3, n_jobs=-1)

stack_model_2.fit(X_train_prep, y_train_res)

print('Stacking Classifier ROC AUC:', roc_auc_score(y_test, stack_model_2.predict_proba(X_test_prep)[:, 1]))

# Submission

In [65]:
smote = SMOTENC(sampling_strategy=0.3, categorical_features=[X.columns.get_loc(col) for col in categorical_columns], random_state=2024)
X_res, y_res = smote.fit_resample(X, y)

X_train_all = preprocessor.transform(X_res)

stack_model.fit(X_train_all, y_res)

In [67]:
import joblib

try:
    joblib.dump(stack_model, '../models/stack_model_03_smote.pkl')
except:
    joblib.dump(stack_model, 'stack_model_03_smote.pkl')
    
X_to_pred = preprocessor.transform(data_to_predict)
y_pred_submit = stack_model.predict_proba(X_to_pred)[:, 1]

submission = pd.DataFrame({'id': data_to_predict['id'], 'loan_status': y_pred_submit})
try:
    submission.to_csv('../submissions/stack_model_03_somte.csv', index=False)
except:
    submission.to_csv('stack_model_03_somte.csv', index=False)