In [1]:
# Imports and data loading
!pip install -U imbalanced-learn==0.10.1 scikit-learn==1.2.2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import precision_recall_curve, classification_report, fbeta_score, make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer

from imblearn.over_sampling import SMOTE

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings('ignore')

# Load datasets
df_train = pd.read_csv('/kaggle/input/creditcsv/cs-training.csv', index_col=0)
df_test = pd.read_csv('/kaggle/input/creditcsv/cs-test.csv', index_col=0)
df_submission = pd.read_csv('/kaggle/input/creditcsv/sampleEntry.csv')

# Basic info
print(df_train.info())
print(df_train.head())

Collecting imbalanced-learn==0.10.1
  Downloading imbalanced_learn-0.10.1-py3-none-any.whl.metadata (8.2 kB)
Downloading imbalanced_learn-0.10.1-py3-none-any.whl (226 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.0/226.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: imbalanced-learn
  Attempting uninstall: imbalanced-learn
    Found existing installation: imbalanced-learn 0.13.0
    Uninstalling imbalanced-learn-0.13.0:
      Successfully uninstalled imbalanced-learn-0.13.0
Successfully installed imbalanced-learn-0.10.1
<class 'pandas.core.frame.DataFrame'>
Index: 150000 entries, 1 to 150000
Data columns (total 11 columns):
 #   Column                                Non-Null Count   Dtype  
---  ------                                --------------   -----  
 0   SeriousDlqin2yrs                      150000 non-null  int64  
 1   RevolvingUtilizationOfUnsecuredLines  150000 non-null  float64
 2   age                  

In [5]:
# Advanced data preprocessing and feature engineering

# Impute missing with KNN
imputer = KNNImputer(n_neighbors=5)
df_train_imputed = df_train.copy()
df_train_imputed[['MonthlyIncome', 'NumberOfDependents']] = imputer.fit_transform(df_train_imputed[['MonthlyIncome', 'NumberOfDependents']])

df_test_imputed = df_test.copy()
df_test_imputed[['MonthlyIncome', 'NumberOfDependents']] = imputer.transform(df_test_imputed[['MonthlyIncome', 'NumberOfDependents']])

# Feature engineering example: Debt to income ratio squared, Age buckets
df_train_imputed['DebtRatio_sq'] = df_train_imputed['DebtRatio'] ** 2
df_train_imputed['Age_bucket'] = pd.cut(df_train_imputed['age'], bins=[0,25,40,60,100], labels=[0,1,2,3])

df_test_imputed['DebtRatio_sq'] = df_test_imputed['DebtRatio'] ** 2
df_test_imputed['Age_bucket'] = pd.cut(df_test_imputed['age'], bins=[0,25,40,60,100], labels=[0,1,2,3])

In [2]:
# Prepare data for modeling
target = 'SeriousDlqin2yrs'
features = [c for c in df_train.columns if c != target]

# Si hay categóricas, hacer dummy:
X = pd.get_dummies(df_train[features])

y = df_train[target]

# Imputar faltantes con mediana (en columnas numéricas)
imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(X)

# Split
X_train, X_val, y_train, y_val = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42, stratify=y
)

# Escalar
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Balance con SMOTE
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train_scaled, y_train)

In [5]:
# Define custom F2 scorer and threshold finder

def f2_scorer(y_true, y_pred):
    return fbeta_score(y_true, y_pred, beta=2)

def find_best_threshold_f2(y_true, y_prob):
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    f2_scores = 5 * (precision * recall) / (4 * precision + recall + 1e-9)
    best_idx = np.argmax(f2_scores)
    return thresholds[best_idx], f2_scores[best_idx]


In [6]:
# Hyperparameter tuning with Optuna for LightGBM

def objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 2000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 1.0),
        'random_state': 42,
        'n_jobs': -1
    }
    
    model = LGBMClassifier(**param)
    model.fit(X_train_bal, y_train_bal)
    y_val_prob = model.predict_proba(X_val_scaled)[:, 1]
    
    best_thresh, best_f2 = find_best_threshold_f2(y_val, y_val_prob)
    return best_f2

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

print("Best params:", study.best_params)
print("Best F2 score:", study.best_value)


[I 2025-06-30 20:18:33,315] A new study created in memory with name: no-name-f2798fc3-fc6a-4b92-9df3-f6d3627a3d47


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:18:37,589] Trial 0 finished with value: 0.49597469779716935 and parameters: {'n_estimators': 392, 'learning_rate': 0.029657457039949927, 'num_leaves': 20, 'max_depth': 5, 'min_child_samples': 8, 'subsample': 0.7210440763364347, 'colsample_bytree': 0.8188106946965754, 'reg_alpha': 0.7230596559550575, 'reg_lambda': 0.2286419615690013}. Best is trial 0 with value: 0.49597469779716935.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:18:51,867] Trial 1 finished with value: 0.5043992715468248 and parameters: {'n_estimators': 1336, 'learning_rate': 0.03977385813151292, 'num_leaves': 33, 'max_depth': 5, 'min_child_samples': 35, 'subsample': 0.6833788252054572, 'colsample_bytree': 0.8409103436138448, 'reg_alpha': 0.5507491022269766, 'reg_lambda': 0.5434834964078473}. Best is trial 1 with value: 0.5043992715468248.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008086 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:19:01,314] Trial 2 finished with value: 0.5053507725884413 and parameters: {'n_estimators': 1353, 'learning_rate': 0.15712469809350219, 'num_leaves': 47, 'max_depth': 3, 'min_child_samples': 56, 'subsample': 0.6912836301002181, 'colsample_bytree': 0.906026531698704, 'reg_alpha': 0.6809119217583998, 'reg_lambda': 0.4433314527708556}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006430 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:19:09,955] Trial 3 finished with value: 0.5037737228597083 and parameters: {'n_estimators': 456, 'learning_rate': 0.016911376100107206, 'num_leaves': 58, 'max_depth': 15, 'min_child_samples': 82, 'subsample': 0.5783169130056488, 'colsample_bytree': 0.5383668242472461, 'reg_alpha': 0.47346772241117663, 'reg_lambda': 0.6659428338073026}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007940 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:19:22,831] Trial 4 finished with value: 0.4926888997886834 and parameters: {'n_estimators': 1635, 'learning_rate': 0.15171800241498729, 'num_leaves': 31, 'max_depth': 4, 'min_child_samples': 51, 'subsample': 0.7837760551506967, 'colsample_bytree': 0.8381241206083094, 'reg_alpha': 0.8653326344652772, 'reg_lambda': 0.9265670716603144}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:19:43,011] Trial 5 finished with value: 0.5024994401233823 and parameters: {'n_estimators': 1440, 'learning_rate': 0.025124590548402884, 'num_leaves': 134, 'max_depth': 7, 'min_child_samples': 45, 'subsample': 0.9181222362937531, 'colsample_bytree': 0.8748992747981319, 'reg_alpha': 0.4715532917893398, 'reg_lambda': 0.08499208060183916}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006693 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:19:52,543] Trial 6 finished with value: 0.4943764233839885 and parameters: {'n_estimators': 957, 'learning_rate': 0.011451758015936207, 'num_leaves': 93, 'max_depth': 5, 'min_child_samples': 56, 'subsample': 0.6080422539139221, 'colsample_bytree': 0.5133932864323316, 'reg_alpha': 0.8272313314485435, 'reg_lambda': 0.09974102465779167}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006600 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:20:10,376] Trial 7 finished with value: 0.49748855044422186 and parameters: {'n_estimators': 1119, 'learning_rate': 0.019359288285812612, 'num_leaves': 93, 'max_depth': 10, 'min_child_samples': 42, 'subsample': 0.8484317764092025, 'colsample_bytree': 0.5056623613350443, 'reg_alpha': 0.08749132223003053, 'reg_lambda': 0.37493086914444373}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006908 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:20:22,081] Trial 8 finished with value: 0.47190358616321476 and parameters: {'n_estimators': 863, 'learning_rate': 0.11552490495355353, 'num_leaves': 84, 'max_depth': 11, 'min_child_samples': 97, 'subsample': 0.7378300139891472, 'colsample_bytree': 0.6297368554932887, 'reg_alpha': 0.7762657187154467, 'reg_lambda': 0.34802968415463076}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008584 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:20:26,044] Trial 9 finished with value: 0.49763550351578284 and parameters: {'n_estimators': 276, 'learning_rate': 0.11583629785696013, 'num_leaves': 70, 'max_depth': 13, 'min_child_samples': 92, 'subsample': 0.8128626144295656, 'colsample_bytree': 0.9874814325880434, 'reg_alpha': 0.04247620078045067, 'reg_lambda': 0.9850882955772979}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008114 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:20:38,992] Trial 10 finished with value: 0.4937377829432882 and parameters: {'n_estimators': 1873, 'learning_rate': 0.26117265770420456, 'num_leaves': 129, 'max_depth': 3, 'min_child_samples': 73, 'subsample': 0.5007616160030841, 'colsample_bytree': 0.7218404078079125, 'reg_alpha': 0.9914291491929935, 'reg_lambda': 0.710070728875124}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:20:55,271] Trial 11 finished with value: 0.48879131008155813 and parameters: {'n_estimators': 1342, 'learning_rate': 0.060195423429194365, 'num_leaves': 46, 'max_depth': 8, 'min_child_samples': 24, 'subsample': 0.656915653766017, 'colsample_bytree': 0.9730556558478549, 'reg_alpha': 0.6001902948454767, 'reg_lambda': 0.5440698373231414}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:21:17,066] Trial 12 finished with value: 0.48613573474766036 and parameters: {'n_estimators': 1979, 'learning_rate': 0.053954785617748156, 'num_leaves': 40, 'max_depth': 6, 'min_child_samples': 28, 'subsample': 0.6670947690748141, 'colsample_bytree': 0.8996669732655009, 'reg_alpha': 0.2705116124291257, 'reg_lambda': 0.5295698457998884}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007956 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:21:26,232] Trial 13 finished with value: 0.49876018389451676 and parameters: {'n_estimators': 1295, 'learning_rate': 0.05236880046858721, 'num_leaves': 58, 'max_depth': 3, 'min_child_samples': 63, 'subsample': 0.6973676974078685, 'colsample_bytree': 0.7248265812943416, 'reg_alpha': 0.5957334339341981, 'reg_lambda': 0.7467725680672052}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:21:37,445] Trial 14 finished with value: 0.44513797603531896 and parameters: {'n_estimators': 742, 'learning_rate': 0.2640529316568069, 'num_leaves': 114, 'max_depth': 8, 'min_child_samples': 31, 'subsample': 0.5802103067479252, 'colsample_bytree': 0.7886307608165456, 'reg_alpha': 0.312687879376065, 'reg_lambda': 0.38314718000484177}. Best is trial 2 with value: 0.5053507725884413.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015637 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:21:54,391] Trial 15 finished with value: 0.5078513976356643 and parameters: {'n_estimators': 1654, 'learning_rate': 0.07867235014317286, 'num_leaves': 21, 'max_depth': 3, 'min_child_samples': 12, 'subsample': 0.9656173623278734, 'colsample_bytree': 0.9210578140304333, 'reg_alpha': 0.6392749239894834, 'reg_lambda': 0.5987692812886658}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008247 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:22:05,992] Trial 16 finished with value: 0.5044201765060432 and parameters: {'n_estimators': 1645, 'learning_rate': 0.08839446935949773, 'num_leaves': 25, 'max_depth': 3, 'min_child_samples': 7, 'subsample': 0.974397755410564, 'colsample_bytree': 0.9310613059173953, 'reg_alpha': 0.6573209222301845, 'reg_lambda': 0.8434019966729099}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008163 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:22:28,480] Trial 17 finished with value: 0.4406388297820443 and parameters: {'n_estimators': 1717, 'learning_rate': 0.18648587502869993, 'num_leaves': 53, 'max_depth': 7, 'min_child_samples': 67, 'subsample': 0.8873371421296943, 'colsample_bytree': 0.9179273246931223, 'reg_alpha': 0.36066057711281774, 'reg_lambda': 0.2787848595329867}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:22:48,236] Trial 18 finished with value: 0.4747806232225817 and parameters: {'n_estimators': 1548, 'learning_rate': 0.07851930657264757, 'num_leaves': 73, 'max_depth': 12, 'min_child_samples': 15, 'subsample': 0.9813633729278315, 'colsample_bytree': 0.6719081782081326, 'reg_alpha': 0.9509352901442459, 'reg_lambda': 0.6327503761695468}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.015983 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:23:01,246] Trial 19 finished with value: 0.46148031032234393 and parameters: {'n_estimators': 1120, 'learning_rate': 0.17906269696873725, 'num_leaves': 48, 'max_depth': 9, 'min_child_samples': 20, 'subsample': 0.7816945962957763, 'colsample_bytree': 0.7754569053902988, 'reg_alpha': 0.6598714950714007, 'reg_lambda': 0.43124413095909064}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008680 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:23:16,024] Trial 20 finished with value: 0.49753229756208633 and parameters: {'n_estimators': 1814, 'learning_rate': 0.09775295618645684, 'num_leaves': 38, 'max_depth': 4, 'min_child_samples': 81, 'subsample': 0.9080662878042655, 'colsample_bytree': 0.9524649054198445, 'reg_alpha': 0.40020240583271544, 'reg_lambda': 0.1698218422800286}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008796 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:23:29,081] Trial 21 finished with value: 0.5038677915350944 and parameters: {'n_estimators': 1553, 'learning_rate': 0.08922096163155743, 'num_leaves': 21, 'max_depth': 3, 'min_child_samples': 11, 'subsample': 0.9879611966218864, 'colsample_bytree': 0.9290194951748669, 'reg_alpha': 0.7073318590865812, 'reg_lambda': 0.8465953412868047}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008273 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:23:43,172] Trial 22 finished with value: 0.5003262994565243 and parameters: {'n_estimators': 1731, 'learning_rate': 0.0743790200565636, 'num_leaves': 20, 'max_depth': 4, 'min_child_samples': 7, 'subsample': 0.9476302669483239, 'colsample_bytree': 0.8791330109957957, 'reg_alpha': 0.6481160593718077, 'reg_lambda': 0.8128395831530806}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008721 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:24:01,519] Trial 23 finished with value: 0.47001677352489996 and parameters: {'n_estimators': 1472, 'learning_rate': 0.13958437766767895, 'num_leaves': 150, 'max_depth': 6, 'min_child_samples': 38, 'subsample': 0.8543717652562635, 'colsample_bytree': 0.9940955946823479, 'reg_alpha': 0.8872728752245237, 'reg_lambda': 0.5970313703552059}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008142 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:24:17,327] Trial 24 finished with value: 0.5036469727089256 and parameters: {'n_estimators': 1982, 'learning_rate': 0.04006210841734117, 'num_leaves': 29, 'max_depth': 3, 'min_child_samples': 19, 'subsample': 0.9460189446640745, 'colsample_bytree': 0.944344855984942, 'reg_alpha': 0.7606594915540676, 'reg_lambda': 0.4409973541905652}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014269 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:24:31,661] Trial 25 finished with value: 0.4885162228098601 and parameters: {'n_estimators': 1228, 'learning_rate': 0.20691528997694986, 'num_leaves': 64, 'max_depth': 4, 'min_child_samples': 51, 'subsample': 0.9949370578169714, 'colsample_bytree': 0.8726171379447892, 'reg_alpha': 0.5316562632579285, 'reg_lambda': 0.8235751490635339}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008492 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:24:51,675] Trial 26 finished with value: 0.47316704429721357 and parameters: {'n_estimators': 1629, 'learning_rate': 0.12099052936278502, 'num_leaves': 44, 'max_depth': 6, 'min_child_samples': 6, 'subsample': 0.8342168897189042, 'colsample_bytree': 0.9089672058911834, 'reg_alpha': 0.6831491135970897, 'reg_lambda': 0.7456886382668055}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007746 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:25:07,013] Trial 27 finished with value: 0.49743213469360154 and parameters: {'n_estimators': 1412, 'learning_rate': 0.0682251846742548, 'num_leaves': 31, 'max_depth': 5, 'min_child_samples': 59, 'subsample': 0.8802627566296649, 'colsample_bytree': 0.8029965486059124, 'reg_alpha': 0.18277146431508473, 'reg_lambda': 0.4558930160217127}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008598 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:25:16,233] Trial 28 finished with value: 0.4961971621950276 and parameters: {'n_estimators': 1199, 'learning_rate': 0.04358309699311516, 'num_leaves': 28, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.9504335706736049, 'colsample_bytree': 0.8521644183317295, 'reg_alpha': 0.4325949428582929, 'reg_lambda': 0.9314521909562389}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008344 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:25:33,432] Trial 29 finished with value: 0.49731961719787265 and parameters: {'n_estimators': 1851, 'learning_rate': 0.09251870133288889, 'num_leaves': 20, 'max_depth': 4, 'min_child_samples': 26, 'subsample': 0.764460021334849, 'colsample_bytree': 0.9520707493005077, 'reg_alpha': 0.7891781757919115, 'reg_lambda': 0.26787878888002187}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008542 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:25:42,063] Trial 30 finished with value: 0.5018416203302998 and parameters: {'n_estimators': 711, 'learning_rate': 0.03133825822858312, 'num_leaves': 38, 'max_depth': 7, 'min_child_samples': 45, 'subsample': 0.7198721985534207, 'colsample_bytree': 0.8329335633779769, 'reg_alpha': 0.5622545967039401, 'reg_lambda': 0.6110743213982525}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:25:55,196] Trial 31 finished with value: 0.5032248384584322 and parameters: {'n_estimators': 1362, 'learning_rate': 0.03886413196802518, 'num_leaves': 37, 'max_depth': 5, 'min_child_samples': 34, 'subsample': 0.645769156095373, 'colsample_bytree': 0.8972174607826336, 'reg_alpha': 0.6074942651029874, 'reg_lambda': 0.5317461992766747}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008644 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:11,676] Trial 32 finished with value: 0.5028735629200615 and parameters: {'n_estimators': 1533, 'learning_rate': 0.029547477753116214, 'num_leaves': 26, 'max_depth': 5, 'min_child_samples': 11, 'subsample': 0.6850494537821908, 'colsample_bytree': 0.8179843790399057, 'reg_alpha': 0.48611944383003924, 'reg_lambda': 0.6842303507013117}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:29,278] Trial 33 finished with value: 0.46171248340897997 and parameters: {'n_estimators': 1689, 'learning_rate': 0.1605292015740626, 'num_leaves': 53, 'max_depth': 15, 'min_child_samples': 5, 'subsample': 0.6160754270843559, 'colsample_bytree': 0.8608904848379622, 'reg_alpha': 0.725274351431521, 'reg_lambda': 0.4812427883143795}. Best is trial 15 with value: 0.5078513976356643.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008033 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:30,421] Trial 34 finished with value: 0.5101010097962411 and parameters: {'n_estimators': 114, 'learning_rate': 0.04701657414052819, 'num_leaves': 34, 'max_depth': 3, 'min_child_samples': 35, 'subsample': 0.5357042219987502, 'colsample_bytree': 0.7614867677875868, 'reg_alpha': 0.640722749746057, 'reg_lambda': 0.5853505406439469}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:31,727] Trial 35 finished with value: 0.5056875335600483 and parameters: {'n_estimators': 113, 'learning_rate': 0.05978773433399794, 'num_leaves': 34, 'max_depth': 4, 'min_child_samples': 22, 'subsample': 0.5108405103698451, 'colsample_bytree': 0.6095775983218941, 'reg_alpha': 0.5291132100710242, 'reg_lambda': 0.5896653167608596}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007048 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:35,232] Trial 36 finished with value: 0.5029880475024909 and parameters: {'n_estimators': 168, 'learning_rate': 0.047821742537590986, 'num_leaves': 35, 'max_depth': 4, 'min_child_samples': 37, 'subsample': 0.5254343578039653, 'colsample_bytree': 0.6050478368287728, 'reg_alpha': 0.5116924933030831, 'reg_lambda': 0.5789875072486375}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007161 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:38,792] Trial 37 finished with value: 0.49524853770849625 and parameters: {'n_estimators': 403, 'learning_rate': 0.06249364628940304, 'num_leaves': 52, 'max_depth': 4, 'min_child_samples': 22, 'subsample': 0.5434524681004161, 'colsample_bytree': 0.5590081993347533, 'reg_alpha': 0.8342132181849007, 'reg_lambda': 0.6482101238395853}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007847 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:45,569] Trial 38 finished with value: 0.4942734427867019 and parameters: {'n_estimators': 545, 'learning_rate': 0.02080146820130979, 'num_leaves': 61, 'max_depth': 6, 'min_child_samples': 47, 'subsample': 0.5575347826429915, 'colsample_bytree': 0.7041137068547538, 'reg_alpha': 0.5652252832846181, 'reg_lambda': 0.31699920427297645}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008411 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:47,147] Trial 39 finished with value: 0.5098565006379163 and parameters: {'n_estimators': 111, 'learning_rate': 0.03449315119045977, 'num_leaves': 80, 'max_depth': 5, 'min_child_samples': 32, 'subsample': 0.6038249883933458, 'colsample_bytree': 0.7584894424843497, 'reg_alpha': 0.461637395888005, 'reg_lambda': 0.010001598065759398}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008300 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:49,193] Trial 40 finished with value: 0.5056260816340326 and parameters: {'n_estimators': 142, 'learning_rate': 0.03506875740514752, 'num_leaves': 110, 'max_depth': 5, 'min_child_samples': 30, 'subsample': 0.5043318177869881, 'colsample_bytree': 0.6641831805670549, 'reg_alpha': 0.43692656924196227, 'reg_lambda': 0.19087111705699972}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:51,327] Trial 41 finished with value: 0.5042205121372125 and parameters: {'n_estimators': 161, 'learning_rate': 0.02936543172348608, 'num_leaves': 113, 'max_depth': 5, 'min_child_samples': 30, 'subsample': 0.5013077256488633, 'colsample_bytree': 0.655432227062452, 'reg_alpha': 0.44007284103475464, 'reg_lambda': 0.0003044847301423379}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007317 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:54,130] Trial 42 finished with value: 0.5087825557285174 and parameters: {'n_estimators': 267, 'learning_rate': 0.024140006277950217, 'num_leaves': 110, 'max_depth': 4, 'min_child_samples': 15, 'subsample': 0.5959978148456097, 'colsample_bytree': 0.5860066310589864, 'reg_alpha': 0.3671253590560535, 'reg_lambda': 0.028880580523861152}. Best is trial 34 with value: 0.5101010097962411.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007154 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:26:57,330] Trial 43 finished with value: 0.5124010952595783 and parameters: {'n_estimators': 299, 'learning_rate': 0.014847005317051118, 'num_leaves': 87, 'max_depth': 4, 'min_child_samples': 17, 'subsample': 0.6028413703548049, 'colsample_bytree': 0.574634376254013, 'reg_alpha': 0.26274869957642943, 'reg_lambda': 0.012416079117458106}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007081 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:00,032] Trial 44 finished with value: 0.5121874752156497 and parameters: {'n_estimators': 289, 'learning_rate': 0.01232633691220827, 'num_leaves': 102, 'max_depth': 3, 'min_child_samples': 15, 'subsample': 0.6007963261390215, 'colsample_bytree': 0.5609693776893239, 'reg_alpha': 0.18208714785676317, 'reg_lambda': 0.0026803198320006416}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:08,386] Trial 45 finished with value: 0.5040465149911463 and parameters: {'n_estimators': 306, 'learning_rate': 0.012341936124366034, 'num_leaves': 104, 'max_depth': 14, 'min_child_samples': 17, 'subsample': 0.6126177810860227, 'colsample_bytree': 0.566246477155467, 'reg_alpha': 0.2174812393113636, 'reg_lambda': 0.023865564341494498}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006833 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:13,284] Trial 46 finished with value: 0.5062509643602177 and parameters: {'n_estimators': 536, 'learning_rate': 0.014399770805627854, 'num_leaves': 86, 'max_depth': 4, 'min_child_samples': 41, 'subsample': 0.5803515415451639, 'colsample_bytree': 0.5329936348989798, 'reg_alpha': 0.13226122311040564, 'reg_lambda': 0.07827903699173856}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007970 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:17,990] Trial 47 finished with value: 0.5075553557741921 and parameters: {'n_estimators': 301, 'learning_rate': 0.010438180676159217, 'num_leaves': 97, 'max_depth': 6, 'min_child_samples': 33, 'subsample': 0.6295148254490328, 'colsample_bytree': 0.7554561803503443, 'reg_alpha': 0.29011404318738954, 'reg_lambda': 0.0611049107058795}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007092 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:20,141] Trial 48 finished with value: 0.5107023680734549 and parameters: {'n_estimators': 241, 'learning_rate': 0.022861844105512785, 'num_leaves': 123, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.5908731536375549, 'colsample_bytree': 0.5619581334884983, 'reg_alpha': 0.34322438021503365, 'reg_lambda': 0.13660756722650136}. Best is trial 43 with value: 0.5124010952595783.


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006843 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


[I 2025-06-30 20:27:24,031] Trial 49 finished with value: 0.5091781549317455 and parameters: {'n_estimators': 491, 'learning_rate': 0.015668100974729877, 'num_leaves': 121, 'max_depth': 3, 'min_child_samples': 25, 'subsample': 0.5545194704004348, 'colsample_bytree': 0.5224406307260109, 'reg_alpha': 0.24085165588195467, 'reg_lambda': 0.12414953616348667}. Best is trial 43 with value: 0.5124010952595783.


Best params: {'n_estimators': 299, 'learning_rate': 0.014847005317051118, 'num_leaves': 87, 'max_depth': 4, 'min_child_samples': 17, 'subsample': 0.6028413703548049, 'colsample_bytree': 0.574634376254013, 'reg_alpha': 0.26274869957642943, 'reg_lambda': 0.012416079117458106}
Best F2 score: 0.5124010952595783


In [7]:
# Train final LightGBM with best params

best_params = study.best_params
best_params['random_state'] = 42
best_params['n_jobs'] = -1

final_lgbm = LGBMClassifier(**best_params)
final_lgbm.fit(X_train_bal, y_train_bal)

# Validation predictions
y_val_prob_final = final_lgbm.predict_proba(X_val_scaled)[:,1]
best_thresh, best_f2 = find_best_threshold_f2(y_val, y_val_prob_final)
print(f"Best threshold: {best_thresh:.4f}, Best F2-score: {best_f2:.4f}")

y_val_pred_final = (y_val_prob_final >= best_thresh).astype(int)
print(classification_report(y_val, y_val_pred_final))


[LightGBM] [Info] Number of positive: 111979, number of negative: 111979
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2546
[LightGBM] [Info] Number of data points in the train set: 223958, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Best threshold: 0.4972, Best F2-score: 0.5124
              precision    recall  f1-score   support

           0       0.97      0.87      0.92     27995
           1       0.26      0.67      0.38      2005

    accuracy                           0.85     30000
   macro avg       0.62      0.77      0.65     30000
weighted avg       0.93      0.85      0.88     30000



In [10]:
# Prepare test set and make predictions
df_test_imputed = df_test.copy()
imputer = SimpleImputer(strategy='median')
df_test_imputed[['MonthlyIncome', 'NumberOfDependents']] = imputer.fit_transform(
    df_test_imputed[['MonthlyIncome', 'NumberOfDependents']]
)

# Preprocess test set same as train
X_test = df_test_imputed[features]
X_test_scaled = scaler.transform(X_test)

# Predict probabilities
y_test_prob = final_lgbm.predict_proba(X_test_scaled)[:, 1]

# Predict labels based on best threshold
y_test_pred = (y_test_prob >= best_thresh).astype(int)

# Prepare submission dataframe with only Id and prediction probability
submission = pd.DataFrame({
    'Id': df_submission['Id'],
    'Probability': y_test_prob
})

submission.to_csv('submission.csv', index=False)
print("Submission saved!")

Submission saved!
