In [None]:
import pandas as pd

In [None]:
test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')


In [None]:
print(" =============== TRAIN ================")
print(train.info())
print(train.isna().sum())
print(" ======================================")
print(" =============== TEST ================")
print(test.info())
print(test.isna().sum())
print(" ======================================")

### Data preprocessing  

In [None]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder


In [None]:
train.columns

In [None]:
# Binary categorical features mapping
binary_map = {'yes' : 1, 'no' : 0}
binary_columns = ['default', 'housing', 'loan']

for col in binary_columns:
    train[col] = train[col].map(binary_map)
    test[col] = test[col].map(binary_map)

# Ordinal encoding for date-related features
month_order = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

ord_enc = OrdinalEncoder(categories=[month_order])
train[['month']] = ord_enc.fit_transform(train[['month']]).astype(int)
test[['month']] = ord_enc.fit_transform(test[['month']]).astype(int)


# One-Hot encode categorical features (excluding already encoded ones)






In [None]:
categorical_cols = ['job', 'marital', 'education', 'contact', 'poutcome']

# Fill missing values in categorical columns
# train[categorical_cols] = train[categorical_cols].fillna("missing")
# test[categorical_cols] = test[categorical_cols].fillna("missing")

# Fit the encoder on train data only
encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
encoder.fit(train[categorical_cols])

# Transform train and test data
encoded_train = encoder.transform(train[categorical_cols])
encoded_test = encoder.transform(test[categorical_cols])

# Convert to DataFrame
encoded_train_df = pd.DataFrame(encoded_train.toarray().astype(int), columns=encoder.get_feature_names_out(categorical_cols))
encoded_test_df = pd.DataFrame(encoded_test.toarray().astype(int), columns=encoder.get_feature_names_out(categorical_cols))

# Concatenate the encoded features with the original data
train = pd.concat([train.reset_index(drop=True), encoded_train_df], axis=1).drop(columns=categorical_cols)
test = pd.concat([test.reset_index(drop=True), encoded_test_df], axis=1).drop(columns=categorical_cols)

In [None]:
# Show the final dataframe structure
print("DataFrame shape after encoding:", train.shape)
print("Sample data:\n", train.head())

In [None]:
train

In [None]:
# interaction features
train['default_housing'] = train['default'] * train['housing']
test['default_housing'] = test['default'] * test['housing']




In [None]:
# Cyclic encoding for month feature
import numpy as np
train['month_sin'] = train['month'].apply(lambda x: np.sin(2 * np.pi * x / 12))
test['month_sin'] = test['month'].apply(lambda x: np.sin(2 * np.pi * x / 12))

train['month_cos'] = train['month'].apply(lambda x: np.cos(2 * np.pi * x / 12))
test['month_cos'] = test['month'].apply(lambda x: np.cos(2 * np.pi * x / 12))


In [None]:
# Log transformation for balance feature

train['balance_log'] = np.log1p(train['balance'])
test['balance_log'] = np.log1p(test['balance'])

In [None]:
train

In [None]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data
X = train.drop(columns=['y'])  # Replace 'target' with your target column
y = train['y']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a LightGBM model
lgb_model = lgb.LGBMClassifier(random_state=42)
lgb_model.fit(X_train, y_train)

# Evaluate the model
y_pred = lgb_model.predict(X_val)
print("Accuracy:", accuracy_score(y_val, y_pred))

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import make_scorer, accuracy_score, f1_score, roc_auc_score

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [None]:

# -------------------- Models --------------------
models = {
    'Logistic Regression': Pipeline([
        ('clf', LogisticRegression(max_iter=1000))
    ]),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'LightGBM': LGBMClassifier(),
    'CatBoost': CatBoostClassifier(verbose=0)
}

# -------------------- Handle Missing Values --------------------
from sklearn.impute import SimpleImputer

# Replace infinity values with NaN
X.replace([np.inf, -np.inf], np.nan, inplace=True)

# Impute missing values with the mean for numerical columns
imputer = SimpleImputer(strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# -------------------- Cross-Validation --------------------
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'f1': make_scorer(f1_score),
    'roc_auc': make_scorer(roc_auc_score)
}

# -------------------- Evaluation --------------------
results = {}
for name, model in models.items():
    print(f"Evaluating: {name}")
    scores = cross_validate(model, X, y, cv=cv, scoring=scoring)
    results[name] = {
        'Accuracy': np.mean(scores['test_accuracy']),
        'F1 Score': np.mean(scores['test_f1']),
        'ROC AUC': np.mean(scores['test_roc_auc'])
    }

# -------------------- Display Results --------------------
results_df = pd.DataFrame(results).T.sort_values(by="ROC AUC", ascending=False)
print("\nModel Performance:\n")
print(results_df)


In [24]:
final_model = models['CatBoost']  # or whichever model you trained
final_model.fit(X, y)

<catboost.core.CatBoostClassifier at 0x34a691310>

In [26]:
test_preds = final_model.predict(test)

In [27]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import StackingClassifier

# Define hyperparameter grid for LightGBM
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [100, 200, 500],
    'max_depth': [3, 5, 7],
    'num_leaves': [15, 31, 63],
    'min_child_samples': [10, 20, 30]
}

# Randomized search for LightGBM
lgbm = LGBMClassifier()
random_search = RandomizedSearchCV(
    estimator=lgbm,
    param_distributions=param_grid,
    n_iter=50,
    scoring='roc_auc',
    cv=5,
    verbose=1,
    n_jobs=-1,
    random_state=42
)

random_search.fit(X, y)
print("Best Parameters for LightGBM:", random_search.best_params_)

# Stacking Classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('lr', LogisticRegression(max_iter=1000)),
        ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='logloss')),
        ('lgbm', LGBMClassifier(**random_search.best_params_)),
        ('cat', CatBoostClassifier(verbose=0))
    ],
    final_estimator=LogisticRegression()
)

# Cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_validate(stacking_clf, X, y, cv=cv, scoring=scoring)

# Display results
print("Stacking Classifier Performance:")
print("Accuracy:", np.mean(scores['test_accuracy']))
print("F1 Score:", np.mean(scores['test_f1']))
print("ROC AUC:", np.mean(scores['test_roc_auc']))

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.083953 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013660 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1545
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986273
[LightGBM] [Info] Start training from score -1.986273


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[LightGBM] [Info] Number of positive: 57913, number of negative: 422087
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986270
[LightGBM] [Info] Start training from score -1.986270
[LightGBM] [Info] Number of positive: 57913, number of negative: 422087
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009491 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [In

Exception ignored in: <function ResourceTracker.__del__ at 0x1028e3f60>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x10486ff60>
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.frame

[LightGBM] [Info] Number of positive: 72391, number of negative: 527609
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011506 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1545
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986273
[LightGBM] [Info] Start training from score -1.986273


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[LightGBM] [Info] Number of positive: 57913, number of negative: 422087
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009171 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1545
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120652 -> initscore=-1.986270
[LightGBM] [Info] Start training from score -1.986270
[LightGBM] [Info] Number of positive: 57913, number of negative: 422087
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009258 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [In

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013120 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1544
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009992 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289
[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009758 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [In

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009259 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289
[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010106 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1546
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [In

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[LightGBM] [Info] Number of positive: 72390, number of negative: 527610
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012905 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1547
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1547
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.120650 -> initscore=-1.986289
[LightGBM] [Info] Start training from score -1.986289
[LightGBM] [Info] Number of positive: 57912, number of negative: 422088
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1547
[LightGBM] [Info] Number of data points in the train set: 480000, number of used features: 37
[LightGBM] [In

In [28]:
scores

{'fit_time': array([346.80215406, 344.54678607, 342.04367781, 340.56609797,
        343.19367719]),
 'score_time': array([0.5430181 , 0.54706979, 0.59583592, 0.54646611, 0.56369567]),
 'test_accuracy': array([0.93569333, 0.93530667, 0.93523333, 0.93596667, 0.93447333]),
 'test_f1': array([0.71026072, 0.70808014, 0.70995671, 0.71241654, 0.70221468]),
 'test_roc_auc': array([0.81387423, 0.81236711, 0.81519538, 0.81577921, 0.8075884 ])}

In [None]:
%pip install CatBoost