In [27]:
# Import libs
import os
import pandas as pd

# Pipeline lib
from sklearn.pipeline import Pipeline

# feature-engine libs
from feature_engine.encoding import OneHotEncoder
from feature_engine.imputation import CategoricalImputer
from feature_engine.imputation import ArbitraryNumberImputer

# machine learning models
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.utils.fixes import loguniform
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# trainning models
from sklearn.model_selection import GridSearchCV

In [6]:
# Read the dataset
df_abt = pd.read_csv('/Users/dellacorte/py-projects/data-science/supervised-learning-pipeline-reference/databases/propensao_revenda_abt.csv')

# pega a base de treinamento
df_train = df_abt.query('data_ref_safra < "2018-03-01"')

# pega a base de avaliação (out of time)
df_oot   = df_abt.query('data_ref_safra == "2018-03-01"')

key_vars = ['data_ref_safra', 'seller_id']
num_vars = ['tot_orders_12m', 'tot_items_12m', 'tot_items_dist_12m', 'receita_12m', 'recencia']
cat_vars = ['uf']
target = 'nao_revendeu_next_6m'

features = cat_vars + num_vars

# dados de treinamento
X_train = df_train[features]
y_train = df_train[target]

# dados de avaliação (out of time)
X_oot = df_oot[features]
y_oot = df_oot[target]

In [7]:
# Install libs
!pip install feature-engine



### Decision Tree

In [8]:
dt = Pipeline(steps=[
    ('numeric_imputer', ArbitraryNumberImputer(variables=num_vars, arbitrary_number=-999)),
    ('categoric_imputer', CategoricalImputer(variables=cat_vars, return_object=True)),
    ('one_hot_encoder', OneHotEncoder(variables=cat_vars)),
    ('Decision_Tree', DecisionTreeClassifier())
])

## Grid Search

In [10]:
# We create a dictionary that contains the hyperparameters we want to train the model
parameters = {
  'Decision_Tree__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}

# We create a GridSearch passing the model, the parameters, the metric we want to optimize. 
grid_search = GridSearchCV(dt, parameters, scoring='roc_auc', cv=5, n_jobs=-1)

# Perform training with data
grid_search.fit(X_train, y_train)

In [11]:
# creating a table with grid_search results
results = pd.DataFrame(grid_search.cv_results_)

# ranking these results
results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_Decision_Tree__max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
3,0.029004,0.006693,0.014339,0.003482,4,{'Decision_Tree__max_depth': 4},0.89905,0.870713,0.900654,0.90516,0.898453,0.894806,0.012273,1
4,0.035514,0.007182,0.016402,0.005012,5,{'Decision_Tree__max_depth': 5},0.899596,0.872724,0.896551,0.900762,0.892545,0.892436,0.010258,2
2,0.026001,0.005314,0.014595,0.002052,3,{'Decision_Tree__max_depth': 3},0.892092,0.869464,0.892527,0.902151,0.895258,0.890299,0.011021,3
5,0.038351,0.007797,0.016844,0.007773,6,{'Decision_Tree__max_depth': 6},0.886496,0.856607,0.881366,0.882799,0.888481,0.87915,0.011554,4
1,0.032948,0.004919,0.022842,0.009206,2,{'Decision_Tree__max_depth': 2},0.878607,0.848479,0.880941,0.885121,0.881562,0.874942,0.013395,5
6,0.033544,0.004916,0.016663,0.002971,7,{'Decision_Tree__max_depth': 7},0.874233,0.852324,0.871191,0.870362,0.877472,0.869116,0.008762,6
7,0.039492,0.005693,0.017491,0.002563,8,{'Decision_Tree__max_depth': 8},0.867362,0.834495,0.851847,0.863811,0.859018,0.855307,0.011632,7
8,0.034166,0.004295,0.017581,0.001564,9,{'Decision_Tree__max_depth': 9},0.850434,0.793734,0.838016,0.857425,0.836544,0.835231,0.022159,8
9,0.034675,0.008469,0.012781,0.00431,10,{'Decision_Tree__max_depth': 10},0.829514,0.793448,0.830914,0.848248,0.830297,0.826484,0.017935,9
0,0.032179,0.005492,0.019961,0.006703,1,{'Decision_Tree__max_depth': 1},0.805868,0.791368,0.817157,0.824619,0.812373,0.810277,0.011264,10


### Random Forest

In [13]:
rf = Pipeline(steps=[
    ('numeric_imputer', ArbitraryNumberImputer(variables=num_vars, arbitrary_number=-999)),
    ('categoric_imputer', CategoricalImputer(variables=cat_vars, return_object=True)),
    ('one_hot_encoder', OneHotEncoder(variables=cat_vars)),
    ('Random_Forest', RandomForestClassifier(n_jobs=-1))
])

In [14]:
parameters = {'Random_Forest__max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
              'Random_Forest__n_estimators': [100, 300, 500]}

grid_search = GridSearchCV(rf, parameters, scoring='roc_auc', cv=5, n_jobs=-1)

In [15]:
grid_search.fit(X_train, y_train)

In [16]:
results = pd.DataFrame(grid_search.cv_results_)
results.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_Random_Forest__max_depth,param_Random_Forest__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
29,1.328188,0.187782,0.107286,0.011068,10,500,"{'Random_Forest__max_depth': 10, 'Random_Fores...",0.924998,0.904278,0.91501,0.926852,0.921426,0.918513,0.008185,1
28,1.208296,0.123065,0.09977,0.028955,10,300,"{'Random_Forest__max_depth': 10, 'Random_Fores...",0.924123,0.903428,0.915392,0.925541,0.921365,0.91797,0.00806,2
27,0.573152,0.180956,0.160765,0.099973,10,100,"{'Random_Forest__max_depth': 10, 'Random_Fores...",0.923698,0.902743,0.914532,0.923987,0.921261,0.917244,0.008011,3
25,1.09284,0.116075,0.100873,0.012614,9,300,"{'Random_Forest__max_depth': 9, 'Random_Forest...",0.921452,0.901269,0.913013,0.922312,0.918552,0.91532,0.007742,4
26,2.049349,0.087387,0.149787,0.060875,9,500,"{'Random_Forest__max_depth': 9, 'Random_Forest...",0.920525,0.89983,0.913369,0.923301,0.918734,0.915152,0.008319,5


In [17]:
grid_search.best_estimator_.steps[-1][1].get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 10,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 500,
 'n_jobs': -1,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

### LightGBM

In [19]:
lgbm = Pipeline(steps=[
    ('numeric_imputer', ArbitraryNumberImputer(variables=num_vars, arbitrary_number=-999)),
    ('categoric_imputer', CategoricalImputer(variables=cat_vars, return_object=True)),
    ('one_hot_encoder', OneHotEncoder(variables=cat_vars)),
    ('LGBM', LGBMClassifier(n_jobs=-1))
])

In [21]:
parameters = {'LGBM__learning_rate': [0.001, 0.01], 
              'LGBM__num_leaves': [2, 128],
              'LGBM__min_child_samples': [1, 100],
              'LGBM__subsample': [0.05, 1.0],
              'LGBM__colsample_bytree': [0.1, 1.0]}

random_search = RandomizedSearchCV(lgbm, parameters, scoring='roc_auc', cv=5, n_iter=5, n_jobs=-1)

random_search.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 1332, number of negative: 2163
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 3495, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381116 -> initscore=-0.484815
[LightGBM] [Info] Start training from score -0.484815


In [22]:
results = pd.DataFrame(random_search.cv_results_)
results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_LGBM__subsample,param_LGBM__num_leaves,param_LGBM__min_child_samples,param_LGBM__learning_rate,param_LGBM__colsample_bytree,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
1,2.275078,0.186969,0.014885,0.001412,0.05,128,100,0.001,1.0,"{'LGBM__subsample': 0.05, 'LGBM__num_leaves': ...",0.90811,0.881611,0.903015,0.913299,0.904791,0.902165,0.010857,1
3,0.282394,0.014628,0.012533,0.003513,0.05,2,100,0.01,1.0,"{'LGBM__subsample': 0.05, 'LGBM__num_leaves': ...",0.888213,0.852645,0.883984,0.886793,0.878601,0.878047,0.01312,2
2,4.521753,0.752578,0.020838,0.004568,1.0,128,1,0.01,0.1,"{'LGBM__subsample': 1.0, 'LGBM__num_leaves': 1...",0.86223,0.855146,0.874386,0.885247,0.863099,0.868022,0.01059,3
0,0.408562,0.047358,0.013363,0.000843,1.0,2,100,0.01,0.1,"{'LGBM__subsample': 1.0, 'LGBM__num_leaves': 2...",0.850322,0.847465,0.866928,0.828075,0.857573,0.850072,0.012893,4
4,0.292514,0.01946,0.012324,0.002648,0.05,2,1,0.001,1.0,"{'LGBM__subsample': 0.05, 'LGBM__num_leaves': ...",0.830533,0.815877,0.870201,0.842861,0.848574,0.841609,0.018179,5


In [24]:
parameters = {'LGBM__learning_rate': loguniform(1e-3, 1e-1), 
              'LGBM__num_leaves': [2, 128],
              'LGBM__min_child_samples': [1, 100],
              'LGBM__subsample': [0.05, 1.0],
              'LGBM__colsample_bytree': [0.1, 1.0]}

random_search = RandomizedSearchCV(lgbm, parameters, scoring='roc_auc', cv=5, n_iter=30, n_jobs=-1)

In [25]:
random_search.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008735 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM]

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002161 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000973 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724


[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001520 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001956 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001434 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000737 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011992 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003351 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012283 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000840 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001404 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003631 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM]

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005193 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001271 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001635 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001486 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001397 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001404 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000639 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM]

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001409 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001214 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001776 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001883 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002279 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001246 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002754 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 899
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001891 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM]

[LightGBM] [Info] Number of positive: 1332, number of negative: 2163
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001039 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 936
[LightGBM] [Info] Number of data points in the train set: 3495, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381116 -> initscore=-0.484815
[LightGBM] [Info] Start training from score -0.484815


In [26]:
results = pd.DataFrame(random_search.cv_results_)
results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_LGBM__colsample_bytree,param_LGBM__learning_rate,param_LGBM__min_child_samples,param_LGBM__num_leaves,param_LGBM__subsample,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
17,15.734459,0.430628,0.026855,0.005064,1.0,0.078729,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.918288,0.903428,0.910486,0.923193,0.924126,0.915904,0.007892,1
25,2.538705,0.225109,0.029811,0.011036,1.0,0.029203,100,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.910919,0.888347,0.904569,0.918157,0.906532,0.905705,0.009854,2
10,2.410623,0.285155,0.014282,0.002733,1.0,0.016296,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.911504,0.888178,0.90404,0.915982,0.907508,0.905442,0.009509,3
26,2.825277,0.151727,0.034196,0.010221,1.0,0.012233,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.910706,0.888321,0.904101,0.915513,0.906836,0.905095,0.009224,4
23,2.529013,0.101112,0.021544,0.006674,1.0,0.005702,100,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.911239,0.885113,0.902256,0.915661,0.907105,0.904275,0.010558,5
5,1.943477,0.058058,0.013693,0.002311,1.0,0.002418,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.909887,0.882889,0.902286,0.913964,0.904179,0.902641,0.010708,6
27,2.817147,0.056163,0.032793,0.009072,1.0,0.00118,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.909475,0.881888,0.902981,0.913299,0.904791,0.902487,0.010914,7
2,0.415112,0.119209,0.013838,0.001773,1.0,0.023604,100,2,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.906692,0.881697,0.902056,0.913299,0.903449,0.901439,0.010606,8
13,15.183411,2.252642,0.024188,0.004494,1.0,0.021064,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.895903,0.883189,0.903923,0.914011,0.910022,0.901409,0.010969,9
1,15.51327,1.629802,0.025451,0.005629,1.0,0.01986,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.893601,0.883756,0.902872,0.908885,0.910165,0.899856,0.009952,10


### XGBoosting

In [28]:
xgb_pipeline = Pipeline(steps=[
    ('numeric_imputer', ArbitraryNumberImputer(variables=num_vars, arbitrary_number=-999)),  # Impute numeric variables
    ('categoric_imputer', CategoricalImputer(variables=cat_vars, return_object=True)),      # Impute categorical variables
    ('one_hot_encoder', OneHotEncoder(variables=cat_vars)),                                 # One-hot encode categorical variables
    ('XGB', XGBClassifier(n_jobs=-1))                                                      # XGBoost classifier
])

In [29]:
# Define the parameter grid for the XGBClassifier
parameters = {
    'XGB__learning_rate': [0.001, 0.01, 0.1],     # Learning rate
    'XGB__max_depth': [3, 6, 10],                 # Maximum depth of trees
    'XGB__min_child_weight': [1, 5, 10],          # Minimum child weight
    'XGB__subsample': [0.5, 0.8, 1.0],            # Subsample ratio
    'XGB__colsample_bytree': [0.5, 0.8, 1.0],     # Feature subsample ratio
    'XGB__n_estimators': [50, 100, 200]           # Number of estimators (trees)
}

# Define the RandomizedSearchCV
random_search_xgb = RandomizedSearchCV(
    xgb_pipeline,
    parameters,
    scoring='roc_auc',  # Use ROC AUC as the scoring metric
    cv=5,               # 5-fold cross-validation
    n_iter=10,          # Number of parameter settings sampled
    n_jobs=-1           # Use all available cores
)

# Fit the random search to the training data
random_search_xgb.fit(X_train, y_train)

In [30]:
results = pd.DataFrame(random_search.cv_results_)
results.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_LGBM__colsample_bytree,param_LGBM__learning_rate,param_LGBM__min_child_samples,param_LGBM__num_leaves,param_LGBM__subsample,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
17,15.734459,0.430628,0.026855,0.005064,1.0,0.078729,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.918288,0.903428,0.910486,0.923193,0.924126,0.915904,0.007892,1
25,2.538705,0.225109,0.029811,0.011036,1.0,0.029203,100,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.910919,0.888347,0.904569,0.918157,0.906532,0.905705,0.009854,2
10,2.410623,0.285155,0.014282,0.002733,1.0,0.016296,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.911504,0.888178,0.90404,0.915982,0.907508,0.905442,0.009509,3
26,2.825277,0.151727,0.034196,0.010221,1.0,0.012233,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.910706,0.888321,0.904101,0.915513,0.906836,0.905095,0.009224,4
23,2.529013,0.101112,0.021544,0.006674,1.0,0.005702,100,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.911239,0.885113,0.902256,0.915661,0.907105,0.904275,0.010558,5
5,1.943477,0.058058,0.013693,0.002311,1.0,0.002418,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.909887,0.882889,0.902286,0.913964,0.904179,0.902641,0.010708,6
27,2.817147,0.056163,0.032793,0.009072,1.0,0.00118,100,128,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.909475,0.881888,0.902981,0.913299,0.904791,0.902487,0.010914,7
2,0.415112,0.119209,0.013838,0.001773,1.0,0.023604,100,2,0.05,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.906692,0.881697,0.902056,0.913299,0.903449,0.901439,0.010606,8
13,15.183411,2.252642,0.024188,0.004494,1.0,0.021064,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.895903,0.883189,0.903923,0.914011,0.910022,0.901409,0.010969,9
1,15.51327,1.629802,0.025451,0.005629,1.0,0.01986,1,128,1.0,"{'LGBM__colsample_bytree': 1.0, 'LGBM__learnin...",0.893601,0.883756,0.902872,0.908885,0.910165,0.899856,0.009952,10


[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM]

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001766 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000975 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 890
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001769 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001486 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001317 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 865
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001860 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 884
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 26
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001355 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Nu

[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 862
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001904 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 870
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002637 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 904
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001855 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 854
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 11
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208


[LightGBM] [Info] Number of positive: 1065, number of negative: 1731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002006 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 896
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380901 -> initscore=-0.485724
[LightGBM] [Info] Start training from score -0.485724
[LightGBM] [Info] Number of positive: 1066, number of negative: 1730
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 858
[LightGBM] [Info] Number of data points in the train set: 2796, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.381259 -> initscore=-0.484208
[LightGBM] [Info] Start training from score -0.484208
