In [1]:
import os
import gc
import numpy as np
import pandas as pd
import joblib
from datetime import datetime

from lightgbm import LGBMClassifier
import optuna
from prunedcv import PrunedCV

from codes.utils import import_data, cross_val_score_auc, reduce_mem_usage, fix_dtypes
from codes.fe_browser import latest
from codes.fe_emails import proton, mappings
from codes.fe_cards import stats
from codes.fe_date import dates
from codes.fe_relatives import divisions, divisions_float
from codes.fe_categorical import pairs, wtf, cat_limit, encode_cat
from codes.prepro import prepro
from codes.fe_users import users_stats

from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import make_pipeline
from sklearn.base import TransformerMixin

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
# --- Run configuration -------------------------------------------------------
# Location handed to `import_data`.
DATA_PATH = '../input/'

# Validation settings.
RANDOM_STATE = 43       # `random_state` of the pruned cross-validation
N_FOLD = 5              # first argument of `PrunedCV`

# Modelling settings.
BOOSTING = 'goss'       # LightGBM `boosting_type`; also part of the study file name
SEARCH_PARAMS = True    # True: run the Optuna search, False: use the stored parameters

In [3]:
# Load the train/test frames and the sample submission from DATA_PATH.
# `import_data` is a project helper (codes.utils); the files it reads are not
# visible from this notebook.
X_train, X_test, sample_submission = import_data(DATA_PATH)

### Some Feature Engineering

Feature pipeline: dropping columns, count encoding, aggregation, and filling missing values (`fillna`).

In [4]:
# Build the model-ready feature matrices, or reload them from a previous run.
#
# The cache is only used when ALL three pickles exist. Checking just
# 'features_train.pkl' would crash on load if an earlier run was interrupted
# between the dumps at the end of this cell; now such a partial cache simply
# triggers a rebuild.
feature_cache_files = ('features_train.pkl', 'features_test.pkl', 'y_train.pkl')

if all(os.path.isfile(cache_file) for cache_file in feature_cache_files):
    X_train = joblib.load('features_train.pkl')
    X_test = joblib.load('features_test.pkl')
    y_train = joblib.load('y_train.pkl')

else:
    # Each step below is a project helper (codes.*) that takes the train/test
    # pair and returns the updated pair; the label is printed first so the
    # cell output shows how far the build got.
    print('fix_dtypes')
    X_train, X_test = fix_dtypes(X_train, X_test)
    print('users_stats')
    X_train, X_test = users_stats(X_train, X_test)
    print('latest')
    X_train, X_test = latest(X_train, X_test)
    print('proton')
    X_train, X_test = proton(X_train, X_test)
    print('nulls1')
    # Per-row count of missing values, taken at this point of the pipeline.
    X_train['nulls1'] = X_train.isna().sum(axis=1)
    X_test['nulls1'] = X_test.isna().sum(axis=1)
    print('mappings')
    X_train, X_test = mappings(X_train, X_test)
    print('stats')
    X_train, X_test = stats(X_train, X_test)
    print('divisions')
    X_train, X_test = divisions(X_train, X_test)
    print('dates')
    X_train, X_test = dates(X_train, X_test)
    print('pairs')
    X_train, X_test = pairs(X_train, X_test)
    print('encode_cat')
    X_train, X_test = encode_cat(X_train, X_test)
    print('wtf')
    # Step disabled: only its label is printed.
    # X_train, X_test = wtf(X_train, X_test)
    print('y')
    # Split the target off before the remaining steps run on the features.
    y_train = X_train['isFraud'].copy()
    X_train = X_train.drop('isFraud', axis=1)
    print('divisions_float')
    X_train, X_test = divisions_float(X_train, X_test)
    print('prepro')
    X_train, X_test = prepro(X_train, X_test)
    print('reduce_mem_usage')
    # Step disabled: only its label is printed.
    # X_train = reduce_mem_usage(X_train)
    # X_test = reduce_mem_usage(X_test)
    print('np.inf')
    # Replace +inf, -inf and missing values by the sentinel -1.
    X_train[X_train == np.inf] = -1
    X_train[X_train == -np.inf] = -1
    X_train[X_train.isna()] = -1
    X_test[X_test == np.inf] = -1
    X_test[X_test == -np.inf] = -1
    X_test[X_test.isna()] = -1
    print('TransactionDT')
    X_test.drop(['TransactionDT'], axis=1, inplace=True)
    X_train.drop(['TransactionDT'], axis=1, inplace=True)

    # Persist the result so later runs take the cached branch above.
    joblib.dump(X_train, 'features_train.pkl')
    joblib.dump(X_test, 'features_test.pkl')
    joblib.dump(y_train, 'y_train.pkl')

fix_dtypes
users_stats


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  all_data = pd.concat([train, test])


latest
proton
nulls1
mappings
stats
divisions
dates
pairs
encode_cat
wtf
y
divisions_float
774
prepro
reduce_mem_usage
np.inf
TransactionDT


In [None]:
# Append an additional, separately stored feature set to the matrices.
# NOTE(review): 'train_feats.pkl' / 'test_feats.pkl' are not written anywhere
# in the visible part of this notebook - they must already exist on disk.
train_new_feats = joblib.load('train_feats.pkl')
test_new_feats = joblib.load('test_feats.pkl')

# Cast categorical columns to integers. Plain column assignment is used
# instead of `.loc[:, col] = ...`: the `.loc` form may write the values into
# the existing categorical column and leave its dtype unchanged, whereas
# `frame[col] = ...` always replaces the column.
for col in train_new_feats.select_dtypes('category').columns:
    train_new_feats[col] = train_new_feats[col].astype('int')
    test_new_feats[col] = test_new_feats[col].astype('int')

print('np.inf')
# Replace +inf, -inf and missing values by the sentinel -1.
train_new_feats[train_new_feats == np.inf] = -1
train_new_feats[train_new_feats == -np.inf] = -1
train_new_feats[train_new_feats.isna()] = -1
test_new_feats[test_new_feats == np.inf] = -1
test_new_feats[test_new_feats == -np.inf] = -1
test_new_feats[test_new_feats.isna()] = -1

print(train_new_feats.shape[1])
print(X_train.shape[1])
n_train_rows, n_test_rows = len(X_train), len(X_test)
X_train = pd.concat([X_train, train_new_feats], axis=1)
print(X_train.shape[1])
X_test = pd.concat([X_test, test_new_feats], axis=1)

# concat(axis=1) aligns on the index; extra rows mean the indexes did not
# match and the features would no longer line up with the existing rows.
assert len(X_train) == n_train_rows, 'train_feats.pkl index does not match X_train'
assert len(X_test) == n_test_rows, 'test_feats.pkl index does not match X_test'

del train_new_feats, test_new_feats

### Model and training

In [8]:
# First-pass feature selection: fit a LightGBM model on every column and keep
# the columns that SelectFromModel retains at threshold 0.5.
print(len(X_train.columns))

sel_mod = LGBMClassifier(boosting_type=BOOSTING, metric='auc')
sfm = SelectFromModel(estimator=sel_mod, threshold=0.5)
sfm.fit(X_train, y_train)

support_mask = sfm.get_support()
columns = list(X_train.columns[support_mask])
print(len(columns))

# Restrict both matrices to the selected columns.
X_train = X_train[columns]
X_test = X_test[columns]

1264
858


In [None]:
# 854
# 481

In [9]:
class Counter(TransformerMixin):
    """Pass-through pipeline step that prints how many columns reach it."""

    def fit(self, X, y=None):
        """Nothing is learned; the transformer itself is returned."""
        return self

    def transform(self, X):
        """Print the size of the second axis of ``X`` and return ``X`` as is."""
        n_columns = X.shape[1]
        print(n_columns)
        return X

In [10]:
# Two-stage model: LightGBM-based feature selection, a column-count printer,
# then the final LightGBM classifier.
sel_mod = LGBMClassifier(metric='auc', n_estimators=200, boosting_type=BOOSTING)

# `pars` is not defined anywhere in the visible notebook (presumably it came
# from a cell that no longer exists), so this line raised a NameError on a
# fresh kernel. Apply it when it is present; otherwise keep the selector's
# defaults.
sel_mod.set_params(**globals().get('pars', {}))

# make_pipeline names each step after its lower-cased class name
# ('selectfrommodel', 'counter', 'lgbmclassifier'); the search-space keys in
# `objective` rely on those names.
model = make_pipeline(
    SelectFromModel(sel_mod),
    Counter(),
    LGBMClassifier(metric='auc',
                   boosting_type=BOOSTING,
                   n_estimators=2000)
)

In [11]:
# Cross-validation helper from the `prunedcv` package, shared by every trial.
# NOTE(review): 0.02 is presumably the pruning tolerance and `minimize=False`
# presumably marks the metric as higher-is-better - confirm against the
# prunedcv documentation.
prun = PrunedCV(N_FOLD, 0.02, minimize=False)

In [12]:
def objective(trial):
    """Optuna objective: sample one configuration and score it with ``prun``.

    The notebook-level ``study`` is pickled at the start of every call, before
    anything is sampled. The sampled values are printed, applied to the
    notebook-level ``model`` pipeline and the result of
    ``prun.cross_val_score`` on ``X_train`` / ``y_train`` is returned.
    """
    checkpoint_file = 'study_{}.pkl'.format(BOOSTING)
    joblib.dump(study, checkpoint_file)

    # (parameter name, suggest method, low, high) - sampled in this order.
    search_space = (
        ('selectfrommodel__threshold', trial.suggest_int, 1, 200),
        ('lgbmclassifier__num_leaves', trial.suggest_int, 10, 1500),
        ('lgbmclassifier__subsample_for_bin', trial.suggest_int, 1000, 5000000),
        ('lgbmclassifier__min_child_samples', trial.suggest_int, 200, 100000),
        ('lgbmclassifier__reg_alpha', trial.suggest_loguniform, 0.00000000001, 10.0),
        ('lgbmclassifier__colsample_bytree', trial.suggest_loguniform, 0.0001, 1.0),
        ('lgbmclassifier__learning_rate', trial.suggest_loguniform, 0.00001, 2.0),
    )

    params = {}
    for name, suggest, low, high in search_space:
        params[name] = suggest(name, low, high)

    print(params)

    model.set_params(**params)
    score = prun.cross_val_score(
        model,
        X_train,
        y_train,
        metric='auc',
        shuffle=True,
        random_state=RANDOM_STATE,
    )
    return score

In [None]:
# Either run (or resume) the Optuna search, or fall back to stored parameters.
study_file = 'study_{}.pkl'.format(BOOSTING)

if SEARCH_PARAMS:
    if os.path.isfile(study_file):
        # Resume from the checkpoint written by `objective` / a previous run.
        study = joblib.load(study_file)
    else:
        # Seed the sampler so a fresh search is reproducible; without a seed
        # every new study explores a different sequence of configurations.
        # The default direction (minimize) is kept: the logged trial values
        # are negative, so presumably the CV helper returns the negated AUC.
        study = optuna.create_study(
            sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE)
        )

    # Search budget: 22 hours of wall-clock time.
    study.optimize(objective, timeout=60 * 60 * 22)
    joblib.dump(study, study_file)
    best_params = study.best_params

else:

    # Parameters used when the search is skipped.
    best_params = {
        'selectfrommodel__threshold': 20,
        'lgbmclassifier__num_leaves': 330,
        'lgbmclassifier__subsample_for_bin': 2077193,
        'lgbmclassifier__min_child_samples': 2227,
        'lgbmclassifier__reg_alpha': 0.16758905622425835,
        'lgbmclassifier__colsample_bytree': 0.49030006727392056,
        'lgbmclassifier__learning_rate': 0.07916040470631734
    }

{'selectfrommodel__threshold': 170, 'lgbmclassifier__num_leaves': 339, 'lgbmclassifier__subsample_for_bin': 1186152, 'lgbmclassifier__min_child_samples': 72164, 'lgbmclassifier__reg_alpha': 1.2579198177394756e-09, 'lgbmclassifier__colsample_bytree': 0.002257234008559371, 'lgbmclassifier__learning_rate': 0.1985648810166687}
37
37
39
39
39
39
38
38
40
40
36
36
40
40
39
39


[I 2019-09-13 15:04:38,489] Finished trial#0 resulted in value: -0.8163123545125364. Current best value is -0.8163123545125364 with parameters: {'selectfrommodel__threshold': 170, 'lgbmclassifier__num_leaves': 339, 'lgbmclassifier__subsample_for_bin': 1186152, 'lgbmclassifier__min_child_samples': 72164, 'lgbmclassifier__reg_alpha': 1.2579198177394756e-09, 'lgbmclassifier__colsample_bytree': 0.002257234008559371, 'lgbmclassifier__learning_rate': 0.1985648810166687}.


{'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}
126
126
125
125
124
124
126
126
127
127
128
128
129
129
127
127


[I 2019-09-13 15:34:46,783] Finished trial#1 resulted in value: -0.8996888834888637. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 165, 'lgbmclassifier__num_leaves': 508, 'lgbmclassifier__subsample_for_bin': 1391520, 'lgbmclassifier__min_child_samples': 20661, 'lgbmclassifier__reg_alpha': 3.8077553579075474, 'lgbmclassifier__colsample_bytree': 0.7443367963866814, 'lgbmclassifier__learning_rate': 0.00022456711080226036}
41
41
40
40


[I 2019-09-13 15:43:01,857] Finished trial#2 resulted in value: -0.7853495131931535. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 169, 'lgbmclassifier__num_leaves': 666, 'lgbmclassifier__subsample_for_bin': 4202330, 'lgbmclassifier__min_child_samples': 73164, 'lgbmclassifier__reg_alpha': 0.00497278342443507, 'lgbmclassifier__colsample_bytree': 0.014191635011074177, 'lgbmclassifier__learning_rate': 0.28752182745571353}
38
38
39
39


[I 2019-09-13 15:53:40,483] Finished trial#3 resulted in value: -0.8185254504779146. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 7, 'lgbmclassifier__num_leaves': 216, 'lgbmclassifier__subsample_for_bin': 235905, 'lgbmclassifier__min_child_samples': 5887, 'lgbmclassifier__reg_alpha': 0.07031505525916246, 'lgbmclassifier__colsample_bytree': 0.00014219632204050137, 'lgbmclassifier__learning_rate': 0.00012248659336996215}
595
595
588
588


[I 2019-09-13 16:03:22,162] Finished trial#4 resulted in value: -0.7996339686325297. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 42, 'lgbmclassifier__num_leaves': 946, 'lgbmclassifier__subsample_for_bin': 3651446, 'lgbmclassifier__min_child_samples': 25190, 'lgbmclassifier__reg_alpha': 0.0003168838086725943, 'lgbmclassifier__colsample_bytree': 0.5016271580814694, 'lgbmclassifier__learning_rate': 2.6392844128253646e-05}
228
228
227
227


[I 2019-09-13 16:13:47,844] Finished trial#5 resulted in value: -0.840667961346465. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 134, 'lgbmclassifier__num_leaves': 135, 'lgbmclassifier__subsample_for_bin': 1679304, 'lgbmclassifier__min_child_samples': 44534, 'lgbmclassifier__reg_alpha': 2.9218529318490984e-09, 'lgbmclassifier__colsample_bytree': 0.5860293894513599, 'lgbmclassifier__learning_rate': 0.002098024030311807}
62
62
62
62


[I 2019-09-13 16:21:16,895] Finished trial#6 resulted in value: -0.8196477081727529. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 15, 'lgbmclassifier__num_leaves': 1412, 'lgbmclassifier__subsample_for_bin': 3215903, 'lgbmclassifier__min_child_samples': 4971, 'lgbmclassifier__reg_alpha': 7.960775965390245e-06, 'lgbmclassifier__colsample_bytree': 0.05726015646426102, 'lgbmclassifier__learning_rate': 0.0010402326523307659}
399
399
396
396


[I 2019-09-13 16:29:42,287] Finished trial#7 resulted in value: -0.8815412671229893. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 165, 'lgbmclassifier__num_leaves': 1120, 'lgbmclassifier__subsample_for_bin': 6754, 'lgbmclassifier__min_child_samples': 80356, 'lgbmclassifier__reg_alpha': 3.189051566911886e-08, 'lgbmclassifier__colsample_bytree': 0.12640136366179378, 'lgbmclassifier__learning_rate': 0.06964337219634135}
41
41
40
40


[I 2019-09-13 16:36:10,516] Finished trial#8 resulted in value: -0.8328112133216767. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 194, 'lgbmclassifier__num_leaves': 292, 'lgbmclassifier__subsample_for_bin': 2234442, 'lgbmclassifier__min_child_samples': 19883, 'lgbmclassifier__reg_alpha': 1.9552290776616274e-07, 'lgbmclassifier__colsample_bytree': 0.0002120187703952114, 'lgbmclassifier__learning_rate': 0.15248882994541058}
25
25
29
29


[I 2019-09-13 16:42:46,196] Finished trial#9 resulted in value: -0.8222678540390438. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 73, 'lgbmclassifier__num_leaves': 1482, 'lgbmclassifier__subsample_for_bin': 4975232, 'lgbmclassifier__min_child_samples': 96768, 'lgbmclassifier__reg_alpha': 1.1130799332995245, 'lgbmclassifier__colsample_bytree': 0.0018480600959298842, 'lgbmclassifier__learning_rate': 1.3322806810363992}
141
141
136
136
137
137
139
139
141
141
140
140
141
141
141
141


[I 2019-09-13 17:09:11,523] Finished trial#10 resulted in value: -0.886444293826447. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 71, 'lgbmclassifier__num_leaves': 1499, 'lgbmclassifier__subsample_for_bin': 4817090, 'lgbmclassifier__min_child_samples': 98004, 'lgbmclassifier__reg_alpha': 5.71419149221798, 'lgbmclassifier__colsample_bytree': 0.0019282178941608373, 'lgbmclassifier__learning_rate': 1.851680176111229}
143
143
139
139


[I 2019-09-13 17:16:27,007] Finished trial#11 resulted in value: -0.8621721650147123. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 96, 'lgbmclassifier__num_leaves': 1201, 'lgbmclassifier__subsample_for_bin': 4747153, 'lgbmclassifier__min_child_samples': 93783, 'lgbmclassifier__reg_alpha': 1.4531230461262523, 'lgbmclassifier__colsample_bytree': 0.002367518915349692, 'lgbmclassifier__learning_rate': 1.7938911578166394}
98
98
102
102


[I 2019-09-13 17:23:10,225] Finished trial#12 resulted in value: -0.8590491555053165. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 71, 'lgbmclassifier__num_leaves': 1274, 'lgbmclassifier__subsample_for_bin': 3322534, 'lgbmclassifier__min_child_samples': 58392, 'lgbmclassifier__reg_alpha': 0.041551850330248746, 'lgbmclassifier__colsample_bytree': 0.010115965283749315, 'lgbmclassifier__learning_rate': 0.019013406682478626}
143
143
139
139


[I 2019-09-13 17:29:44,325] Finished trial#13 resulted in value: -0.831717748453506. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 111, 'lgbmclassifier__num_leaves': 902, 'lgbmclassifier__subsample_for_bin': 4923633, 'lgbmclassifier__min_child_samples': 89782, 'lgbmclassifier__reg_alpha': 0.0001879713362832381, 'lgbmclassifier__colsample_bytree': 0.0006980643300354153, 'lgbmclassifier__learning_rate': 1.5794838841961794}
83
83
83
83


[I 2019-09-13 17:36:13,642] Finished trial#14 resulted in value: -0.8754014049868699. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 39, 'lgbmclassifier__num_leaves': 958, 'lgbmclassifier__subsample_for_bin': 4113462, 'lgbmclassifier__min_child_samples': 55498, 'lgbmclassifier__reg_alpha': 0.12385140916975897, 'lgbmclassifier__colsample_bytree': 0.06146020898188049, 'lgbmclassifier__learning_rate': 0.014543649315825542}
238
238
238
238


[I 2019-09-13 17:43:11,782] Finished trial#15 resulted in value: -0.8795740493466351. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 78, 'lgbmclassifier__num_leaves': 664, 'lgbmclassifier__subsample_for_bin': 2772053, 'lgbmclassifier__min_child_samples': 82392, 'lgbmclassifier__reg_alpha': 0.0009430431489978852, 'lgbmclassifier__colsample_bytree': 0.022022977701625156, 'lgbmclassifier__learning_rate': 0.7413142126154312}
129
129
128
128
127
127
128
128
131
131
130
130
132
132
129
129


[I 2019-09-13 18:11:07,835] Finished trial#16 resulted in value: -0.8962775816879598. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 109, 'lgbmclassifier__num_leaves': 700, 'lgbmclassifier__subsample_for_bin': 2779244, 'lgbmclassifier__min_child_samples': 66481, 'lgbmclassifier__reg_alpha': 1.5130430734162468e-05, 'lgbmclassifier__colsample_bytree': 0.03608062193201811, 'lgbmclassifier__learning_rate': 0.5215030572258339}
83
83
85
85
82
82
84
84
79
79
83
83
80
80
82
82


[I 2019-09-13 18:39:00,903] Finished trial#17 resulted in value: -0.8983460233560768. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 131, 'lgbmclassifier__num_leaves': 485, 'lgbmclassifier__subsample_for_bin': 2326880, 'lgbmclassifier__min_child_samples': 43227, 'lgbmclassifier__reg_alpha': 4.799921161115478e-06, 'lgbmclassifier__colsample_bytree': 0.181562885684635, 'lgbmclassifier__learning_rate': 0.04360970282385008}
63
63
66
66
62
62
62
62
60
60
60
60
64
64
61
61


[I 2019-09-13 19:07:48,904] Finished trial#18 resulted in value: -0.8893772952463321. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 110, 'lgbmclassifier__num_leaves': 821, 'lgbmclassifier__subsample_for_bin': 2826011, 'lgbmclassifier__min_child_samples': 62777, 'lgbmclassifier__reg_alpha': 7.88909467610149e-07, 'lgbmclassifier__colsample_bytree': 0.03151689919995013, 'lgbmclassifier__learning_rate': 0.007349547916245932}
83
83
84
84


[I 2019-09-13 19:14:28,232] Finished trial#19 resulted in value: -0.8008869381962679. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 45, 'lgbmclassifier__num_leaves': 1073, 'lgbmclassifier__subsample_for_bin': 3961188, 'lgbmclassifier__min_child_samples': 66692, 'lgbmclassifier__reg_alpha': 6.642545622100097e-05, 'lgbmclassifier__colsample_bytree': 0.005005626886949522, 'lgbmclassifier__learning_rate': 0.45180451901445356}
215
215
212
212
217
217
220
220
210
210
219
219
218
218
213
213


[I 2019-09-13 19:41:53,168] Finished trial#20 resulted in value: -0.8897917661520284. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 91, 'lgbmclassifier__num_leaves': 686, 'lgbmclassifier__subsample_for_bin': 2804764, 'lgbmclassifier__min_child_samples': 79298, 'lgbmclassifier__reg_alpha': 0.0023470911116013453, 'lgbmclassifier__colsample_bytree': 0.024427389265935958, 'lgbmclassifier__learning_rate': 0.4723302359684374}
106
106
108
108
110
110
107
107
106
106
113
113
105
105
109
109


[I 2019-09-13 20:09:24,577] Finished trial#21 resulted in value: -0.8941131712709374. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 130, 'lgbmclassifier__num_leaves': 591, 'lgbmclassifier__subsample_for_bin': 1943799, 'lgbmclassifier__min_child_samples': 85136, 'lgbmclassifier__reg_alpha': 0.0029496340595582295, 'lgbmclassifier__colsample_bytree': 0.007236926643866486, 'lgbmclassifier__learning_rate': 0.7213163359396888}
64
64
67
67


[I 2019-09-13 20:16:05,675] Finished trial#22 resulted in value: -0.8674593219567555. Current best value is -0.8996888834888637 with parameters: {'selectfrommodel__threshold': 80, 'lgbmclassifier__num_leaves': 964, 'lgbmclassifier__subsample_for_bin': 3809907, 'lgbmclassifier__min_child_samples': 70895, 'lgbmclassifier__reg_alpha': 0.020722815534357546, 'lgbmclassifier__colsample_bytree': 0.021012409267783357, 'lgbmclassifier__learning_rate': 0.5444138809014398}.


{'selectfrommodel__threshold': 85, 'lgbmclassifier__num_leaves': 775, 'lgbmclassifier__subsample_for_bin': 2884713, 'lgbmclassifier__min_child_samples': 48881, 'lgbmclassifier__reg_alpha': 8.187706591769971e-05, 'lgbmclassifier__colsample_bytree': 0.1649794256655354, 'lgbmclassifier__learning_rate': 0.08125756562044507}
117
117
116
116
117
117
119
119
124
124
119
119
116
116
118
118


[I 2019-09-13 20:45:14,076] Finished trial#23 resulted in value: -0.9108946900963909. Current best value is -0.9108946900963909 with parameters: {'selectfrommodel__threshold': 85, 'lgbmclassifier__num_leaves': 775, 'lgbmclassifier__subsample_for_bin': 2884713, 'lgbmclassifier__min_child_samples': 48881, 'lgbmclassifier__reg_alpha': 8.187706591769971e-05, 'lgbmclassifier__colsample_bytree': 0.1649794256655354, 'lgbmclassifier__learning_rate': 0.08125756562044507}.


{'selectfrommodel__threshold': 60, 'lgbmclassifier__num_leaves': 1014, 'lgbmclassifier__subsample_for_bin': 3378427, 'lgbmclassifier__min_child_samples': 37185, 'lgbmclassifier__reg_alpha': 4.8256178011052756e-11, 'lgbmclassifier__colsample_bytree': 0.2390295587213087, 'lgbmclassifier__learning_rate': 0.07612111274027039}
163
163
163
163
170
170
171
171
165
165
168
168
170
170
173
173


[I 2019-09-13 21:14:52,813] Finished trial#24 resulted in value: -0.9235310497026047. Current best value is -0.9235310497026047 with parameters: {'selectfrommodel__threshold': 60, 'lgbmclassifier__num_leaves': 1014, 'lgbmclassifier__subsample_for_bin': 3378427, 'lgbmclassifier__min_child_samples': 37185, 'lgbmclassifier__reg_alpha': 4.8256178011052756e-11, 'lgbmclassifier__colsample_bytree': 0.2390295587213087, 'lgbmclassifier__learning_rate': 0.07612111274027039}.


{'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 826, 'lgbmclassifier__subsample_for_bin': 3351576, 'lgbmclassifier__min_child_samples': 33341, 'lgbmclassifier__reg_alpha': 1.2296368838346156e-10, 'lgbmclassifier__colsample_bytree': 0.22845843824449125, 'lgbmclassifier__learning_rate': 0.06818071120572061}
173
173
170
170
178
178
175
175
176
176
184
184
178
178
175
175


[I 2019-09-13 21:44:59,805] Finished trial#25 resulted in value: -0.9251830886335525. Current best value is -0.9251830886335525 with parameters: {'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 826, 'lgbmclassifier__subsample_for_bin': 3351576, 'lgbmclassifier__min_child_samples': 33341, 'lgbmclassifier__reg_alpha': 1.2296368838346156e-10, 'lgbmclassifier__colsample_bytree': 0.22845843824449125, 'lgbmclassifier__learning_rate': 0.06818071120572061}.


{'selectfrommodel__threshold': 54, 'lgbmclassifier__num_leaves': 806, 'lgbmclassifier__subsample_for_bin': 3248300, 'lgbmclassifier__min_child_samples': 33930, 'lgbmclassifier__reg_alpha': 1.9624258916573002e-11, 'lgbmclassifier__colsample_bytree': 0.2489788094329805, 'lgbmclassifier__learning_rate': 0.06555441812976279}
181
181
184
184
187
187
187
187
184
184
190
190
185
185
186
186


[I 2019-09-13 22:15:07,592] Finished trial#26 resulted in value: -0.9249272807840303. Current best value is -0.9251830886335525 with parameters: {'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 826, 'lgbmclassifier__subsample_for_bin': 3351576, 'lgbmclassifier__min_child_samples': 33341, 'lgbmclassifier__reg_alpha': 1.2296368838346156e-10, 'lgbmclassifier__colsample_bytree': 0.22845843824449125, 'lgbmclassifier__learning_rate': 0.06818071120572061}.


{'selectfrommodel__threshold': 25, 'lgbmclassifier__num_leaves': 1070, 'lgbmclassifier__subsample_for_bin': 3389532, 'lgbmclassifier__min_child_samples': 33702, 'lgbmclassifier__reg_alpha': 1.8637035006551452e-11, 'lgbmclassifier__colsample_bytree': 0.2929234713175779, 'lgbmclassifier__learning_rate': 0.027731925181667925}
291
291
297
297
292
292
292
292
297
297
294
294
297
297
299
299


[I 2019-09-13 22:48:07,044] Finished trial#27 resulted in value: -0.9124453212015301. Current best value is -0.9251830886335525 with parameters: {'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 826, 'lgbmclassifier__subsample_for_bin': 3351576, 'lgbmclassifier__min_child_samples': 33341, 'lgbmclassifier__reg_alpha': 1.2296368838346156e-10, 'lgbmclassifier__colsample_bytree': 0.22845843824449125, 'lgbmclassifier__learning_rate': 0.06818071120572061}.


{'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 844, 'lgbmclassifier__subsample_for_bin': 4479167, 'lgbmclassifier__min_child_samples': 36047, 'lgbmclassifier__reg_alpha': 1.1344878994971624e-11, 'lgbmclassifier__colsample_bytree': 0.3266856867307047, 'lgbmclassifier__learning_rate': 0.006917779363749246}
173
173
170
170


[I 2019-09-13 22:55:28,342] Finished trial#28 resulted in value: -0.8812836444864776. Current best value is -0.9251830886335525 with parameters: {'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 826, 'lgbmclassifier__subsample_for_bin': 3351576, 'lgbmclassifier__min_child_samples': 33341, 'lgbmclassifier__reg_alpha': 1.2296368838346156e-10, 'lgbmclassifier__colsample_bytree': 0.22845843824449125, 'lgbmclassifier__learning_rate': 0.06818071120572061}.


{'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 1274, 'lgbmclassifier__subsample_for_bin': 3635263, 'lgbmclassifier__min_child_samples': 31674, 'lgbmclassifier__reg_alpha': 1.1016423515088317e-10, 'lgbmclassifier__colsample_bytree': 0.9124095174643158, 'lgbmclassifier__learning_rate': 0.0717684934187983}
173
173
170
170
178
178
175
175
176
176
184
184
178
178
175
175


[I 2019-09-13 23:31:01,996] Finished trial#29 resulted in value: -0.9326031963367543. Current best value is -0.9326031963367543 with parameters: {'selectfrommodel__threshold': 57, 'lgbmclassifier__num_leaves': 1274, 'lgbmclassifier__subsample_for_bin': 3635263, 'lgbmclassifier__min_child_samples': 31674, 'lgbmclassifier__reg_alpha': 1.1016423515088317e-10, 'lgbmclassifier__colsample_bytree': 0.9124095174643158, 'lgbmclassifier__learning_rate': 0.0717684934187983}.


{'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1244, 'lgbmclassifier__subsample_for_bin': 4347339, 'lgbmclassifier__min_child_samples': 12037, 'lgbmclassifier__reg_alpha': 2.336079214340847e-10, 'lgbmclassifier__colsample_bytree': 0.9395862712810226, 'lgbmclassifier__learning_rate': 0.14605897446735108}
280
280
290
290
285
285
284
284
291
291
285
285
284
284
284
284


[I 2019-09-14 00:24:46,617] Finished trial#30 resulted in value: -0.9611633269303406. Current best value is -0.9611633269303406 with parameters: {'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1244, 'lgbmclassifier__subsample_for_bin': 4347339, 'lgbmclassifier__min_child_samples': 12037, 'lgbmclassifier__reg_alpha': 2.336079214340847e-10, 'lgbmclassifier__colsample_bytree': 0.9395862712810226, 'lgbmclassifier__learning_rate': 0.14605897446735108}.


{'selectfrommodel__threshold': 55, 'lgbmclassifier__num_leaves': 1337, 'lgbmclassifier__subsample_for_bin': 4342384, 'lgbmclassifier__min_child_samples': 12448, 'lgbmclassifier__reg_alpha': 1.9530720074903245e-10, 'lgbmclassifier__colsample_bytree': 0.9571032746480657, 'lgbmclassifier__learning_rate': 0.18870013181301803}
180
180
180
180
185
185
184
184
181
181
188
188
185
185
185
185


[I 2019-09-14 01:08:43,534] Finished trial#31 resulted in value: -0.9599752750689291. Current best value is -0.9611633269303406 with parameters: {'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1244, 'lgbmclassifier__subsample_for_bin': 4347339, 'lgbmclassifier__min_child_samples': 12037, 'lgbmclassifier__reg_alpha': 2.336079214340847e-10, 'lgbmclassifier__colsample_bytree': 0.9395862712810226, 'lgbmclassifier__learning_rate': 0.14605897446735108}.


{'selectfrommodel__threshold': 28, 'lgbmclassifier__num_leaves': 1326, 'lgbmclassifier__subsample_for_bin': 4344561, 'lgbmclassifier__min_child_samples': 14209, 'lgbmclassifier__reg_alpha': 1.8202659703017088e-10, 'lgbmclassifier__colsample_bytree': 0.9498677874036722, 'lgbmclassifier__learning_rate': 0.17046551107520821}
274
274
285
285
275
275
279
279
286
286
281
281
278
278
281
281


[I 2019-09-14 01:59:17,266] Finished trial#32 resulted in value: -0.960182083974176. Current best value is -0.9611633269303406 with parameters: {'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1244, 'lgbmclassifier__subsample_for_bin': 4347339, 'lgbmclassifier__min_child_samples': 12037, 'lgbmclassifier__reg_alpha': 2.336079214340847e-10, 'lgbmclassifier__colsample_bytree': 0.9395862712810226, 'lgbmclassifier__learning_rate': 0.14605897446735108}.


{'selectfrommodel__threshold': 28, 'lgbmclassifier__num_leaves': 1350, 'lgbmclassifier__subsample_for_bin': 4478735, 'lgbmclassifier__min_child_samples': 13034, 'lgbmclassifier__reg_alpha': 4.137319863026573e-10, 'lgbmclassifier__colsample_bytree': 0.9017062678627178, 'lgbmclassifier__learning_rate': 0.17848821280990468}
274
274
285
285
275
275
279
279
286
286
281
281
278
278
281
281


[I 2019-09-14 02:50:23,351] Finished trial#33 resulted in value: -0.9603892638712648. Current best value is -0.9611633269303406 with parameters: {'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1244, 'lgbmclassifier__subsample_for_bin': 4347339, 'lgbmclassifier__min_child_samples': 12037, 'lgbmclassifier__reg_alpha': 2.336079214340847e-10, 'lgbmclassifier__colsample_bytree': 0.9395862712810226, 'lgbmclassifier__learning_rate': 0.14605897446735108}.


{'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1379, 'lgbmclassifier__subsample_for_bin': 4361526, 'lgbmclassifier__min_child_samples': 11244, 'lgbmclassifier__reg_alpha': 1.0807896139262164e-09, 'lgbmclassifier__colsample_bytree': 0.9140938764571404, 'lgbmclassifier__learning_rate': 0.18912666104291284}
280
280
290
290
285
285
284
284
291
291
285
285
284
284
284
284


[I 2019-09-14 03:45:07,056] Finished trial#34 resulted in value: -0.9617737934032883. Current best value is -0.9617737934032883 with parameters: {'selectfrommodel__threshold': 27, 'lgbmclassifier__num_leaves': 1379, 'lgbmclassifier__subsample_for_bin': 4361526, 'lgbmclassifier__min_child_samples': 11244, 'lgbmclassifier__reg_alpha': 1.0807896139262164e-09, 'lgbmclassifier__colsample_bytree': 0.9140938764571404, 'lgbmclassifier__learning_rate': 0.18912666104291284}.


{'selectfrommodel__threshold': 2, 'lgbmclassifier__num_leaves': 1383, 'lgbmclassifier__subsample_for_bin': 4469896, 'lgbmclassifier__min_child_samples': 1292, 'lgbmclassifier__reg_alpha': 2.001811876397163e-09, 'lgbmclassifier__colsample_bytree': 0.9873066190734683, 'lgbmclassifier__learning_rate': 0.1793404007647492}
800
800
810
810
805
805
808
808
799
799
813
813
796
796
812
812


[I 2019-09-14 07:38:54,386] Finished trial#35 resulted in value: -0.9661841702672479. Current best value is -0.9661841702672479 with parameters: {'selectfrommodel__threshold': 2, 'lgbmclassifier__num_leaves': 1383, 'lgbmclassifier__subsample_for_bin': 4469896, 'lgbmclassifier__min_child_samples': 1292, 'lgbmclassifier__reg_alpha': 2.001811876397163e-09, 'lgbmclassifier__colsample_bytree': 0.9873066190734683, 'lgbmclassifier__learning_rate': 0.1793404007647492}.


{'selectfrommodel__threshold': 2, 'lgbmclassifier__num_leaves': 1406, 'lgbmclassifier__subsample_for_bin': 4596746, 'lgbmclassifier__min_child_samples': 2787, 'lgbmclassifier__reg_alpha': 1.9909124818304063e-09, 'lgbmclassifier__colsample_bytree': 0.4901999314700306, 'lgbmclassifier__learning_rate': 0.26666008814905623}
800
800
810
810
805
805
808
808
799
799
813
813
796
796
812


In [None]:
# Configure the model with the hyper-parameters held in `best_params`
# (defined outside this cell; presumably the winner of the search above -- confirm).
model.set_params(**best_params)

# Keyword options for the cross-validated AUC run, gathered in one place.
# `predict`, `X_test` and `submission` are forwarded unchanged; presumably they
# make the helper also produce test-set predictions -- confirm in codes.utils.
cv_options = dict(
    n_fold=N_FOLD,
    stratify=True,
    shuffle=True,
    random_state=RANDOM_STATE,
    predict=True,
    X_test=X_test,
    submission=sample_submission,
)

# Kept as the last expression so the cell still displays the helper's return value.
cross_val_score_auc(model, X_train, y_train, **cv_options)

In [None]:
# ROC accuracy: 0.9668942182909179, Train: 0.9999901167411397
# ROC accuracy: 0.9720552290202384, Train: 0.9999891233350843
# ROC accuracy: 0.9710663975253696, Train: 0.9999918268060299
# ROC accuracy: 0.9703005116766165, Train: 0.9999910116495871
# ROC accuracy: 0.9677524410936837, Train: 0.9999883123936292
# ROC accuracy: 0.970521434805755, Train: 0.9999753389326952
# ROC accuracy: 0.9709850608667766, Train: 0.9999787304381259
# ROC accuracy: 0.9708245135815027, Train: 0.9999796449943333


# Mean of the 8 ROC scores listed above: 0.9700499758576075