In [1]:
import os
import gc
import numpy as np
import pandas as pd
import joblib
from datetime import datetime

from lightgbm import LGBMClassifier
import optuna
from prunedcv import PrunedCV

from codes.utils import import_data, drop_columns, cross_val_score_auc, reduce_mem_usage
from codes.fe_browser import latest
from codes.fe_emails import proton, mappings, labeling
from codes.fe_cards import stats
from codes.fe_date import dates
from codes.fe_relatives import divisions
from codes.fe_categorical import pairs, wtf
from codes.prepro import prepro
from codes.fe_users import users_stats

from sklearn.feature_selection import RFECV, SelectFromModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.base import TransformerMixin

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [3]:
DATA_PATH = '../input/'
SEARCH_PARAMS = True
SEARCH_FEATURES = False
N_FOLD = 8

In [4]:
train, test, sample_submission = import_data(DATA_PATH)

### Some Feature Engineering

drop columns, count encoding, aggregation, fillna

In [5]:
train, test = users_stats(train, test)

# train, test = drop_columns(train, test)

train, test = latest(train, test)

train, test = proton(train, test)

train['nulls1'] = train.isna().sum(axis=1)
test['nulls1'] = test.isna().sum(axis=1)

train, test = mappings(train, test)
train, test = labeling(train, test)

train, test = stats(train, test)

train, test = divisions(train, test)

train, test = dates(train, test)

train, test = pairs(train, test)
train, test = wtf(train, test)

y_train = train['isFraud'].copy()


X_train = train.drop('isFraud', axis=1)
X_test = test.copy()

del train, test

#fill in mean for floats
X_train, X_test = prepro(X_train, X_test)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  all_data = pd.concat([train, test])


### Model and training

In [6]:
columns = list(set(
['C{}'.format(i) for i in range(1,15)] \
+ ['D{}'.format(i) for i in range(1,16)] \
+ ['V' + str(i) for i in range(1,340)]))

for col in columns:
    if col in X_train.columns:
        X_train[col + '_' + 'trx'] = X_train[col] / X_train.TransactionAmt
        X_test[col + '_' + 'trx'] = X_test[col] / X_test.TransactionAmt

In [7]:
X_train = reduce_mem_usage(X_train)
X_test = reduce_mem_usage(X_test)

Mem. usage decreased to 1944.96 Mb (6.7% reduction)
Mem. usage decreased to 1653.57 Mb (6.7% reduction)


In [8]:
X_train[X_train == np.inf] = -1
X_train[X_train == -np.inf] = -1
X_test[X_test == np.inf] = -1
X_test[X_test == -np.inf] = -1
X_train[X_test.isna()] = -1
X_test[X_test.isna()] = -1

In [9]:
X_test.drop(['TransactionDT'], axis=1, inplace=True)
X_train.drop(['TransactionDT'], axis=1, inplace=True)

In [37]:
sfm = SelectFromModel(LGBMClassifier(metric='auc'), threshold=0.5)
sfm.fit(X_train, y_train)


print(X_train.shape[1])
columns = list(X_train.columns[sfm.get_support()])
print(len(columns))
X_train = X_train.loc[:,columns]
X_test = X_test.loc[:,columns]

SelectFromModel(estimator=LGBMClassifier(boosting_type='gbdt',
                                         class_weight=None,
                                         colsample_bytree=1.0,
                                         importance_type='split',
                                         learning_rate=0.1, max_depth=-1,
                                         metric='auc', min_child_samples=20,
                                         min_child_weight=0.001,
                                         min_split_gain=0.0, n_estimators=100,
                                         n_jobs=-1, num_leaves=31,
                                         objective=None, random_state=None,
                                         reg_alpha=0.0, reg_lambda=0.0,
                                         silent=True, subsample=1.0,
                                         subsample_for_bin=200000,
                                         subsample_freq=0),
                max_features=None, norm_or

In [46]:
class Counter(TransformerMixin):
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        print(X.shape[1])
        return X

In [56]:
model = make_pipeline(
    SelectFromModel(LGBMClassifier(metric='auc')),
#     Counter(),
    LGBMClassifier(metric='auc',
                   n_estimators=1000)
)

In [48]:
prun = PrunedCV(N_FOLD, 0.03, minimize=False)

In [50]:
def objective(trial):
    
    joblib.dump(study, 'study.pkl') 

    
    params = {
        'selectfrommodel__threshold': trial.suggest_int('selectfrommodel__threshold', 1, 100),
        'lgbmclassifier__num_leaves': trial.suggest_int('lgbmclassifier__num_leaves', 10, 1500), 
        'lgbmclassifier__subsample_for_bin': trial.suggest_int('lgbmclassifier__subsample_for_bin', 10, 3000000), 
        'lgbmclassifier__min_child_samples': trial.suggest_int('lgbmclassifier__min_child_samples', 2, 100000), 
        'lgbmclassifier__reg_alpha': trial.suggest_loguniform('lgbmclassifier__reg_alpha', 0.00000000001, 10.0),
        'lgbmclassifier__colsample_bytree': trial.suggest_loguniform('lgbmclassifier__colsample_bytree', 0.0001, 1.0),
        'lgbmclassifier__learning_rate': trial.suggest_loguniform('lgbmclassifier__learning_rate', 0.000001, 10.0)
    }
    
    print(params)
    
    model.set_params(**params)
    return prun.cross_val_score(model, 
                                X_train, 
                                y_train, 
                                metric='auc', 
                                shuffle=True, 
                                random_state=42)

In [55]:
if SEARCH_PARAMS:
    if os.path.isfile('study.pkl'):
        study = joblib.load('study.pkl')
    else:
        study = optuna.create_study()

    study.optimize(objective, timeout=60 * 60 * 22)
    joblib.dump(study, 'study.pkl')
    best_params = study.best_params

else:

    best_params = {
        'selectfrommodel__threshold': 11,
        'lgbmclassifier__num_leaves': 330,
        'lgbmclassifier__subsample_for_bin': 2077193,
        'lgbmclassifier__min_child_samples': 2227,
        'lgbmclassifier__reg_alpha': 0.16758905622425835,
        'lgbmclassifier__colsample_bytree': 0.49030006727392056,
        'lgbmclassifier__learning_rate': 0.07916040470631734
    }

{'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 290, 'lgbmclassifier__subsample_for_bin': 2022797, 'lgbmclassifier__min_child_samples': 8958, 'lgbmclassifier__reg_alpha': 0.0592854713909757, 'lgbmclassifier__colsample_bytree': 0.17295118760469255, 'lgbmclassifier__learning_rate': 0.10293221244710013}
74
74
66
66
73
73
80
80
73
73
79
79
73
73
72
72


[I 2019-09-11 09:10:17,136] Finished trial#125 resulted in value: -0.9567001742786652. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 17, 'lgbmclassifier__num_leaves': 317, 'lgbmclassifier__subsample_for_bin': 1898697, 'lgbmclassifier__min_child_samples': 5589, 'lgbmclassifier__reg_alpha': 0.2601135670697154, 'lgbmclassifier__colsample_bytree': 0.3706504276055055, 'lgbmclassifier__learning_rate': 0.026075703002899617}
42
42
41
41
44
44
42
42
44
44
44
44
44
44
42
42


[I 2019-09-11 09:25:29,837] Finished trial#126 resulted in value: -0.9512384838876272. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 513, 'lgbmclassifier__subsample_for_bin': 1722963, 'lgbmclassifier__min_child_samples': 2325, 'lgbmclassifier__reg_alpha': 0.08880634208100523, 'lgbmclassifier__colsample_bytree': 0.5170543055441995, 'lgbmclassifier__learning_rate': 0.1719791491681687}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-11 10:03:13,962] Finished trial#127 resulted in value: -0.9665535039905091. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 5, 'lgbmclassifier__num_leaves': 511, 'lgbmclassifier__subsample_for_bin': 2095030, 'lgbmclassifier__min_child_samples': 2407, 'lgbmclassifier__reg_alpha': 0.09680545700369973, 'lgbmclassifier__colsample_bytree': 0.5273887857888594, 'lgbmclassifier__learning_rate': 0.18944828528428273}
177
177
169
169
172
172
175
175
179
179
177
177
168
168
180
180


[I 2019-09-11 10:48:14,808] Finished trial#128 resulted in value: -0.9719525866691703. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 518, 'lgbmclassifier__subsample_for_bin': 1733701, 'lgbmclassifier__min_child_samples': 2267, 'lgbmclassifier__reg_alpha': 0.12482265454476321, 'lgbmclassifier__colsample_bytree': 0.5180199461153596, 'lgbmclassifier__learning_rate': 0.15967646004259173}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-11 11:26:32,457] Finished trial#129 resulted in value: -0.9722932750574992. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 582, 'lgbmclassifier__subsample_for_bin': 1732418, 'lgbmclassifier__min_child_samples': 3186, 'lgbmclassifier__reg_alpha': 0.09966791223734184, 'lgbmclassifier__colsample_bytree': 0.5090494913996954, 'lgbmclassifier__learning_rate': 0.21740591922635605}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-11 12:01:30,789] Finished trial#130 resulted in value: -0.9720167335206623. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 4, 'lgbmclassifier__num_leaves': 645, 'lgbmclassifier__subsample_for_bin': 1771098, 'lgbmclassifier__min_child_samples': 11143, 'lgbmclassifier__reg_alpha': 0.029070483652720292, 'lgbmclassifier__colsample_bytree': 0.8447027055771761, 'lgbmclassifier__learning_rate': 0.21638830662936961}
220
220
210
210
215
215
209
209
217
217
212
212
212
212
216
216


[I 2019-09-11 12:43:55,189] Finished trial#131 resulted in value: -0.968854305606269. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 4, 'lgbmclassifier__num_leaves': 666, 'lgbmclassifier__subsample_for_bin': 1767621, 'lgbmclassifier__min_child_samples': 11699, 'lgbmclassifier__reg_alpha': 0.03988797882959766, 'lgbmclassifier__colsample_bytree': 0.5277574726780381, 'lgbmclassifier__learning_rate': 0.21174172359700993}
220
220
210
210
215
215
209
209
217
217
212
212
212
212
216
216


[I 2019-09-11 13:16:05,169] Finished trial#132 resulted in value: -0.9660668142780159. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 10, 'lgbmclassifier__num_leaves': 573, 'lgbmclassifier__subsample_for_bin': 1534293, 'lgbmclassifier__min_child_samples': 10084, 'lgbmclassifier__reg_alpha': 0.029723171346149144, 'lgbmclassifier__colsample_bytree': 0.26160704773930654, 'lgbmclassifier__learning_rate': 0.1352118372228258}
83
83
77
77
84
84
88
88
83
83
90
90
81
81
84
84


[I 2019-09-11 13:38:37,743] Finished trial#133 resulted in value: -0.9619398238358029. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 13, 'lgbmclassifier__num_leaves': 739, 'lgbmclassifier__subsample_for_bin': 1825790, 'lgbmclassifier__min_child_samples': 7735, 'lgbmclassifier__reg_alpha': 0.4708796940783995, 'lgbmclassifier__colsample_bytree': 0.8113796623924662, 'lgbmclassifier__learning_rate': 0.0936399239870199}
59
59
58
58
58
58
58
58
62
62
59
59
59
59
55
55


[I 2019-09-11 14:05:24,812] Finished trial#134 resulted in value: -0.9633802975151666. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 4, 'lgbmclassifier__num_leaves': 646, 'lgbmclassifier__subsample_for_bin': 1970506, 'lgbmclassifier__min_child_samples': 5222, 'lgbmclassifier__reg_alpha': 0.13724768575268448, 'lgbmclassifier__colsample_bytree': 0.8450038078247972, 'lgbmclassifier__learning_rate': 0.24956126601037854}
220
220
210
210
215
215
209
209
217
217
212
212
212
212
216
216


[I 2019-09-11 14:45:04,088] Finished trial#135 resulted in value: -0.9698040962918637. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 5, 'lgbmclassifier__num_leaves': 614, 'lgbmclassifier__subsample_for_bin': 1981302, 'lgbmclassifier__min_child_samples': 14674, 'lgbmclassifier__reg_alpha': 0.15378859624592445, 'lgbmclassifier__colsample_bytree': 0.8126859727198114, 'lgbmclassifier__learning_rate': 0.1161742906406667}
177
177
169
169
172
172
175
175
179
179
177
177
168
168
180
180


[I 2019-09-11 15:10:35,899] Finished trial#136 resulted in value: -0.9605845245804054. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 7, 'lgbmclassifier__num_leaves': 633, 'lgbmclassifier__subsample_for_bin': 2094937, 'lgbmclassifier__min_child_samples': 5379, 'lgbmclassifier__reg_alpha': 0.302828306606305, 'lgbmclassifier__colsample_bytree': 0.4521235192563191, 'lgbmclassifier__learning_rate': 0.25932534506492977}
120
120
123
123
126
126
128
128
128
128
119
119
123
123
130
130


[I 2019-09-11 15:41:46,152] Finished trial#137 resulted in value: -0.9677253194587199. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 3, 'lgbmclassifier__num_leaves': 787, 'lgbmclassifier__subsample_for_bin': 1570676, 'lgbmclassifier__min_child_samples': 2468, 'lgbmclassifier__reg_alpha': 0.07529119424889144, 'lgbmclassifier__colsample_bytree': 0.5830485981040581, 'lgbmclassifier__learning_rate': 0.06532271492321932}
258
258
260
260
263
263
256
256
264
264
262
262
270
270
269
269


[I 2019-09-11 17:04:12,452] Finished trial#138 resulted in value: -0.9716881953154586. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 3, 'lgbmclassifier__num_leaves': 748, 'lgbmclassifier__subsample_for_bin': 1655117, 'lgbmclassifier__min_child_samples': 2719, 'lgbmclassifier__reg_alpha': 0.05056217265012496, 'lgbmclassifier__colsample_bytree': 0.5713049952056655, 'lgbmclassifier__learning_rate': 0.06881203235835905}
258
258
260
260
263
263
256
256
264
264
262
262
270
270
269
269


[I 2019-09-11 18:08:42,630] Finished trial#139 resulted in value: -0.9718536682120718. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 1, 'lgbmclassifier__num_leaves': 866, 'lgbmclassifier__subsample_for_bin': 1599471, 'lgbmclassifier__min_child_samples': 3221, 'lgbmclassifier__reg_alpha': 0.06783081754854196, 'lgbmclassifier__colsample_bytree': 0.592926779398248, 'lgbmclassifier__learning_rate': 0.08253655547748885}
401
401
400
400
393
393
395
395
400
400
406
406
405
405
400
400


[I 2019-09-11 19:07:52,439] Finished trial#140 resulted in value: -0.9712549375983156. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 1, 'lgbmclassifier__num_leaves': 823, 'lgbmclassifier__subsample_for_bin': 1566930, 'lgbmclassifier__min_child_samples': 2337, 'lgbmclassifier__reg_alpha': 0.10505900974443283, 'lgbmclassifier__colsample_bytree': 0.5597348936405419, 'lgbmclassifier__learning_rate': 0.07247924026491974}
401
401
400
400
393
393
395
395
400
400
406
406
405
405
400
400


[I 2019-09-11 20:11:31,738] Finished trial#141 resulted in value: -0.9715015674650406. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 1, 'lgbmclassifier__num_leaves': 901, 'lgbmclassifier__subsample_for_bin': 1398425, 'lgbmclassifier__min_child_samples': 2766, 'lgbmclassifier__reg_alpha': 0.07273624749032628, 'lgbmclassifier__colsample_bytree': 0.512322828355236, 'lgbmclassifier__learning_rate': 0.07118864548038827}
401
401
400
400
393
393
395
395
400
400
406
406
405
405
400
400


[I 2019-09-11 21:09:00,545] Finished trial#142 resulted in value: -0.9711650264463491. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 1, 'lgbmclassifier__num_leaves': 861, 'lgbmclassifier__subsample_for_bin': 1572605, 'lgbmclassifier__min_child_samples': 2697, 'lgbmclassifier__reg_alpha': 0.054287352202824074, 'lgbmclassifier__colsample_bytree': 0.3432539906328586, 'lgbmclassifier__learning_rate': 0.06647045082591056}
401
401
400
400
393
393
395
395
400
400
406
406
405
405
400
400


[I 2019-09-11 21:56:33,572] Finished trial#143 resulted in value: -0.9708800896529916. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 2, 'lgbmclassifier__num_leaves': 805, 'lgbmclassifier__subsample_for_bin': 1414258, 'lgbmclassifier__min_child_samples': 8024, 'lgbmclassifier__reg_alpha': 0.010277531419963617, 'lgbmclassifier__colsample_bytree': 0.49640564971367607, 'lgbmclassifier__learning_rate': 0.04841655443225752}
323
323
329
329
322
322
325
325
327
327
328
328
328
328
331
331


[I 2019-09-11 22:35:33,081] Finished trial#144 resulted in value: -0.9599685295151704. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 8, 'lgbmclassifier__num_leaves': 940, 'lgbmclassifier__subsample_for_bin': 1671586, 'lgbmclassifier__min_child_samples': 2337, 'lgbmclassifier__reg_alpha': 0.08901410515515777, 'lgbmclassifier__colsample_bytree': 0.5468470523253443, 'lgbmclassifier__learning_rate': 0.1254150998108348}
105
105
108
108
106
106
115
115
110
110
109
109
103
103
114
114


[I 2019-09-11 23:06:41,316] Finished trial#145 resulted in value: -0.9690077708712459. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 9, 'lgbmclassifier__num_leaves': 912, 'lgbmclassifier__subsample_for_bin': 1156632, 'lgbmclassifier__min_child_samples': 4204, 'lgbmclassifier__reg_alpha': 0.019420360482619876, 'lgbmclassifier__colsample_bytree': 0.2156606263445333, 'lgbmclassifier__learning_rate': 0.07617974907221624}
97
97
91
91
90
90
99
99
93
93
99
99
91
91
93
93


[I 2019-09-11 23:26:22,873] Finished trial#146 resulted in value: -0.9698972990626721. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 1, 'lgbmclassifier__num_leaves': 756, 'lgbmclassifier__subsample_for_bin': 1587653, 'lgbmclassifier__min_child_samples': 6494, 'lgbmclassifier__reg_alpha': 0.2435129624304019, 'lgbmclassifier__colsample_bytree': 0.4210242948962073, 'lgbmclassifier__learning_rate': 0.15535345765379407}
401
401
400
400
393
393
395
395
400
400
406
406
405
405
400
400


[I 2019-09-12 00:08:17,434] Finished trial#147 resulted in value: -0.9687763964233038. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 12, 'lgbmclassifier__num_leaves': 1031, 'lgbmclassifier__subsample_for_bin': 1429760, 'lgbmclassifier__min_child_samples': 9944, 'lgbmclassifier__reg_alpha': 0.0037625265302217424, 'lgbmclassifier__colsample_bytree': 0.35002067306301277, 'lgbmclassifier__learning_rate': 0.044124158900068884}
63
63
61
61
64
64
70
70
67
67
67
67
67
67
63
63


[I 2019-09-12 00:25:17,244] Finished trial#148 resulted in value: -0.9520961824164655. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 2, 'lgbmclassifier__num_leaves': 845, 'lgbmclassifier__subsample_for_bin': 1542763, 'lgbmclassifier__min_child_samples': 2827, 'lgbmclassifier__reg_alpha': 0.0629067494377833, 'lgbmclassifier__colsample_bytree': 0.5669256885726723, 'lgbmclassifier__learning_rate': 0.06522972652589298}
323
323
329
329
322
322
325
325
327
327
328
328
328
328
331
331


[I 2019-09-12 01:22:16,687] Finished trial#149 resulted in value: -0.9649460880791105. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 890, 'lgbmclassifier__subsample_for_bin': 1621797, 'lgbmclassifier__min_child_samples': 2253, 'lgbmclassifier__reg_alpha': 0.04763223422927616, 'lgbmclassifier__colsample_bytree': 0.3454543831160964, 'lgbmclassifier__learning_rate': 0.0794624502360359}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-12 01:53:02,376] Finished trial#150 resulted in value: -0.9726618951231533. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 92, 'lgbmclassifier__num_leaves': 917, 'lgbmclassifier__subsample_for_bin': 1626264, 'lgbmclassifier__min_child_samples': 2072, 'lgbmclassifier__reg_alpha': 0.4848529586737055, 'lgbmclassifier__colsample_bytree': 0.4368101671304168, 'lgbmclassifier__learning_rate': 0.16452993308226174}




0


[W 2019-09-12 01:54:29,173] Setting status of trial#151 as TrialState.FAIL because of the following error: ValueError('Found array with 0 feature(s) (shape=(516722, 0)) while a minimum of 1 is required.',)
Traceback (most recent call last):
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/optuna/study.py", line 469, in _run_trial
    result = func(trial)
  File "<ipython-input-50-4e1f9a5ed058>", line 24, in objective
    random_state=42)
  File "/Users/piotrgabrys/data/python3/pruned-cv/prunedcv/src.py", line 445, in cross_val_score
    model.fit(x_train, y_train)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/sklearn/pipeline.py", line 356, in fit
    self._final_estimator.fit(Xt, y, **fit_params)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/lightgbm/sklearn.py", line 744, in fit
    callbacks=callbacks)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3

{'selectfrommodel__threshold': 98, 'lgbmclassifier__num_leaves': 880, 'lgbmclassifier__subsample_for_bin': 1333472, 'lgbmclassifier__min_child_samples': 2364, 'lgbmclassifier__reg_alpha': 0.04211462199404153, 'lgbmclassifier__colsample_bytree': 0.47474529760832573, 'lgbmclassifier__learning_rate': 0.09207147470193812}




0


[W 2019-09-12 01:55:58,498] Setting status of trial#152 as TrialState.FAIL because of the following error: ValueError('Found array with 0 feature(s) (shape=(516722, 0)) while a minimum of 1 is required.',)
Traceback (most recent call last):
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/optuna/study.py", line 469, in _run_trial
    result = func(trial)
  File "<ipython-input-50-4e1f9a5ed058>", line 24, in objective
    random_state=42)
  File "/Users/piotrgabrys/data/python3/pruned-cv/prunedcv/src.py", line 445, in cross_val_score
    model.fit(x_train, y_train)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/sklearn/pipeline.py", line 356, in fit
    self._final_estimator.fit(Xt, y, **fit_params)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3.30/lib/python3.6/site-packages/lightgbm/sklearn.py", line 744, in fit
    callbacks=callbacks)
  File "/Users/piotrgabrys/.pyenv/versions/miniconda3-4.3

{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 912, 'lgbmclassifier__subsample_for_bin': 1659024, 'lgbmclassifier__min_child_samples': 2216, 'lgbmclassifier__reg_alpha': 0.09273820181539873, 'lgbmclassifier__colsample_bytree': 0.45555002414426643, 'lgbmclassifier__learning_rate': 0.08947709712284115}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-12 02:30:35,342] Finished trial#153 resulted in value: -0.973200161229854. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 6, 'lgbmclassifier__num_leaves': 1119, 'lgbmclassifier__subsample_for_bin': 1652210, 'lgbmclassifier__min_child_samples': 5639, 'lgbmclassifier__reg_alpha': 0.4427967777390985, 'lgbmclassifier__colsample_bytree': 0.44743721122965985, 'lgbmclassifier__learning_rate': 0.09955289157064369}
143
143
151
151
144
144
144
144
150
150
139
139
144
144
148
148


[I 2019-09-12 02:56:52,057] Finished trial#154 resulted in value: -0.9702353449873944. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 9, 'lgbmclassifier__num_leaves': 804, 'lgbmclassifier__subsample_for_bin': 1489493, 'lgbmclassifier__min_child_samples': 1997, 'lgbmclassifier__reg_alpha': 0.1145545010066131, 'lgbmclassifier__colsample_bytree': 0.24931972410110093, 'lgbmclassifier__learning_rate': 0.17578576828574236}
97
97
91
91
90
90
99
99
93
93
99
99
91
91
93
93


[I 2019-09-12 03:19:37,961] Finished trial#155 resulted in value: -0.9718197195266196. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 9, 'lgbmclassifier__num_leaves': 806, 'lgbmclassifier__subsample_for_bin': 1714054, 'lgbmclassifier__min_child_samples': 8167, 'lgbmclassifier__reg_alpha': 0.03216383807997907, 'lgbmclassifier__colsample_bytree': 0.1910961375915295, 'lgbmclassifier__learning_rate': 0.3264543929503196}
97
97
91
91
90
90
99
99
93
93
99
99
91
91
93
93


[I 2019-09-12 03:36:58,582] Finished trial#156 resulted in value: -0.9691585024029197. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 14, 'lgbmclassifier__num_leaves': 756, 'lgbmclassifier__subsample_for_bin': 1505856, 'lgbmclassifier__min_child_samples': 1953, 'lgbmclassifier__reg_alpha': 0.010681133031451977, 'lgbmclassifier__colsample_bytree': 0.25133148467557653, 'lgbmclassifier__learning_rate': 0.14775494566939062}
56
56
52
52
51
51
53
53
58
58
53
53
50
50
46
46


[I 2019-09-12 03:56:45,615] Finished trial#157 resulted in value: -0.9722714464445517. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 14, 'lgbmclassifier__num_leaves': 776, 'lgbmclassifier__subsample_for_bin': 1487592, 'lgbmclassifier__min_child_samples': 5108, 'lgbmclassifier__reg_alpha': 0.011517417341468121, 'lgbmclassifier__colsample_bytree': 0.24786087611808094, 'lgbmclassifier__learning_rate': 0.17613349632668915}
56
56
52
52
51
51
53
53
58
58
53
53
50
50
46
46


[I 2019-09-12 04:13:57,024] Finished trial#158 resulted in value: -0.9716459834178822. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 10, 'lgbmclassifier__num_leaves': 685, 'lgbmclassifier__subsample_for_bin': 1886612, 'lgbmclassifier__min_child_samples': 7115, 'lgbmclassifier__reg_alpha': 0.20195474766244384, 'lgbmclassifier__colsample_bytree': 0.14923825542680466, 'lgbmclassifier__learning_rate': 0.164589655665321}
83
83
77
77
84
84
88
88
83
83
90
90
81
81
84
84


[I 2019-09-12 04:30:31,821] Finished trial#159 resulted in value: -0.9674560728878144. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 7, 'lgbmclassifier__num_leaves': 967, 'lgbmclassifier__subsample_for_bin': 1771996, 'lgbmclassifier__min_child_samples': 1885, 'lgbmclassifier__reg_alpha': 0.040910415422640914, 'lgbmclassifier__colsample_bytree': 0.31952561558754355, 'lgbmclassifier__learning_rate': 0.682450578794043}
120
120


[I 2019-09-12 04:32:30,359] Finished trial#160 resulted in value: -0.8379180379498764. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 13, 'lgbmclassifier__num_leaves': 760, 'lgbmclassifier__subsample_for_bin': 1295869, 'lgbmclassifier__min_child_samples': 5392, 'lgbmclassifier__reg_alpha': 0.011220609021468996, 'lgbmclassifier__colsample_bytree': 0.2536088239521666, 'lgbmclassifier__learning_rate': 0.17319033364723016}
59
59
58
58
58
58
58
58
62
62
59
59
59
59
55
55


[I 2019-09-12 04:49:43,631] Finished trial#161 resulted in value: -0.9702651774983981. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 17, 'lgbmclassifier__num_leaves': 894, 'lgbmclassifier__subsample_for_bin': 1483031, 'lgbmclassifier__min_child_samples': 4305, 'lgbmclassifier__reg_alpha': 0.0186174274197823, 'lgbmclassifier__colsample_bytree': 0.2187725126298745, 'lgbmclassifier__learning_rate': 0.12406992023237738}
42
42
41
41
44
44
42
42
44
44
44
44
44
44
42
42


[I 2019-09-12 05:06:19,670] Finished trial#162 resulted in value: -0.9693872830958297. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 15, 'lgbmclassifier__num_leaves': 721, 'lgbmclassifier__subsample_for_bin': 1518132, 'lgbmclassifier__min_child_samples': 9519, 'lgbmclassifier__reg_alpha': 0.5974215376491796, 'lgbmclassifier__colsample_bytree': 0.37253803578797645, 'lgbmclassifier__learning_rate': 0.3061899326570399}
52
52
47
47
47
47
48
48
52
52
50
50
46
46
43
43


[I 2019-09-12 05:22:26,774] Finished trial#163 resulted in value: -0.9681940681149601. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 15, 'lgbmclassifier__num_leaves': 832, 'lgbmclassifier__subsample_for_bin': 1677971, 'lgbmclassifier__min_child_samples': 1816, 'lgbmclassifier__reg_alpha': 0.11958464519651738, 'lgbmclassifier__colsample_bytree': 0.09826618251351119, 'lgbmclassifier__learning_rate': 0.2229967198008842}
52
52
47
47
47
47
48
48
52
52
50
50
46
46
43
43


[I 2019-09-12 05:39:08,970] Finished trial#164 resulted in value: -0.971919266470605. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 792, 'lgbmclassifier__subsample_for_bin': 1826911, 'lgbmclassifier__min_child_samples': 1598, 'lgbmclassifier__reg_alpha': 0.13747275643345883, 'lgbmclassifier__colsample_bytree': 0.06196048103857655, 'lgbmclassifier__learning_rate': 0.042641562492928636}
74
74
66
66
73
73
80
80
73
73
79
79
73
73
72
72


[I 2019-09-12 05:56:17,319] Finished trial#165 resulted in value: -0.962918016610256. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 5, 'lgbmclassifier__num_leaves': 726, 'lgbmclassifier__subsample_for_bin': 1630013, 'lgbmclassifier__min_child_samples': 7086, 'lgbmclassifier__reg_alpha': 0.3204189829343458, 'lgbmclassifier__colsample_bytree': 0.12223682828891896, 'lgbmclassifier__learning_rate': 0.22756311260014042}
177
177
169
169
172
172
175
175
179
179
177
177
168
168
180
180


[I 2019-09-12 06:17:39,977] Finished trial#166 resulted in value: -0.9669141795680541. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 9, 'lgbmclassifier__num_leaves': 932, 'lgbmclassifier__subsample_for_bin': 1715983, 'lgbmclassifier__min_child_samples': 2123, 'lgbmclassifier__reg_alpha': 0.12373484145560895, 'lgbmclassifier__colsample_bytree': 0.4194922058485455, 'lgbmclassifier__learning_rate': 0.11362452870855498}
97
97
91
91
90
90
99
99
93
93
99
99
91
91
93
93


[I 2019-09-12 06:45:07,585] Finished trial#167 resulted in value: -0.9734610021767773. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 8, 'lgbmclassifier__num_leaves': 958, 'lgbmclassifier__subsample_for_bin': 1718383, 'lgbmclassifier__min_child_samples': 4655, 'lgbmclassifier__reg_alpha': 0.21540785375019736, 'lgbmclassifier__colsample_bytree': 0.31807977316922204, 'lgbmclassifier__learning_rate': 8.145501879041405e-06}
105
105
108
108


[I 2019-09-12 06:50:13,560] Finished trial#168 resulted in value: -0.8741745503381328. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


{'selectfrommodel__threshold': 15, 'lgbmclassifier__num_leaves': 939, 'lgbmclassifier__subsample_for_bin': 1911999, 'lgbmclassifier__min_child_samples': 72, 'lgbmclassifier__reg_alpha': 0.11805833543438835, 'lgbmclassifier__colsample_bytree': 0.09538171371383206, 'lgbmclassifier__learning_rate': 0.10893461260466755}
52
52
47
47
47
47
48
48
52
52
50
50
46
46
43
43


[I 2019-09-12 07:09:38,528] Finished trial#169 resulted in value: -0.973367222089144. Current best value is -0.9737159592210338 with parameters: {'selectfrommodel__threshold': 11, 'lgbmclassifier__num_leaves': 330, 'lgbmclassifier__subsample_for_bin': 2077193, 'lgbmclassifier__min_child_samples': 2227, 'lgbmclassifier__reg_alpha': 0.16758905622425835, 'lgbmclassifier__colsample_bytree': 0.49030006727392056, 'lgbmclassifier__learning_rate': 0.07916040470631734}.


In [57]:
model.set_params(**best_params)

cross_val_score_auc(model,
                    X_train,
                    y_train,
                    n_fold=N_FOLD,
                    stratify=True,
                    shuffle=True,
                    random_state=42,
                    predict=True,
                    X_test=X_test,
                    submission=sample_submission)

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

ROC accuracy: 0.9707565062294428, Train: 0.9999416292415686
ROC accuracy: 0.9758652343514882, Train: 0.9998818960438143
ROC accuracy: 0.9747893539459415, Train: 0.9999033992474002
ROC accuracy: 0.9741729952670382, Train: 0.999944229888998
ROC accuracy: 0.9735064735460197, Train: 0.9999515715657177
ROC accuracy: 0.9728703535857148, Train: 0.9999501665218518
ROC accuracy: 0.9746020273044912, Train: 0.9999374155994768
ROC accuracy: 0.973164729538134, Train: 0.9999402925194638




0.9737159592210338

In [None]:
# ROC accuracy: 0.9707565062294428, Train: 0.9999416292415686
# ROC accuracy: 0.9758652343514882, Train: 0.9998818960438143
# ROC accuracy: 0.9747893539459415, Train: 0.9999033992474002
# ROC accuracy: 0.9741729952670382, Train: 0.999944229888998
# ROC accuracy: 0.9735064735460197, Train: 0.9999515715657177
# ROC accuracy: 0.9728703535857148, Train: 0.9999501665218518
# ROC accuracy: 0.9746020273044912, Train: 0.9999374155994768
# ROC accuracy: 0.973164729538134, Train: 0.9999402925194638


# 0.9737159592210338