In [2]:
import numpy as np
from glob import glob
import torch
import pandas as pd
import random
from tqdm import tqdm

In [3]:
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import sklearn.metrics as metrics
from sklearn.model_selection import KFold
import numpy as np
from bayes_opt import BayesianOptimization

In [4]:
data_path = './train'
train_pkl_files = glob(data_path+'/*.pkl')

In [5]:
valid_path = './test_A'
valid_pkl_files = glob(valid_path+'/*.pkl')

In [6]:
def load_data(file_name, is_train=True):
    data, label = [], []
    if is_train:
        for pkl in file_name:
            item = torch.load(pkl)
            data.append(item[0][:7][-1])
            label.append(int(item[1]['label'][0]))
    else:
        for pkl in file_name:
            item = torch.load(pkl)
            data.append(item[0][:7][-1])
    data = pd.DataFrame(data, columns=['volt','current','soc','max_single_volt','min_single_volt','max_temp','min_temp','time_stamp'])
    data.drop('time_stamp', axis=1, inplace=True)
    label = pd.Series(label)
    data_mean = np.mean(data, axis=0)
    data_std = np.std(data, axis=0)
    data = (data - data_mean) / (data_std + 1e-5)
    return data, label

In [7]:
train_data, train_label = load_data(train_pkl_files)
valid_data, _ = load_data(valid_pkl_files, is_train=False)

  label = pd.Series(label)


In [8]:
X, X_test,y, y_test = train_test_split(train_data, train_label, test_size=0.2, random_state=42)

In [9]:
print(X.shape)
print(y.shape)

(22711, 7)
(22711,)


In [13]:
def LGB_CV(learning_rate,
           max_depth,
          num_leaves,
          min_data_in_leaf,
          feature_fraction,
          bagging_fraction,
          lambda_l1):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    f = np.zeros(X.shape[0])
    for index, (train_index, val_index) in enumerate(kf.split(X, y)):
        print("fold--{}".format(index))
        train_data = lgb.Dataset(X.iloc[train_index], label=y.iloc[train_index])
        val_data = lgb.Dataset(X.iloc[val_index], label=y.iloc[val_index])
    
        params = {'num_leaves': int(num_leaves),
                'min_data_in_leaf': int(min_data_in_leaf), 
                'objective':'binary',
                'max_depth': int(max_depth),
                'learning_rate': learning_rate,
                "boosting": "gbdt",
                "feature_fraction": feature_fraction,
                "bagging_fraction": bagging_fraction ,
                "metric": 'auc',
                "lambda_l1": lambda_l1,
                }
        model = lgb.train(params, train_data, valid_sets=[train_data, val_data],num_boost_round=300, verbose_eval=500,early_stopping_rounds = 30)
        f[val_index] = model.predict(X.iloc[val_index], num_iteration=model.best_iteration)
        del model, train_index, val_index
    f = np.array([1 if i>0.5 else 0 for i in f])
    return metrics.f1_score(f, y)
LGB_CV(   learning_rate=0.01,
          max_depth=5,
          num_leaves=32,
          min_data_in_leaf=15,
          feature_fraction=0.8,
          bagging_fraction=0.8,
          lambda_l1=0.1
         )
lgb_ba = BayesianOptimization(LGB_CV, {"learning_rate":(0.01, 0.5),
                                       "max_depth":(1,30),
                                       "num_leaves":(10,200),
                                       "min_data_in_leaf":(5,30),
                                       "feature_fraction":(0.1,1),
                                       "bagging_fraction":(0.3,1),
                                       "lambda_l1":(0,6)})
lgb_ba.maximize()

fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds






Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.938364	valid_1's auc: 0.931495
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.938066	valid_1's auc: 0.928444
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.939403	valid_1's auc: 0.923217
fold--3


[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.939668	valid_1's auc: 0.929427
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.937716	valid_1's auc: 0.93216
|   iter    |  target   | baggin... | featur... | lambda_l1 | learni... | max_depth | min_da... | num_le... |
-------------------------------------------------------------------------------------------------------------
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[111]	training's auc: 0.999981	valid_1's auc: 0.979776
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[50]	training's auc: 0.99992	valid_1's auc: 0.978419
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[70]	training's auc: 0.999955	valid_1's auc: 0.980988
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start 

Early stopping, best iteration is:
[117]	training's auc: 0.999985	valid_1's auc: 0.980044
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[83]	training's auc: 0.999975	valid_1's auc: 0.982323
| [0m 1       [0m | [0m 0.8519  [0m | [0m 0.9887  [0m | [0m 0.7334  [0m | [0m 1.325   [0m | [0m 0.4204  [0m | [0m 18.05   [0m | [0m 26.35   [0m | [0m 126.2   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[162]	training's auc: 0.991157	valid_1's auc: 0.974763
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[174]	training's auc: 0.99225	valid_1's auc: 0.972692
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromSco

[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[155]	training's auc: 0.990981	valid_1's auc: 0.971179
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[144]	training's auc: 0.991305	valid_1's auc: 0.971238
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[141]	training's auc: 0.990607	valid_1's auc: 0.974683
| [0m 2       [0m | [0m 0.818   [0m | [0m 0.3229  [0m | [0m 0.3592  [0m | [0m 3.727   [0m | [0m 0.4252  [0m | [0m 20.12   [0m | [0m 21.08   [0m | [0m 19.31   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `for





Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.918011	valid_1's auc: 0.905216
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds






Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.916614	valid_1's auc: 0.904326
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.918128	valid_1's auc: 0.897444
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[145]	training's auc: 0.912615	valid_1's auc: 0.900228
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.917404	valid_1's auc: 0.902441


| [0m 3       [0m | [0m 0.5478  [0m | [0m 0.4317  [0m | [0m 0.1174  [0m | [0m 4.502   [0m | [0m 0.2189  [0m | [0m 12.53   [0m | [0m 27.42   [0m | [0m 43.83   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[171]	training's auc: 0.999764	valid_1's auc: 0.981364
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[181]	training's auc: 0.999837	valid_1's auc: 0.979291
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178


You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[212]	training's auc: 0.999879	valid_1's auc: 0.979934
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[196]	training's auc: 0.999862	valid_1's auc: 0.979138
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[189]	training's auc: 0.999819	valid_1's auc: 0.982602
| [95m 4       [0m | [95m 0.8551  [0m | [95m 0.9195  [0m | [95m 0.8562  [0m | [95m 1.561   [0m | [95m 0.4163  [0m | [95m 6.753   [0m | [95m 16.95   [0m | [95m 48.12   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds






Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.92624	valid_1's auc: 0.900441
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[151]	training's auc: 0.918714	valid_1's auc: 0.897493
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.926869	valid_1's auc: 0.890826
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.926374	valid_1's auc: 0.89956
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[220]	training's auc: 0.923446	valid_1's auc: 0.895461
| [0m 5       [0m | [0m 0.5404  [0m | [0m 0.6258  [0m | [0m 0.1474  [0m | [0m 0.8929  [0m | [0m 0.1222  [0m | [0m 22.17   [0m | [0m 12.02   [0m | [0m 71.72   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153




You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[240]	training's auc: 0.999213	valid_1's auc: 0.977579
fold--1


[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[203]	training's auc: 0.999179	valid_1's auc: 0.973893
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
T

Early stopping, best iteration is:
[206]	training's auc: 0.998892	valid_1's auc: 0.975144
fold--3


[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[164]	training's auc: 0.998393	valid_1's auc: 0.9749
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7


[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[187]	training's auc: 0.998777	valid_1's auc: 0.980405
| [0m 6       [0m | [0m 0.842   [0m | [0m 0.6684  [0m | [0m 0.3736  [0m | [0m 1.929   [0m | [0m 0.4823  [0m | [0m 6.097   [0m | [0m 8.654   [0m | [0m 34.63   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.991493	valid_1's auc: 0.976629
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.991685	valid_1's auc: 0.974638
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[294]	training's auc: 0.991087	valid_1's auc: 0.974172
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.64

Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.991508	valid_1's auc: 0.973806
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.991135	valid_1's auc: 0.977688
| [0m 7       [0m | [0m 0.8239  [0m | [0m 0.9882  [0m | [0m 0.8546  [0m | [0m 5.098   [0m | [0m 0.06745 [0m | [0m 14.79   [0m | [0m 29.81   [0m | [0m 56.29   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[124]	training's auc: 0.993109	valid_1's auc: 0.974307
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[100]	training's auc: 0.992418	valid_1's auc: 0.972966
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Sta

Early stopping, best iteration is:
[137]	training's auc: 0.993497	valid_1's auc: 0.972542
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[122]	training's auc: 0.992492	valid_1's auc: 0.970901
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Sta

Early stopping, best iteration is:
[127]	training's auc: 0.993122	valid_1's auc: 0.975747
| [0m 8       [0m | [0m 0.8263  [0m | [0m 0.9831  [0m | [0m 0.442   [0m | [0m 3.918   [0m | [0m 0.4554  [0m | [0m 14.11   [0m | [0m 9.411   [0m | [0m 29.33   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.994563	valid_1's auc: 0.978645
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995116	valid_1's auc: 0.976406
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.62



Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995621	valid_1's auc: 0.975344
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995738	valid_1's auc: 0.973011
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995408	valid_1's auc: 0.972328
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] 

Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995736	valid_1's auc: 0.970185
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[166]	training's auc: 0.979433	valid_1's auc: 0.964105
| [0m 10      [0m | [0m 0.8168  [0m | [0m 1.0     [0m | [0m 1.0     [0m | [0m 0.04976 [0m | [0m 0.4743  [0m | [0m 3.788   [0m | [0m 7.496   [0m | [0m 47.08   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 109



Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.901247	valid_1's auc: 0.89626
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[297]	training's auc: 0.901245	valid_1's auc: 0.895141
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624



Early stopping, best iteration is:
[144]	training's auc: 0.999979	valid_1's auc: 0.981719
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[141]	training's auc: 0.999977	valid_1's auc: 0.978127
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromSc

Early stopping, best iteration is:
[149]	training's auc: 0.999977	valid_1's auc: 0.979553
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[200]	training's auc: 0.999987	valid_1's auc: 0.980306
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090


[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[148]	training's auc: 0.999978	valid_1's auc: 0.9833
| [95m 12      [0m | [95m 0.8608  [0m | [95m 1.0     [0m | [95m 1.0     [0m | [95m 1.346   [0m | [95m 0.5     [0m | [95m 8.661   [0m | [95m 12.55   [0m | [95m 42.08   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	training's auc: 0.861896	valid_1's auc: 0.86243
fold--1
[LightGBM] [Info] Number of positive: 2980, number of 



Early stopping, best iteration is:
[9]	training's auc: 0.86045	valid_1's auc: 0.855195
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	training's auc: 0.862732	valid_1's auc: 0.851129
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start tr

Early stopping, best iteration is:
[9]	training's auc: 0.859153	valid_1's auc: 0.856111
| [0m 13      [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 0.1     [0m | [0m 6.0     [0m | [0m 0.01    [0m | [0m 12.63   [0m | [0m 9.826   [0m | [0m 48.25   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995986	valid_1's auc: 0.976673
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.995504	valid_1's auc: 0.972764
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.62

Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.995349	valid_1's auc: 0.971335
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.996181	valid_1's auc: 0.972888
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] 

Early stopping, best iteration is:
[170]	training's auc: 0.982718	valid_1's auc: 0.968176
| [0m 14      [0m | [0m 0.8197  [0m | [0m 0.8994  [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 0.5     [0m | [0m 3.806   [0m | [0m 14.01   [0m | [0m 43.92   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[203]	training's auc: 1	valid_1's auc: 0.980042
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[73]	training's auc: 0.997582	valid_1's auc: 0.975406
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start train



Early stopping, best iteration is:
[268]	training's auc: 0.935144	valid_1's auc: 0.903707
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[122]	training's auc: 0.92748	valid_1's auc: 0.901103
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, n



Early stopping, best iteration is:
[104]	training's auc: 0.991309	valid_1's auc: 0.976115
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[105]	training's auc: 0.991341	valid_1's auc: 0.972607
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[102]	training's auc: 0.990626	valid_1's auc: 0.972132
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[112]	training's auc: 0.991436	valid_1's auc: 0.973302
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[67]	training's auc: 0.986497	valid_1's auc: 0.973311
| [0m 17      [0m | [0m 0.8211  [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 5.112   [0m | [0m 0.5     [0m | [0m 5.0     [0m | [0m 13.8    [0m | [0m 38.23   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[299]	training's auc: 0.963786	valid_1's auc: 0.952716
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.961428	valid_1's auc: 0.952793
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.62



Early stopping, best iteration is:
[96]	training's auc: 0.996849	valid_1's auc: 0.978727
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[187]	training's auc: 1	valid_1's auc: 0.979185
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pa



[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[114]	training's auc: 0.997569	valid_1's auc: 0.978639
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Earl

Early stopping, best iteration is:
[96]	training's auc: 0.996735	valid_1's auc: 0.975907
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[123]	training's auc: 0.997698	valid_1's auc: 0.976987
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Star

Early stopping, best iteration is:
[132]	training's auc: 0.997964	valid_1's auc: 0.981987
| [0m 20      [0m | [0m 0.8428  [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 3.207   [0m | [0m 0.4834  [0m | [0m 21.23   [0m | [0m 14.22   [0m | [0m 30.77   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[61]	training's auc: 0.992413	valid_1's auc: 0.967795
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [



Early stopping, best iteration is:
[93]	training's auc: 0.998074	valid_1's auc: 0.974316
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[179]	training's auc: 1	valid_1's auc: 0.975961
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start train



[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.990313	valid_1's auc: 0.975015
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 3



Early stopping, best iteration is:
[9]	training's auc: 0.86719	valid_1's auc: 0.852403
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[9]	training's auc: 0.867412	valid_1's auc: 0.857032
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]:



Early stopping, best iteration is:
[71]	training's auc: 0.988457	valid_1's auc: 0.970613
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds


Early stopping, best iteration is:
[65]	training's auc: 0.987939	valid_1's auc: 0.96934
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[69]	training's auc: 0.98823	valid_1's auc: 0.970154
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090


[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[81]	training's auc: 0.98897	valid_1's auc: 0.974496
| [0m 24      [0m | [0m 0.8121  [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 6.0     [0m | [0m 0.5     [0m | [0m 18.25   [0m | [0m 15.48   [0m | [0m 24.92   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[122]	training's auc: 0.989199	valid_1's auc: 0.975049
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[101]	training's auc: 0.989041	valid_1's auc: 0.973387
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromSc

Early stopping, best iteration is:
[104]	training's auc: 0.988748	valid_1's auc: 0.971154
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start training from score -1.640739
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[101]	training's auc: 0.988447	valid_1's auc: 0.971205
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Sta

Early stopping, best iteration is:
[128]	training's auc: 0.989761	valid_1's auc: 0.975329
| [0m 25      [0m | [0m 0.8214  [0m | [0m 0.5679  [0m | [0m 0.9517  [0m | [0m 5.844   [0m | [0m 0.2452  [0m | [0m 14.17   [0m | [0m 15.47   [0m | [0m 32.81   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[67]	training's auc: 0.987094	valid_1's auc: 0.973979
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [



Early stopping, best iteration is:
[86]	training's auc: 0.989874	valid_1's auc: 0.971764
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.624239
[LightGBM] [Info] Start training from score -1.624239
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[86]	training's auc: 0.989556	valid_1's auc: 0.972041
fold--3
[LightGBM] [Info] Number of positive: 2950, number of negative: 15219
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.162364 -> initscore=-1.640739
[LightGBM] [Info] Start

Early stopping, best iteration is:
[82]	training's auc: 0.989356	valid_1's auc: 0.971058
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[72]	training's auc: 0.988456	valid_1's auc: 0.97428
| [0m 26      [0m | [0m 0.8185  [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 6.0     [0m | [0m 0.5     [0m | [0m 18.6    [0m | [0m 5.0     [0m | [0m 23.88   [0m |
fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [In



Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.959549	valid_1's auc: 0.949034
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.959459	valid_1's auc: 0.946895
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] 



[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[251]	training's auc: 1	valid_1's auc: 0.982169
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early stopp



[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[69]	training's auc: 0.988438	valid_1's auc: 0.973378
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Early

Early stopping, best iteration is:
[70]	training's auc: 0.988651	valid_1's auc: 0.970555
fold--4
[LightGBM] [Info] Number of positive: 3024, number of negative: 15145
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1092
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.166437 -> initscore=-1.611090
[LightGBM] [Info] Start training from score -1.611090
Training until validation scores don't improve for 30 rounds
Early stopping, best iteration is:
[60]	training's auc: 0.988131	valid_1's auc: 0.975057
| [0m 29      [0m | [0m 0.814   [0m | [0m 0.3     [0m | [0m 1.0     [0m | [0m 6.0     [0m | [0m 0.5     [0m | [0m 22.83   [0m | [0m 25.69   [0m | [0m 27.05   [0m |


fold--0
[LightGBM] [Info] Number of positive: 3015, number of negative: 15153
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1090
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165951 -> initscore=-1.614599
[LightGBM] [Info] Start training from score -1.614599
Training until validation scores don't improve for 30 rounds




Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.998883	valid_1's auc: 0.979083
fold--1
[LightGBM] [Info] Number of positive: 2980, number of negative: 15189
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164016 -> initscore=-1.628648
[LightGBM] [Info] Start training from score -1.628648
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[300]	training's auc: 0.998816	valid_1's auc: 0.976891
fold--2
[LightGBM] [Info] Number of positive: 2991, number of negative: 15178
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1093
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164621 -> initscore=-1.62

In [14]:
lgb_ba.max["params"]

{'bagging_fraction': 0.3,
 'feature_fraction': 1.0,
 'lambda_l1': 0.5081360687479299,
 'learning_rate': 0.5,
 'max_depth': 18.654350029907974,
 'min_data_in_leaf': 21.754764589661075,
 'num_leaves': 29.26985390389911}

In [15]:
kf = KFold(n_splits=5,shuffle=True)
f = np.zeros(X.shape[0])
prediction = np.zeros(X_test.shape[0])
for index, (train_index, val_index) in enumerate(kf.split(X)):
    print("fold--{}".format(index))
    train_data = lgb.Dataset(X.iloc[train_index], label=y.iloc[train_index])
    val_data = lgb.Dataset(X.iloc[val_index], label=y.iloc[val_index])

    params = { 'bagging_fraction': 0.31,
              'feature_fraction': 1.0,
              'lambda_l1': 0.5,
              'learning_rate': 0.5,
              'max_depth': 19,
              'min_data_in_leaf': 21,
              'num_leaves': 29,
              'objective':'binary',
              'boosting': 'gbdt',
              'metric': 'auc',
            }
    model = lgb.train(params, train_data, valid_sets=[train_data, val_data],num_boost_round=300, verbose_eval=500,early_stopping_rounds=30)
    f[val_index] = model.predict(X.iloc[val_index], num_iteration=model.best_iteration)
    prediction +=model.predict(X_test, num_iteration=model.best_iteration)/kf.n_splits
metrics.f1_score(np.array([1 if i>0.5 else 0 for i in prediction]), y_test)

fold--0
[LightGBM] [Info] Number of positive: 3014, number of negative: 15154
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1088
[LightGBM] [Info] Number of data points in the train set: 18168, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.165896 -> initscore=-1.614996
[LightGBM] [Info] Start training from score -1.614996
Training until validation scores don't improve for 30 rounds




Early stopping, best iteration is:
[211]	training's auc: 1	valid_1's auc: 0.977239
fold--1
[LightGBM] [Info] Number of positive: 2986, number of negative: 15183
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1088
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.164346 -> initscore=-1.626242
[LightGBM] [Info] Start training from score -1.626242
Training until validation scores don't improve for 30 rounds
Did not meet early stopping. Best iteration is:
[157]	training's auc: 1	valid_1's auc: 0.978017
fold--2
[LightGBM] [Info] Number of positive: 2928, number of negative: 15241
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1089
[LightGBM] [Info] Number of data points in the train set: 18169, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.161154 -> initscore=-1.649670
[LightGBM] [Info] Star

0.8532423208191127

In [16]:
prediction_A =model.predict_proba(valid_data, num_iteration=model.best_iteration)

In [17]:
prediction_A

array([1.12805795e-03, 5.42077038e-08, 1.45799376e-03, ...,
       6.37962132e-03, 6.35651168e-08, 4.76412343e-07])

In [21]:
pred_series = pd.Series(prediction_A)

In [24]:
pred_series.count()

6234

In [34]:
predict_result={}
for i in tqdm(range(len(valid_pkl_files))):
    file=valid_pkl_files[i]
    name=file.split('\\')[-1]
    predict_result[name]=prediction_A[i]
predict_score=pd.DataFrame(list(predict_result.items()),columns=['file_name','score'])#列名必须为这俩个
predict_score

100%|█████████████████████████████████████████████████████████████████████████| 6234/6234 [00:00<00:00, 1557870.06it/s]


Unnamed: 0,file_name,score
0,0.pkl,1.128058e-03
1,1.pkl,5.420770e-08
2,10.pkl,1.457994e-03
3,10001.pkl,3.489215e-04
4,10002.pkl,2.120853e-05
...,...,...
6229,9994.pkl,9.788960e-01
6230,9995.pkl,1.372199e-05
6231,9996.pkl,6.379621e-03
6232,9998.pkl,6.356512e-08


In [33]:
predict_score.to_csv('submision.csv',index = False)