In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import gc
gc.enable()

from imblearn.combine import SMOTEENN
sme = SMOTEENN(random_state=2020, n_jobs=-1)
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=2020)

In [2]:
train_set = pd.read_csv('./Processed/extracted_train.csv')
test_set = pd.read_csv('./Processed/extracted_test.csv')

In [3]:
X = train_set.loc[:, train_set.columns!='isDefault'].copy()
y = train_set['isDefault'].copy()

## 定义辅助模型训练函数

In [57]:
def training_helper(clf, train_x, train_y, test_x, cv2fit=True, resample=False, pred=False) -> pd.DataFrame:
    """
    :params clf:
            the classified estimator.
    :params train_x:
            the feature variables of the initial training set.
    :params train_y:
            the target variables of the initial training set.
    :params test_x:
            the test set which needs to predict.
    :params cv2fit: 
            if equals to True, will use the cross validation method to train the model.Else, 
            will only split one training set and one valid set , to train.
    :params resample: 
            if equals to 'NN', will use SMOTEENN to resample the dataset which after KFold splits.
            if equals to 'random', will use RandomOverSampler to resample the dataset which after KFold splits.
            Else will not take the method.
    :params pred: 
            if equals to False, return  None.Else, return train_result and test_result.
            
    returns:
            train_result and test_result, or None. see also pred[function parameter].
    """
    train_result = np.zeros(train_x.shape[0])
    test_result = np.zeros(test_x.shape[0]) # m * 1 matrix， 即tmp_test * constant_matrix = test_result
    
    
    if cv2fit:
        cv_scores = []
        folds = 5
        tmp_test = np.zeros((test_x.shape[0], folds))  # m * 5 dot 5 * 1 
        constant_matrix = np.ones((folds, 1)) * (1 / folds)    # 用来执行test_x预测结果的投票点乘
        
        kf = KFold(n_splits=folds, shuffle=True, random_state=2020)

        for i, (train_index, valid_index) in enumerate(kf.split(train_x, train_y)):
            print('****************************** {} ******************************'.format(str(i+1)))
            trn_x, trn_y, val_x, val_y = train_x.iloc[list(train_index), :], train_y[list(train_index)], \
                                         train_x.iloc[list(valid_index), :],train_y[list(valid_index)]
            
            if resample == 'NN':
                trn_x_resampled, trn_y_resampled = sme.fit_resample(trn_x, trn_y)
                clf.fit(trn_x_resampled.values, trn_y_resampled.values)
            elif resample == 'random':
                trn_x_resampled, trn_y_resampled = ros.fit_resample(trn_x, trn_y)
                clf.fit(trn_x_resampled.values, trn_y_resampled.values)
                
            else:
                clf.fit(trn_x.values, trn_y.values)
                
            trn_pred = clf.predict(trn_x.values)
            val_pred = clf.predict(val_x.values)
            test_pred = clf.predict(test_x.values)

            train_result[valid_index] = val_pred
            tmp_test[:, i] = test_pred

            cv_scores.append(roc_auc_score(val_y, val_pred))
            
            print(f'train auc score:  {roc_auc_score(trn_y, trn_pred)}')
            print(f'valid auc score:  {roc_auc_score(val_y, val_pred)}')
            
        test_result = np.dot(tmp_test, constant_matrix)
        
        print("%s score_list:" % type(clf).__name__, cv_scores)
        print("%s score_mean:" % type(clf).__name__, np.mean(cv_scores))
        print("%s score_std:" % type(clf).__name__, np.std(cv_scores))

    else:
        trn_x, val_x, trn_y, val_y = train_test_split(train_x, train_y, test_size=0.2, random_state=0)

        clf.fit(trn_x, trn_y)
        
        trn_pred = clf.predict(trn_x.values)
        val_pred = clf.predict(val_x)
        test_pred = clf.predict_proba(test_x)

        test_result = test_pred
        
        print(f'train auc score:  {roc_auc_score(trn_y, trn_pred)}')
        print(f'valid auc score:  {roc_auc_score(val_y, val_pred)}')
    

    if pred:
        return train_result, test_result
    else:
        return None


## 使用单模型预测

In [35]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

决策树来一波，为了防止决策树过拟合，需要对树深和最小分裂样本等参数作出限制

In [50]:
dtc_params = {
    'max_depth':30,
    'min_samples_split':15,
    'min_samples_leaf':15,
    'random_state':2020,
}
dtc = DecisionTreeClassifier(**dtc_params)

In [59]:
training_helper(dtc, X, y, test_set, resample='NN')

****************************** 1 ******************************
train auc score:  0.6598577719644261
valid auc score:  0.5817139365847933
****************************** 2 ******************************
train auc score:  0.6601865461572318
valid auc score:  0.5828762102100045
****************************** 3 ******************************
train auc score:  0.6600079495126154
valid auc score:  0.5813374085687169
****************************** 4 ******************************
train auc score:  0.6586497698009847
valid auc score:  0.5854802433881956
****************************** 5 ******************************
train auc score:  0.6588655370839277
valid auc score:  0.5803985157648173
DecisionTreeClassifier score_list: [0.5817139365847933, 0.5828762102100045, 0.5813374085687169, 0.5854802433881956, 0.5803985157648173]
DecisionTreeClassifier score_mean: 0.5823612629033055
DecisionTreeClassifier score_std: 0.0017500225932564435


In [51]:
training_helper(dtc, X, y, test_set, cv2fit=False)

train auc score:  0.6864871429918792
valid auc score:  0.5593567192954606


## 使用集成方法建模

bagging方法典范 -- 随机森林

In [19]:
from sklearn.ensemble import RandomForestClassifier

In [52]:
rfc_params = {
    'n_estimators':150,
    'n_jobs':-1, 
    'max_depth':20,
    'min_samples_split':15,
    'min_samples_leaf':15,
    'random_state':2020
}
rfc = RandomForestClassifier(**rfc_params)

In [53]:
training_helper(rfc, X, y, test_set, resample='NN')

****************************** 1 ******************************
train auc score:  0.6282342887014193
valid auc score:  0.598868331533091
****************************** 2 ******************************
train auc score:  0.6270564994037602
valid auc score:  0.5972872075469559
****************************** 3 ******************************
train auc score:  0.6289516048642485
valid auc score:  0.5963267274920373
****************************** 4 ******************************
train auc score:  0.6276533245869835
valid auc score:  0.5999089014720511
****************************** 5 ******************************
train auc score:  0.6274370063961466
valid auc score:  0.5992199806914916
RandomForestClassifier score_list: [0.598868331533091, 0.5972872075469559, 0.5963267274920373, 0.5999089014720511, 0.5992199806914916]
RandomForestClassifier score_mean: 0.5983222297471253
RandomForestClassifier score_std: 0.0013171948440780928


boosting大法好

In [64]:
from lightgbm import LGBMClassifier
import lightgbm as lgb

In [62]:
lgb_params = {
                'boosting_type': 'gbdt',
                'objective': 'binary',
                'metric': 'auc',
                'min_child_weight': 5,
                'num_leaves': 2 ** 5,
                'lambda_l2': 10,
                'feature_fraction': 0.8,
                'bagging_fraction': 0.8,
                'bagging_freq': 4,
                'learning_rate': 0.1,
                'seed': 2020,
                'n_jobs':-1,
                'silent': True,
                'verbose': -1,
            }

lgb_clf = LGBMClassifier(**lgb_params)

In [63]:
training_helper(lgb_clf, X, y, test_set, resample='NN')

****************************** 1 ******************************
train auc score:  0.6044622061840622
valid auc score:  0.6003885421278852
****************************** 2 ******************************
train auc score:  0.603727414749769
valid auc score:  0.6029091559430723
****************************** 3 ******************************
train auc score:  0.604883617038279
valid auc score:  0.60010204626504
****************************** 4 ******************************
train auc score:  0.6029326988858587
valid auc score:  0.6016204120636569
****************************** 5 ******************************
train auc score:  0.6035158733469617
valid auc score:  0.6040003480310349
LGBMClassifier score_list: [0.6003885421278852, 0.6029091559430723, 0.60010204626504, 0.6016204120636569, 0.6040003480310349]
LGBMClassifier score_mean: 0.6018041008861379
LGBMClassifier score_std: 0.0014818357746454417


In [65]:
# 数据集划分
X_train_split, X_val, y_train_split, y_val = train_test_split(X, y, test_size=0.2)
train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
valid_matrix = lgb.Dataset(X_val, label=y_val)

params = {
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'learning_rate': 0.1,
            'metric': 'auc',
            'min_child_weight': 1e-3,
            'num_leaves': 31,
            'max_depth': -1,
            'reg_lambda': 0,
            'reg_alpha': 0,
            'feature_fraction': 1,
            'bagging_fraction': 1,
            'bagging_freq': 0,
            'seed': 2020,
            'nthread': 8,
            'verbose': -1,
}

"""使用训练集数据进行模型训练"""
model = lgb.train(params, train_set=train_matrix, valid_sets=valid_matrix, 
                  num_boost_round=20000, verbose_eval=1000, early_stopping_rounds=200)

Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[396]	valid_0's auc: 0.733445


In [70]:
cv_scores = []
kf = KFold(n_splits=5, shuffle=True, random_state=2020)
for i, (train_index, valid_index) in enumerate(kf.split(X, y)):
    print('************************************ {} ************************************'.format(str(i+1)))
    X_train_split, y_train_split, X_val, y_val = X.iloc[train_index], y[train_index], X.iloc[valid_index], y[valid_index]
    
    train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
    valid_matrix = lgb.Dataset(X_val, label=y_val)

    params = {
                'boosting_type': 'gbdt',
                'objective': 'binary',
                'learning_rate': 0.1,
                'metric': 'auc',
        
                'min_child_weight': 1e-3,
                'num_leaves': 31,
                'max_depth': -1,
                'reg_lambda': 0,
                'reg_alpha': 0,
                'feature_fraction': 1,
                'bagging_fraction': 1,
                'bagging_freq': 0,
                'seed': 2020,
                'nthread': 8,
                'silent': True,
                'verbose': -1,
    }
    
    model = lgb.train(params, train_set=train_matrix, num_boost_round=20000, valid_sets=valid_matrix, verbose_eval=1000, early_stopping_rounds=200)
    val_pred = model.predict(X_val, num_iteration=model.best_iteration)
    
    cv_scores.append(roc_auc_score(y_val, val_pred))
    print(cv_scores)

print("lgb_scotrainre_list:{}".format(cv_scores))
print("lgb_score_mean:{}".format(np.mean(cv_scores)))
print("lgb_score_std:{}".format(np.std(cv_scores)))

************************************ 1 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[477]	valid_0's auc: 0.733725
[0.7337251576377901]
************************************ 2 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[515]	valid_0's auc: 0.733604
[0.7337251576377901, 0.7336041469400008]
************************************ 3 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[463]	valid_0's auc: 0.732191
[0.7337251576377901, 0.7336041469400008, 0.7321914742931184]
************************************ 4 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[441]	valid_0's auc: 0.732577
[0.7337251576377901, 0.7336041469400008, 0.7321914742931184, 0.732576927985946]
************************************ 5 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[496]	valid_0's auc: 0.734733
[0.7337251576377901, 0.7336041469400008, 0.7321914742931184, 0.732576927985946, 0.7347330804643706]
lgb_scotrainre_list:[0.7337251576377901, 0.7336041469400008, 0.7321914742931184, 0.732576927985946, 0.7347330804643706]
lgb_score_mean:0.7333661574642452
lgb_score_std:0.0009007510561073457


In [73]:
from sklearn.model_selection import cross_val_score

"""定义优化函数"""
def rf_cv_lgb(num_leaves, max_depth, bagging_fraction, feature_fraction, bagging_freq, min_data_in_leaf, 
              min_child_weight, min_split_gain, reg_lambda, reg_alpha):
    # 建立模型
    model_lgb = lgb.LGBMClassifier(boosting_type='gbdt', objective='binary', metric='auc',
                                   learning_rate=0.1, n_estimators=5000,
                                   num_leaves=int(num_leaves), max_depth=int(max_depth), 
                                   bagging_fraction=round(bagging_fraction, 2), 
                                   feature_fraction=round(feature_fraction, 2),
                                   bagging_freq=int(bagging_freq), min_data_in_leaf=int(min_data_in_leaf),
                                   min_child_weight=min_child_weight, min_split_gain=min_split_gain,
                                   reg_lambda=reg_lambda, reg_alpha=reg_alpha,
                                   n_jobs= 8
                                  )
    
    val = cross_val_score(model_lgb, X_train_split, y_train_split, cv=5, scoring='roc_auc').mean()
    
    return val

In [74]:
from bayes_opt import BayesianOptimization
"""定义优化参数"""
bayes_lgb = BayesianOptimization(
    rf_cv_lgb, 
    {
        'num_leaves':(10, 200),
        'max_depth':(3, 20),
        'bagging_fraction':(0.5, 1.0),
        'feature_fraction':(0.5, 1.0),
        'bagging_freq':(0, 100),
        'min_data_in_leaf':(10,100),
        'min_child_weight':(0, 10),
        'min_split_gain':(0.0, 1.0),
        'reg_alpha':(0.0, 10),
        'reg_lambda':(0.0, 10),
    }
)

"""开始优化"""
bayes_lgb.maximize(n_iter=10)

|   iter    |  target   | baggin... | baggin... | featur... | max_depth | min_ch... | min_da... | min_sp... | num_le... | reg_alpha | reg_la... |
-------------------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7326  [0m | [0m 0.864   [0m | [0m 37.87   [0m | [0m 0.7079  [0m | [0m 13.03   [0m | [0m 0.8163  [0m | [0m 90.23   [0m | [0m 0.5438  [0m | [0m 145.6   [0m | [0m 7.666   [0m | [0m 5.344   [0m |
| [0m 2       [0m | [0m 0.7265  [0m | [0m 0.8773  [0m | [0m 2.685   [0m | [0m 0.5327  [0m | [0m 15.86   [0m | [0m 3.526   [0m | [0m 18.54   [0m | [0m 0.5892  [0m | [0m 34.87   [0m | [0m 0.07025 [0m | [0m 0.6496  [0m |
| [0m 3       [0m | [0m 0.7092  [0m | [0m 0.8597  [0m | [0m 51.3    [0m | [0m 0.7788  [0m | [0m 12.8    [0m | [0m 7.331   [0m | [0m 13.66   [0m | [0m 0.09061 [0m | [0m 111.2   [0m | [0m 2.927   [0m | [

| [0m 4       [0m | [0m 0.7317  [0m | [0m 0.5397  [0m | [0m 46.8    [0m | [0m 0.8576  [0m | [0m 3.483   [0m | [0m 6.645   [0m | [0m 59.37   [0m | [0m 0.01996 [0m | [0m 164.1   [0m | [0m 2.342   [0m | [0m 8.056   [0m |
| [95m 5       [0m | [95m 0.7335  [0m | [95m 0.8035  [0m | [95m 58.51   [0m | [95m 0.6013  [0m | [95m 17.48   [0m | [95m 3.063   [0m | [95m 61.12   [0m | [95m 0.7407  [0m | [95m 23.51   [0m | [95m 5.467   [0m | [95m 3.66    [0m |
| [0m 6       [0m | [0m 0.733   [0m | [0m 0.7555  [0m | [0m 56.99   [0m | [0m 0.5967  [0m | [0m 16.27   [0m | [0m 2.437   [0m | [0m 61.77   [0m | [0m 0.6724  [0m | [0m 21.47   [0m | [0m 2.978   [0m | [0m 5.997   [0m |


| [0m 7       [0m | [0m 0.7323  [0m | [0m 0.9022  [0m | [0m 63.81   [0m | [0m 0.5183  [0m | [0m 9.836   [0m | [0m 0.6705  [0m | [0m 100.0   [0m | [0m 0.3003  [0m | [0m 200.0   [0m | [0m 7.055   [0m | [0m 9.796   [0m |
| [0m 8       [0m | [0m 0.7325  [0m | [0m 0.6085  [0m | [0m 0.2436  [0m | [0m 0.8576  [0m | [0m 19.86   [0m | [0m 3.01    [0m | [0m 82.26   [0m | [0m 0.8003  [0m | [0m 199.8   [0m | [0m 8.087   [0m | [0m 0.6509  [0m |
| [95m 9       [0m | [95m 0.7336  [0m | [95m 1.0     [0m | [95m 100.0   [0m | [95m 0.6582  [0m | [95m 20.0    [0m | [95m 9.525   [0m | [95m 100.0   [0m | [95m 0.6537  [0m | [95m 42.06   [0m | [95m 10.0    [0m | [95m 0.0     [0m |


| [0m 10      [0m | [0m 0.7333  [0m | [0m 1.0     [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 10.0    [0m | [0m 44.12   [0m | [0m 0.0     [0m | [0m 10.0    [0m | [0m 10.0    [0m | [0m 0.0     [0m |
| [0m 11      [0m | [0m 0.732   [0m | [0m 0.5     [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 10.0    [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 10.0    [0m | [0m 10.0    [0m | [0m 0.0     [0m |
| [0m 12      [0m | [0m 0.7323  [0m | [0m 0.8806  [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 10.0    [0m | [0m 100.0   [0m | [0m 0.751   [0m | [0m 145.6   [0m | [0m 8.206   [0m | [0m 2.325   [0m |


| [0m 13      [0m | [0m 0.7326  [0m | [0m 1.0     [0m | [0m 100.0   [0m | [0m 0.5     [0m | [0m 20.0    [0m | [0m 10.0    [0m | [0m 35.48   [0m | [0m 0.3893  [0m | [0m 200.0   [0m | [0m 10.0    [0m | [0m 1.009   [0m |
| [0m 14      [0m | [0m 0.7318  [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 0.0     [0m | [0m 10.0    [0m | [0m 1.0     [0m | [0m 200.0   [0m | [0m 10.0    [0m | [0m 10.0    [0m |
| [0m 15      [0m | [0m 0.732   [0m | [0m 1.0     [0m | [0m 0.0     [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 0.0     [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 81.96   [0m | [0m 10.0    [0m | [0m 0.0     [0m |


In [75]:
bayes_lgb.max

{'target': 0.7335936780574788,
 'params': {'bagging_fraction': 1.0,
  'bagging_freq': 100.0,
  'feature_fraction': 0.6582455514685674,
  'max_depth': 20.0,
  'min_child_weight': 9.525369435461984,
  'min_data_in_leaf': 100.0,
  'min_split_gain': 0.6536599961587486,
  'num_leaves': 42.060624772191304,
  'reg_alpha': 10.0,
  'reg_lambda': 0.0}}

In [79]:
"""调整一个较小的学习率，并通过cv函数确定当前最优的迭代次数"""
base_params_lgb = {
                    'boosting_type': 'gbdt',
                    'objective': 'binary',
                    'metric': 'auc',
                    'learning_rate': 0.01,
                    'num_leaves': 42,
                    'max_depth': 20,
                    'min_data_in_leaf': 100,
                    'min_child_weight':9.525,
                    'bagging_fraction': 1,
                    'feature_fraction': 0.658,
                    'bagging_freq': 100,
                    'reg_lambda': 0,
                    'reg_alpha': 10,
                    'min_split_gain': 0.654,
                    'nthread': 8,
                    'seed': 2020,
                    'silent': True,
                    'verbose': -1,
}

cv_result_lgb = lgb.cv(
    train_set=train_matrix,
    early_stopping_rounds=1000, 
    num_boost_round=20000,
    nfold=5,
    stratified=True,
    shuffle=True,
    params=base_params_lgb,
    metrics='auc',
    seed=0
)

print('迭代次数{}'.format(len(cv_result_lgb['auc-mean'])))
print('最终模型的AUC为{}'.format(max(cv_result_lgb['auc-mean'])))

迭代次数2469
最终模型的AUC为0.7343744082250645


In [81]:
"""使用lightgbm 5折交叉验证进行建模预测"""
cv_scores = []
for i, (train_index, valid_index) in enumerate(kf.split(X, y)):
    print('************************************ {} ************************************'.format(str(i+1)))
    X_train_split, y_train_split, X_val, y_val = X.iloc[train_index], y[train_index], \
                                X.iloc[valid_index], y[valid_index]
    
    train_matrix = lgb.Dataset(X_train_split, label=y_train_split)
    valid_matrix = lgb.Dataset(X_val, label=y_val)

    params = base_params_lgb
    
    model = lgb.train(params, train_set=train_matrix, num_boost_round=14269, 
                      valid_sets=valid_matrix, verbose_eval=1000, early_stopping_rounds=200)
    val_pred = model.predict(X_val, num_iteration=model.best_iteration)
    
    cv_scores.append(roc_auc_score(y_val, val_pred))
    print(cv_scores)

print("lgb_scotrainre_list:{}".format(cv_scores))
print("lgb_score_mean:{}".format(np.mean(cv_scores)))
print("lgb_score_std:{}".format(np.std(cv_scores)))

************************************ 1 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's auc: 0.732971
[2000]	valid_0's auc: 0.735326
Early stopping, best iteration is:
[2704]	valid_0's auc: 0.735737
[0.735737498466386]
************************************ 2 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's auc: 0.732251
[2000]	valid_0's auc: 0.734528
Early stopping, best iteration is:
[2663]	valid_0's auc: 0.734974
[0.735737498466386, 0.7349741435171416]
************************************ 3 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's auc: 0.73136
[2000]	valid_0's auc: 0.733609
Early stopping, best iteration is:
[2688]	valid_0's auc: 0.734116
[0.735737498466386, 0.7349741435171416, 0.7341164859433785]
************************************ 4 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's auc: 0.73183
[2000]	valid_0's auc: 0.734168
Early stopping, best iteration is:
[2673]	valid_0's auc: 0.73467
[0.735737498466386, 0.7349741435171416, 0.7341164859433785, 0.7346704339632018]
************************************ 5 ************************************


Please use silent argument of the Dataset constructor to pass this parameter.


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's auc: 0.733876
[2000]	valid_0's auc: 0.736162
Early stopping, best iteration is:
[2698]	valid_0's auc: 0.736661
[0.735737498466386, 0.7349741435171416, 0.7341164859433785, 0.7346704339632018, 0.7366612724019215]
lgb_scotrainre_list:[0.735737498466386, 0.7349741435171416, 0.7341164859433785, 0.7346704339632018, 0.7366612724019215]
lgb_score_mean:0.7352319668584059
lgb_score_std:0.0008859533038347626


In [85]:
test_val = model.predict(test_set, num_iteration=model.best_iteration)

In [86]:
sample = pd.read_csv('./sample_submit.csv')
sample['isDefault'] = test_val
sample.to_csv('./result.csv', index=False)