# tianjikit模块使用指南

### 一，模块整体架构

![](readme.png)

#### outliers_analysis的输出：

In [None]:
'med', #中位数
'seg_25', #1/4分位数
'seg_75', #3/4分位数
'up_limit',  #离群值判定上边界
'low_limit', #离群值判定下边界
'up_ratio',  #超上边界离群值比例
'low_ratio';  #超下边界离群值比例


#### basic_analysis的输出：

In [None]:
#------覆盖率------------------------#
'not_nan_ratio',  #非空比例，通常覆盖率coverage即指它
'not_zero_ratio', #非零比例，非零值不含空值
'not_outlier_ratio', #非离群值比例，非离群值不含空值

#------统计值------------------------#
'class_num', #数据类别数目
'value_num', #非空数据数目
'min', #最小值
'mean',#均值
'med', #中位数
'most', #众数
'max', #最大值

#------有效性----------------------#
'ks(continous feature)', #ks统计量，适合连续特征
'ks_pvalue', #ks统计量的p值
'chi2(discrete feature)', #chi2统计量，适合离散特征
'chi2_pvalue', #chi2统计量的p值
't(for mean)', #均值t检验,仅对连续特征适用
't_pvalue' ,#均值t检验的p值
'z(for coverage)',#覆盖率z检验，适合连续和离散特征，coverage指 not_nan_ratio
'z_pvalue', #覆盖率z检验的p值
'iv'; #iv统计量，适合连续和离散特征，iv>0.1有效，iv>0.2强有效


#### psi_analysis的输出：

In [None]:
'psi', #psi指标，仅当 train_data和 test_data 有效数据数量 >10时才取值，否则为 nan值
'is_stable', #是否稳定，psi<0.2判定为稳定
'train_class_num', # train_data中数据类别数目
'test_class_num' , # test_data中数据类别数目
'train_value_num', #train_data中有效数据数目
'test_value_num';#test_data中有效数据数目


#### ks_analysis的输出：

In [None]:
'feature_interval',#特征取值区间
'order_num', #订单数量
'order_ratio', #订单占比
'overdue_num', #逾期订单数量
'overdue_ratio', #逾期订单占比
'normal_num', #正常订单数量
'normal_ratio', #正常订单占比
'overdue_cum_ratio', #累计逾期订单比例
'normal_cum_ratio', #累计正常订单比例
'ks_value'; #ks统计值


#### iv_analysis的输出：

In [None]:
'feature_interval',#区间
'order_num', #订单数量
'order_ratio', #订单占比
'overdue_num', #逾期订单数量
'overdue_ratio', #逾期订单比例
'overdue_interval_ratio', #区间逾期订单占总逾期订单比例
'normal_num', #正常订单数量
'normal_ratio', #正常订单占比
'normal_interval_ratio', #区间正常订单占总正常订单比例
'iv_value'; #iv检验值，列重复


#### chi2_analysis的输出：

In [None]:
'TP', #feature为1的逾期样本数量
'FP', #feature为1的正常样本数量
'TN', #feature为0的正常样本数量
'FN', #feature为0的逾期的样本数量
'TPR', #TP/(TP+FN),逾期样本中feature取1比例
'FPR',#FP/(FP+TN),正常样本中feature取1比例
'overdue_ratio_0',# feature为0样本的逾期率
'overdue_ratio_1',# feature为1样本的逾期率
'precision',#精度
'accuracy',#准确度
'chi2', #卡方统计量
'chi2_pvalue'; #卡方统计量的p值


### 二，单特征分析示范

In [5]:
import numpy as np
import pandas as pd
from tianjikit.analysisfeature import AnalysisFeature

# Prepare sample data: one feature column (containing a NaN and an extreme
# value of 100) and a 0/1 label list of the same length.
data = [1.0,2,3,4,5,6,4,3,2,1,2,9,10,100,np.nan,0,7,8,10,6]
label = [0,1,1,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1,1]
assert len(data)==len(label)

af = AnalysisFeature()
# Outlier analysis
dfoutliers = af.outliers_analysis(data,alpha = 2)

# Drop outliers
# NOTE(review): `data` is passed as both positional arguments -- confirm
# against drop_outliers' signature that this is intended.
data_clean = af.drop_outliers(data,data,alpha = 2)

# Basic analysis
dfbasic = af.basic_analysis(data,label)

# PSI stability analysis of `data` against a second sample `test_data`
test_data = [10,9,5,3,4,3,2,1,6,7,5,np.nan,10,100]
dfpsi = af.psi_analysis(data,test_data)

# KS effectiveness analysis: mainly for continuous features, also usable on discrete ones
dfks = af.ks_analysis(data,label)

# IV effectiveness analysis: mainly for discrete features, also applicable to continuous ones
dfiv = af.iv_analysis(data,label)

# Chi-square, recall and related analysis: mainly for discrete features
dfchi2 = af.chi2_analysis(data,label)

### 三，多特征分析示范

In [7]:
# 多特征分析示范
import numpy as np
import pandas as pd
from tianjikit.analysisfeatures import AnalysisFeatures

def _build_frame(column_pairs):
    """Assemble a DataFrame by assigning the (name, values) pairs in order."""
    frame = pd.DataFrame()
    for column_name, column_values in column_pairs:
        frame[column_name] = column_values
    return frame


# Training-set feature table: 12 rows, one shared loan date, a 0/1 label,
# a 0/1 feature (feature1) and a float feature (feature2).
dftrain = _build_frame([
    ('phone', ['x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12']),
    ('loan_dt', ['2018-01-01'] * 12),
    ('label', [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0]),
    ('feature1', [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1]),
    ('feature2', [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
])

# Test-set feature table: 10 rows with the same column layout as dftrain.
dftest = _build_frame([
    ('phone', ['y1','y2','y3','y4','y5','y6','y7','y8','y9','y10']),
    ('loan_dt', ['2018-02-01'] * 10),
    ('label', [1, 0, 0, 1, 0, 0, 0, 1, 0, 0]),
    ('feature1', [1, 0, 0, 1, 0, 0, 1, 0, 1, 0]),
    ('feature2', [10.0, 9, 8, 7, 6, 5, 4, 3, 2, 1]),
])

# Build the multi-feature analyzer from the training and test frames.
AFS = AnalysisFeatures(dftrain,dftest)

# Basic feature analysis
dfBasic = AFS.BasicAnalysis()

# Feature stability (PSI) analysis
dfPsi = AFS.PsiAnalysis()

# Feature KS analysis
dfKs = AFS.KsAnalysis()

# Feature IV analysis
dfIv = AFS.IvAnalysis()

start BasicAnalysis...
[total|done|todo]
[2|2|0]
start PsiAnalysis...
[total|done|todo]
[2|2|0]
start KsAnalysis...
[total|done|todo]
[2|2|0]
start IvAnalysis...
[total|done|todo]
[2|2|0]


### 四，跑模型评分示范

In [10]:
# 准备训练数据
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Synthetic binary-classification data: 10000 samples, 20 features, fixed seed.
data, label = datasets.make_classification(
    n_samples=10000, n_features=20, n_classes=2, random_state=0)

# Wrap the feature matrix in a DataFrame with columns feature0, feature1, ...
# and append the target as a 'label' column.
feature_names = ['feature' + str(idx) for idx in range(data.shape[1])]
dfdata = pd.DataFrame(data, columns=feature_names)
dfdata['label'] = label

# Split into train/test parts and keep independent copies of each part.
train_part, test_part = train_test_split(dfdata)
dftrain = train_part.copy()
dftest = test_part.copy()

# Renumber both frames from 0 so that row label 0 exists in dftrain.
train_index = range(len(dftrain))
test_index = range(len(dftest))
dftrain.index = train_index
dftest.index = test_index

# Inject a few missing values into the first training row.
dftrain.loc[0, ['feature0', 'feature1', 'feature2']] = np.nan

In [11]:
# Train a logistic regression model
from tianjikit.runmodel import RunModel
# The threshold / fill / scale settings passed here are echoed in the
# preprocessing log printed after this cell (coverage 0.1, ks 0, chi2 0,
# outliers None, fillna 'most', scale None).
model = RunModel(dftrain = dftrain,dftest = dftest,coverage_th=0.1, ks_th=0, chi2_th=0, 
                 outliers_th=None, fillna_method='most', scale_method= None)
# presumably cv=5 means 5-fold cross-validation; TODO confirm what model_idx selects
lr = model.train_lr(cv=5, model_idx=5)
model.test(lr)
# presumably the feature-importance table recorded for the 'lr' model -- verify in RunModel
dfimportance = model.dfimportances['lr']

START DATA PREPROCESSING ...

train set size:  7500
test set size:  2500
coverage threshold:  0.1
outlier threshold:  None
ks threshold:  0
chi2 threshold:  0
fillna method:  most
scale method:  None
------------------------------------------------------------------------
original feature number:  20
feature number remain after dropfeature:  20
feature number increased to after fill_na:  23
------------------------------------------------------------------------
START TRAIN LR MODEL ...

2018-10-16 11:47:05: k = 1

train: ks = 0.66729 	 auc = 0.900684215885 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 | [0.00038,0.03219) |    600    |     0.1     |      25     |    0.04167    |  0.1826  |
| 1 | [0.03219,0.08889) |    600    |     0.1     |      39    


train: ks = 0.66367 	 auc = 0.899845986818 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 |  [0.00044,0.0318) |    600    |     0.1     |      21     |     0.035     | 0.18526  |
| 1 |  [0.0318,0.08725) |    600    |     0.1     |      37     |    0.06167    | 0.35988  |
| 2 | [0.08725,0.18876) |    600    |     0.1     |      66     |      0.11     | 0.51514  |
| 3 | [0.18876,0.34263) |    600    |     0.1     |     141     |     0.235     | 0.62041  |
| 4 | [0.34263,0.53171) |    600    |     0.1     |     234     |      0.39     | 0.66367  |
| 5 | [0.53171,0.69272) |    600    |     0.1     |     376     |    0.62667    | 0.61227  |
| 6 | [0.69272,0.79822) |    600    |     0.1     |     468     |      0.78     | 0.49954  |
| 7 | [0.79822,0.87526) |

In [12]:
# Train a random forest model
from tianjikit.runmodel import RunModel
# The threshold / fill / scale settings passed here are echoed in the
# preprocessing log printed after this cell.
model = RunModel(dftrain = dftrain,dftest = dftest,coverage_th=0.1, ks_th=0, chi2_th=0, 
                 outliers_th=None, fillna_method='most', scale_method= None)
# max_features='sqrt' replaces the former 'auto': for scikit-learn classifiers
# 'auto' meant 'sqrt', and 'auto' was deprecated in 1.1 and removed in 1.3.
# NOTE(review): assumes train_rf forwards these keyword arguments to
# sklearn's RandomForestClassifier -- confirm in RunModel.train_rf.
rf = model.train_rf(cv=5, model_idx=5,
      n_estimators=100, max_depth=10, min_samples_split=2,
      min_samples_leaf=1, min_weight_fraction_leaf=0.0,
      max_features='sqrt', max_leaf_nodes=None, n_jobs = 4)
model.test(rf)
# presumably the feature-importance table recorded for the 'rf' model -- verify in RunModel
dfimportance = model.dfimportances['rf']

START DATA PREPROCESSING ...

train set size:  7500
test set size:  2500
coverage threshold:  0.1
outlier threshold:  None
ks threshold:  0
chi2 threshold:  0
fillna method:  most
scale method:  None
------------------------------------------------------------------------
original feature number:  20
feature number remain after dropfeature:  20
feature number increased to after fill_na:  23
------------------------------------------------------------------------
START TRAIN RANDOMFOREST MODEL ...

2018-10-16 11:47:13: k = 1

train: ks = 0.914 	 auc = 0.992321216672 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 | [0.00408,0.01828) |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 | [0.01828,0.02708) |    600    |     0.1     |    


train: ks = 0.90634 	 auc = 0.991677332551 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 | [0.00718,0.02517) |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 | [0.02517,0.03355) |    600    |     0.1     |      0      |      0.0      | 0.39854  |
| 2 | [0.03355,0.05307) |    600    |     0.1     |      0      |      0.0      | 0.59781  |
| 3 | [0.05307,0.18088) |    600    |     0.1     |      1      |    0.00167    | 0.79642  |
| 4 | [0.18088,0.54849) |    600    |     0.1     |     134     |    0.22333    | 0.90634  |
| 5 | [0.54849,0.80629) |    600    |     0.1     |     473     |    0.78833    | 0.79028  |
| 6 | [0.80629,0.92601) |    600    |     0.1     |     581     |    0.96833    | 0.60221  |
| 7 | [0.92601,0.94797) |

In [13]:
# Train a GBDT model
from tianjikit.runmodel import RunModel
# The threshold / fill / scale settings passed here are echoed in the
# preprocessing log printed after this cell.
model = RunModel(dftrain = dftrain,dftest = dftest,coverage_th=0.1, ks_th=0, chi2_th=0, 
                 outliers_th=None, fillna_method='most', scale_method= None)
# Small learning rate (0.01) paired with many trees (1000); the remaining
# keyword names look like sklearn GradientBoostingClassifier options -- TODO confirm.
gbdt = model.train_gbdt(cv=5, model_idx=5,
       learning_rate=0.01, n_estimators=1000, max_depth= 3, min_samples_split= 50, 
       min_samples_leaf= 5, subsample=0.7, max_features='sqrt',random_state= 0) 
model.test(gbdt)
# presumably the feature-importance table recorded for the 'gbdt' model -- verify in RunModel
dfimportance = model.dfimportances['gbdt']

START DATA PREPROCESSING ...

train set size:  7500
test set size:  2500
coverage threshold:  0.1
outlier threshold:  None
ks threshold:  0
chi2 threshold:  0
fillna method:  most
scale method:  None
------------------------------------------------------------------------
original feature number:  20
feature number remain after dropfeature:  20
feature number increased to after fill_na:  23
------------------------------------------------------------------------
START TRAIN GBDT MODEL ...

2018-10-16 11:47:25: k = 1

train: ks = 0.84932 	 auc = 0.978858642808 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 | [0.00532,0.02053) |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 | [0.02053,0.02385) |    600    |     0.1     |      2   


train: ks = 0.84767 	 auc = 0.978670935465 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 | [0.00755,0.02157) |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 | [0.02157,0.02576) |    600    |     0.1     |      2      |    0.00333    |  0.3972  |
| 2 | [0.02576,0.03466) |    600    |     0.1     |      4      |    0.00667    | 0.59381  |
| 3 | [0.03466,0.17894) |    600    |     0.1     |      16     |    0.02667    | 0.78241  |
| 4 | [0.17894,0.58177) |    600    |     0.1     |     201     |     0.335     | 0.84767  |
| 5 | [0.58177,0.80746) |    600    |     0.1     |     447     |     0.745     | 0.74894  |
| 6 |  [0.80746,0.9298) |    600    |     0.1     |     537     |     0.895     | 0.59021  |
| 7 |   [0.9298,0.947)  |

In [14]:
# Train an XGBoost model
from tianjikit.runmodel import RunModel
# Unlike the other model cells, fillna_method is None here; the log printed
# after this cell reports the feature count staying at 20 after fill_na
# (it is 23 in the cells that use fillna_method='most').
model = RunModel(dftrain = dftrain,dftest = dftest,coverage_th=0.1, ks_th=0, chi2_th=0, 
                 outliers_th=None, fillna_method= None, scale_method= None)
# presumably cv=5 means 5-fold cross-validation; TODO confirm what model_idx selects
xgb = model.train_xgb(learning_rate=0.1,cv=5, model_idx=5,
      n_estimators=1000, max_depth=5, min_child_weight=1, gamma=0, subsample=0.8,
      colsample_bytree=0.8,scale_pos_weight=1, n_jobs=4, seed=10) 
model.test(xgb)
# presumably the feature-importance table recorded for the 'xgb' model -- verify in RunModel
dfimportance = model.dfimportances['xgb']

START DATA PREPROCESSING ...

train set size:  7500
test set size:  2500
coverage threshold:  0.1
outlier threshold:  None
ks threshold:  0
chi2 threshold:  0
fillna method:  None
scale method:  None
------------------------------------------------------------------------
original feature number:  20
feature number remain after dropfeature:  20
feature number increased to after fill_na:  20
------------------------------------------------------------------------
START TRAIN XGBOOST MODEL ...

2018-10-16 11:47:48: k = 1

train: ks = 0.99601 	 auc = 1.0 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 |    [0.0,3e-05)    |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 |  [3e-05,0.00011)  |    600    |     0.1     |      0      |    


train: ks = 0.99635 	 auc = 1.0 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 |    [0.0,2e-05)    |    600    |     0.1     |      0      |      0.0      | 0.19927  |
| 1 |   [2e-05,8e-05)   |    600    |     0.1     |      0      |      0.0      | 0.39854  |
| 2 |  [8e-05,0.00036)  |    600    |     0.1     |      0      |      0.0      | 0.59781  |
| 3 | [0.00036,0.00464) |    600    |     0.1     |      0      |      0.0      | 0.79708  |
| 4 | [0.00464,0.04095) |    600    |     0.1     |      0      |      0.0      | 0.99635  |
| 5 |  [0.04095,0.9935) |    600    |     0.1     |     589     |    0.98167    | 0.80294  |
| 6 |  [0.9935,0.99816) |    600    |     0.1     |     600     |      1.0      | 0.60221  |
| 7 | [0.99816,0.99944) |    599    

In [8]:
# Train a neural network model
from tianjikit.runmodel import RunModel
model = RunModel(dftrain = dftrain,dftest = dftest,coverage_th=0.1, ks_th=0, chi2_th=0, 
             outliers_th=None, fillna_method='most', scale_method= None)
# Two hidden layers (100 and 20 units) with ReLU activation; the keyword names
# look like sklearn MLPClassifier options -- TODO confirm in RunModel.train_nn.
nn = model.train_nn( cv = 5, model_idx = 5,
     hidden_layer_sizes=(100,20), activation='relu', alpha=0.0001, 
     learning_rate='constant', learning_rate_init=0.001, max_iter=200,tol=0.0001, 
     early_stopping=False, validation_fraction=0.1, warm_start=False, random_state = None)
model.test(nn)

### 五，xgboost调参示范

In [16]:
from __future__ import print_function
import numpy as np
import pandas as pd
import xgboost
from sklearn import datasets
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBClassifier

from tianjikit.tunning import Tunning

# Synthetic binary-classification data for the tuning walkthrough:
# 10000 samples, 20 features (6 informative), 10 clusters per class, fixed seed.
data, label = datasets.make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=6,
    n_classes=2,
    n_clusters_per_class=10,
    random_state=0,
)

# Columns are named f0, f1, ...; the target goes into a 'label' column.
column_names = [u'f' + str(idx) for idx in range(data.shape[1])]
dfdata = pd.DataFrame(data, columns=column_names)
dfdata['label'] = label

dftrain, dftest = train_test_split(dfdata)

In [17]:
# Starting parameters for the xgboost tuning walkthrough. Keys are listed in
# the order: booster, tree, regularization, then rarely-changed settings.
params_dict = {
    # ---- parameters to be tuned ----
    # booster parameters
    'learning_rate': 0.1,       # learning rate; initial value 0.1, smaller is usually better
    'n_estimators': 50,         # number of trees in the additive model; initial value 50,
                                # usually settled with xgboost's built-in cv

    # tree parameters
    'max_depth': 5,             # tree depth; usually within [3,10], initial value often within [3,6]
    'min_child_weight': 1,      # minimum sum of instance weights in a leaf; larger is more conservative
    'gamma': 0,                 # minimum loss reduction required to split a node; larger is more conservative
    'subsample': 0.8,           # row sampling: fraction of samples drawn, usually within [0.5,1]
    'colsample_bytree': 0.8,    # column sampling: fraction of features drawn, usually within [0.5,1]

    # regularization parameters
    # Omega(f) = gamma*T + reg_alpha*sum(abs(wj)) + 0.5*reg_lambda*sum(wj**2)
    'reg_alpha': 0,             # weight of the L1 term; larger is more conservative, usually within [0,1]
    'reg_lambda': 1,            # weight of the L2 term; larger is more conservative, usually within [1,100]

    # ---- parameters that usually do not need tuning ----
    'objective': 'binary:logistic',
    'n_jobs': 4,
    'scale_pos_weight': 1,      # with imbalanced classes, a positive value can help the algorithm converge faster
    'seed': 0,
}

In [18]:
# step0: initialize the tuner with a default XGBClassifier, the train/test
# frames and the starting params_dict
model = XGBClassifier()
tune = Tunning(model = model,dftrain = dftrain,dftest = dftest,params_dict = params_dict,n_jobs = 4)
# Display the score table; the recorded output lists train/validate/test
# scores per model_id.
tune.dfscore

Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773


In [19]:
# step1: tune n_estimators for relatively high learning_rate (eg: 0.1)
param_test1 = { 'learning_rate': 0.1, 'n_estimators':1000}
# Push the new values into the tuner's parameter dict, then into the wrapped model.
tune.params_dict.update(param_test1)
tune.model.set_params(**tune.params_dict)
# Cross-validated run with early stopping after 100 rounds without improvement;
# presumably this settles the effective number of trees -- TODO confirm in Tunning.xgboost_cv.
tune.xgboost_cv(cv_folds= 5, early_stopping_rounds= 100,n_jobs = 4,seed = 0)
tune.dfscore

[0]	train-auc:0.689263	test-auc:0.655759
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
[1]	train-auc:0.809899	test-auc:0.763808
[2]	train-auc:0.828821	test-auc:0.780394
[3]	train-auc:0.83659	test-auc:0.786832
[4]	train-auc:0.845891	test-auc:0.794179
[5]	train-auc:0.851917	test-auc:0.796019
[6]	train-auc:0.856383	test-auc:0.800001
[7]	train-auc:0.85802	test-auc:0.80081
[8]	train-auc:0.864072	test-auc:0.804085
[9]	train-auc:0.867127	test-auc:0.806166
[10]	train-auc:0.87132	test-auc:0.8109
[11]	train-auc:0.873949	test-auc:0.81197
[12]	train-auc:0.876061	test-auc:0.813475
[13]	train-auc:0.878251	test-auc:0.815341
[14]	train-auc:0.880375	test-auc:0.817463
[15]	train-auc:0.882436	test-auc:0.819126
[16]	train-auc:0.885093	test-auc:0.821411
[17]	train-auc:0.88672	test-auc:0.823285
[18]	train-auc:0.889153	test-auc:0.82541
[19]	train-auc:0.890793	test-auc:0.826439
[20]	train-auc:0.892533	test-auc:0.82

[191]	train-auc:0.989788	test-auc:0.844866
[192]	train-auc:0.989921	test-auc:0.844799
[193]	train-auc:0.990096	test-auc:0.844838
[194]	train-auc:0.990239	test-auc:0.844669
[195]	train-auc:0.99044	test-auc:0.844692
[196]	train-auc:0.99063	test-auc:0.844809
[197]	train-auc:0.990711	test-auc:0.844773
[198]	train-auc:0.990873	test-auc:0.844838
[199]	train-auc:0.991014	test-auc:0.844825
[200]	train-auc:0.991187	test-auc:0.844822
[201]	train-auc:0.991335	test-auc:0.844658
[202]	train-auc:0.991519	test-auc:0.844668
[203]	train-auc:0.991637	test-auc:0.844798
[204]	train-auc:0.991816	test-auc:0.844715
[205]	train-auc:0.991914	test-auc:0.8446
[206]	train-auc:0.992025	test-auc:0.844898
[207]	train-auc:0.992141	test-auc:0.845035
[208]	train-auc:0.992261	test-auc:0.845113
[209]	train-auc:0.992371	test-auc:0.844992
[210]	train-auc:0.992461	test-auc:0.845127
[211]	train-auc:0.992533	test-auc:0.84498
[212]	train-auc:0.992624	test-auc:0.844876
[213]	train-auc:0.992736	test-auc:0.844918
[214]	train-auc:

Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352


In [20]:
# step2: tune max_depth & min_child_weight
# Grid: max_depth in {3, 5, 7, 9} x min_child_weight in {1, 2, 3}.
param_test2 = { 'max_depth': range(3, 10, 2), 'min_child_weight': [1,2,3] } 
best_param = tune.gridsearch_cv(param_test2,n_jobs = 4)
# Display the score table; the recorded output gains one row after this step.
tune.dfscore

CV Results: 
+----+-----------------------------------------+------------------+-----------------+
|    |                  params                 | mean_train_score | mean_test_score |
+----+-----------------------------------------+------------------+-----------------+
| 0  | {'max_depth': 3, 'min_child_weight': 1} |  0.898838592337  |  0.813160308926 |
| 1  | {'max_depth': 3, 'min_child_weight': 2} |  0.897251046289  |  0.811684658929 |
| 2  | {'max_depth': 3, 'min_child_weight': 3} |  0.895658499922  |  0.811139201798 |
| 3  | {'max_depth': 5, 'min_child_weight': 1} |  0.985017041091  |  0.843409889344 |
| 4  | {'max_depth': 5, 'min_child_weight': 2} |  0.981994203185  |  0.84420514024  |
| 5  | {'max_depth': 5, 'min_child_weight': 3} |  0.978622076257  |  0.843520651988 |
| 6  | {'max_depth': 7, 'min_child_weight': 1} |  0.999887281675  |  0.857369716106 |
| 7  | {'max_depth': 7, 'min_child_weight': 2} |  0.999664534086  |  0.857004181601 |
| 8  | {'max_depth': 7, 'min_child_weight

Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644


In [21]:
# step3: tune gamma
# Grid: gamma in {0.0, 0.1, 0.2, 0.3, 0.4}.
param_test3 = {'gamma': [i / 10.0 for i in range(0, 5)]}
best_param = tune.gridsearch_cv(param_test3,n_jobs = 4)
# Display the score table; the recorded output gains one row after this step.
tune.dfscore

CV Results: 
+---+----------------+------------------+-----------------+
|   |     params     | mean_train_score | mean_test_score |
+---+----------------+------------------+-----------------+
| 0 | {'gamma': 0.0} |       1.0        |  0.863349832167 |
| 1 | {'gamma': 0.1} |       1.0        |  0.863338453661 |
| 2 | {'gamma': 0.2} |       1.0        |  0.864454613984 |
| 3 | {'gamma': 0.3} |       1.0        |  0.86275494965  |
| 4 | {'gamma': 0.4} |       1.0        |  0.862083262218 |
+---+----------------+------------------+-----------------+
Best Params: 
{'gamma': 0.2}
Best Score: 
0.864454613984184


Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644
3,3,1.0,0.864455,0.871583


In [22]:
# step4: tune subsample & colsample_bytree
# Grid: both ratios range over {0.6, 0.7, 0.8, 0.9}.
param_test4 = { 'subsample': [i / 10.0 for i in range(6, 10)],
               'colsample_bytree': [i / 10.0 for i in range(6, 10)] } 
best_param = tune.gridsearch_cv(param_test4,n_jobs = 4)
# Display the score table; the recorded output gains one row after this step.
tune.dfscore

CV Results: 
+----+---------------------------------------------+------------------+-----------------+
|    |                    params                   | mean_train_score | mean_test_score |
+----+---------------------------------------------+------------------+-----------------+
| 0  | {'subsample': 0.6, 'colsample_bytree': 0.6} |  0.999999933329  |  0.845483088695 |
| 1  | {'subsample': 0.7, 'colsample_bytree': 0.6} |       1.0        |  0.85151085225  |
| 2  | {'subsample': 0.8, 'colsample_bytree': 0.6} |       1.0        |  0.85653659612  |
| 3  | {'subsample': 0.9, 'colsample_bytree': 0.6} |       1.0        |  0.859387267452 |
| 4  | {'subsample': 0.6, 'colsample_bytree': 0.7} |  0.999999644422  |  0.847333873812 |
| 5  | {'subsample': 0.7, 'colsample_bytree': 0.7} |       1.0        |  0.853970387438 |
| 6  | {'subsample': 0.8, 'colsample_bytree': 0.7} |       1.0        |  0.859113472151 |
| 7  | {'subsample': 0.9, 'colsample_bytree': 0.7} |       1.0        |  0.861292456051

Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644
3,3,1.0,0.864455,0.871583
4,4,1.0,0.869935,0.881892


In [23]:
# step5: tune reg_alpha
# Candidate L1 weights span several orders of magnitude (1e-5 up to 100).
param_test5 = { 'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100] } 
best_param = tune.gridsearch_cv(param_test5,n_jobs = 4)
# Display the score table; the recorded output gains one row after this step.
tune.dfscore

CV Results: 
+---+----------------------+------------------+-----------------+
|   |        params        | mean_train_score | mean_test_score |
+---+----------------------+------------------+-----------------+
| 0 | {'reg_alpha': 1e-05} |       1.0        |  0.869017039313 |
| 1 | {'reg_alpha': 0.01}  |       1.0        |  0.866344512715 |
| 2 |  {'reg_alpha': 0.1}  |       1.0        |  0.86493180008  |
| 3 |   {'reg_alpha': 1}   |  0.999999933329  |  0.862349590374 |
| 4 |  {'reg_alpha': 100}  |  0.742168521008  |  0.726488273027 |
+---+----------------------+------------------+-----------------+
Best Params: 
{'reg_alpha': 1e-05}
Best Score: 
0.8690170393127381


Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644
3,3,1.0,0.864455,0.871583
4,4,1.0,0.869935,0.881892
5,5,1.0,0.869017,0.881892


In [24]:
# step6: tune reg_lambda
# Candidate L2 weights span several orders of magnitude (1e-5 up to 1000).
param_test6 = { 'reg_lambda': [1e-5, 1e-2, 0.1, 1, 100, 1000] }
best_param = tune.gridsearch_cv(param_test6,n_jobs = 4)
# Display the score table; the recorded output gains one row after this step.
tune.dfscore

CV Results: 
+---+-----------------------+------------------+-----------------+
|   |         params        | mean_train_score | mean_test_score |
+---+-----------------------+------------------+-----------------+
| 0 | {'reg_lambda': 1e-05} |       1.0        |  0.865687403994 |
| 1 |  {'reg_lambda': 0.01} |       1.0        |  0.867990840303 |
| 2 |  {'reg_lambda': 0.1}  |       1.0        |  0.865423564886 |
| 3 |   {'reg_lambda': 1}   |       1.0        |  0.869017039313 |
| 4 |  {'reg_lambda': 100}  |  0.979617306619  |  0.852049197815 |
| 5 |  {'reg_lambda': 1000} |  0.860412021925  |  0.803147223645 |
+---+-----------------------+------------------+-----------------+
Best Params: 
{'reg_lambda': 1}
Best Score: 
0.8690170393127381


Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644
3,3,1.0,0.864455,0.871583
4,4,1.0,0.869935,0.881892
5,5,1.0,0.869017,0.881892
6,6,1.0,0.869017,0.881892


In [25]:
# step7: lower learning_rate and raise n_estimators
param_test7 = { 'learning_rate': 0.001, 'n_estimators':10000}
# Push the new values into the tuner's parameter dict, then into the wrapped model.
tune.params_dict.update(param_test7)
tune.model.set_params(**tune.params_dict)
# NOTE(review): unlike step1, no seed is passed here -- confirm whether that is intentional.
tune.xgboost_cv(cv_folds= 5, early_stopping_rounds= 100,n_jobs = 4)
tune.dfscore 

[0]	train-auc:0.887421	test-auc:0.747355
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 100 rounds.
[1]	train-auc:0.931896	test-auc:0.794282
[2]	train-auc:0.944839	test-auc:0.809399
[3]	train-auc:0.952864	test-auc:0.819951
[4]	train-auc:0.957529	test-auc:0.826464
[5]	train-auc:0.960881	test-auc:0.830967
[6]	train-auc:0.962552	test-auc:0.833848
[7]	train-auc:0.964029	test-auc:0.835038
[8]	train-auc:0.965419	test-auc:0.838004
[9]	train-auc:0.965727	test-auc:0.83975
[10]	train-auc:0.967358	test-auc:0.841836
[11]	train-auc:0.967954	test-auc:0.842922
[12]	train-auc:0.968281	test-auc:0.843816
[13]	train-auc:0.968512	test-auc:0.845353
[14]	train-auc:0.969072	test-auc:0.845503
[15]	train-auc:0.969492	test-auc:0.846253
[16]	train-auc:0.969923	test-auc:0.847499
[17]	train-auc:0.97045	test-auc:0.848954
[18]	train-auc:0.970724	test-auc:0.848859
[19]	train-auc:0.97078	test-auc:0.84941
[20]	train-auc:0.970844	test-auc

[191]	train-auc:0.975705	test-auc:0.855987
[192]	train-auc:0.975721	test-auc:0.855916
[193]	train-auc:0.975778	test-auc:0.855886
[194]	train-auc:0.975827	test-auc:0.855962
[195]	train-auc:0.975872	test-auc:0.856009
[196]	train-auc:0.975878	test-auc:0.855957
[197]	train-auc:0.975896	test-auc:0.855983
[198]	train-auc:0.975932	test-auc:0.856036
[199]	train-auc:0.975985	test-auc:0.856124
[200]	train-auc:0.975984	test-auc:0.856092
[201]	train-auc:0.97602	test-auc:0.856069
[202]	train-auc:0.976024	test-auc:0.856129
[203]	train-auc:0.976037	test-auc:0.856061
[204]	train-auc:0.976051	test-auc:0.856084
[205]	train-auc:0.976094	test-auc:0.856064
[206]	train-auc:0.976128	test-auc:0.856141
[207]	train-auc:0.97613	test-auc:0.856181
[208]	train-auc:0.976123	test-auc:0.856226
[209]	train-auc:0.97613	test-auc:0.856241
[210]	train-auc:0.976137	test-auc:0.856263
[211]	train-auc:0.976167	test-auc:0.856281
[212]	train-auc:0.976151	test-auc:0.856235
[213]	train-auc:0.976175	test-auc:0.856264
[214]	train-au

[383]	train-auc:0.979168	test-auc:0.858896
[384]	train-auc:0.979199	test-auc:0.858906
[385]	train-auc:0.979227	test-auc:0.858946
[386]	train-auc:0.979251	test-auc:0.858935
[387]	train-auc:0.979255	test-auc:0.858942
[388]	train-auc:0.979278	test-auc:0.858939
[389]	train-auc:0.979293	test-auc:0.858974
[390]	train-auc:0.979306	test-auc:0.858987
[391]	train-auc:0.979327	test-auc:0.859003
[392]	train-auc:0.979346	test-auc:0.858983
[393]	train-auc:0.979368	test-auc:0.859059
[394]	train-auc:0.979383	test-auc:0.859053
[395]	train-auc:0.979426	test-auc:0.859102
[396]	train-auc:0.979428	test-auc:0.85912
[397]	train-auc:0.979437	test-auc:0.859143
[398]	train-auc:0.979465	test-auc:0.859175
[399]	train-auc:0.97949	test-auc:0.859173
[400]	train-auc:0.979512	test-auc:0.859214
[401]	train-auc:0.979524	test-auc:0.859184
[402]	train-auc:0.97955	test-auc:0.859209
[403]	train-auc:0.97957	test-auc:0.859217
[404]	train-auc:0.979581	test-auc:0.859235
[405]	train-auc:0.979613	test-auc:0.859236
[406]	train-auc

[575]	train-auc:0.982243	test-auc:0.861531
[576]	train-auc:0.982246	test-auc:0.861504
[577]	train-auc:0.982261	test-auc:0.861491
[578]	train-auc:0.98228	test-auc:0.861491
[579]	train-auc:0.982294	test-auc:0.861484
[580]	train-auc:0.982312	test-auc:0.861516
[581]	train-auc:0.982337	test-auc:0.861532
[582]	train-auc:0.982364	test-auc:0.86153
[583]	train-auc:0.982384	test-auc:0.861529
[584]	train-auc:0.982394	test-auc:0.861524
[585]	train-auc:0.98241	test-auc:0.861562
[586]	train-auc:0.982421	test-auc:0.861548
[587]	train-auc:0.982437	test-auc:0.861555
[588]	train-auc:0.982461	test-auc:0.861593
[589]	train-auc:0.982475	test-auc:0.861603
[590]	train-auc:0.982497	test-auc:0.861616
[591]	train-auc:0.98251	test-auc:0.861579
[592]	train-auc:0.982516	test-auc:0.86159
[593]	train-auc:0.982528	test-auc:0.861601
[594]	train-auc:0.982547	test-auc:0.861632
[595]	train-auc:0.98257	test-auc:0.861655
[596]	train-auc:0.982586	test-auc:0.861675
[597]	train-auc:0.9826	test-auc:0.86167
[598]	train-auc:0.98

[767]	train-auc:0.984915	test-auc:0.863501
[768]	train-auc:0.984928	test-auc:0.863531
[769]	train-auc:0.984937	test-auc:0.863536
[770]	train-auc:0.984952	test-auc:0.863543
[771]	train-auc:0.984968	test-auc:0.863553
[772]	train-auc:0.98498	test-auc:0.863553
[773]	train-auc:0.984987	test-auc:0.863557
[774]	train-auc:0.985007	test-auc:0.86357
[775]	train-auc:0.985014	test-auc:0.863582
[776]	train-auc:0.985028	test-auc:0.863583
[777]	train-auc:0.985048	test-auc:0.863597
[778]	train-auc:0.985063	test-auc:0.863636
[779]	train-auc:0.985074	test-auc:0.863649
[780]	train-auc:0.98509	test-auc:0.863661
[781]	train-auc:0.985104	test-auc:0.863668
[782]	train-auc:0.985118	test-auc:0.863688
[783]	train-auc:0.985132	test-auc:0.863693
[784]	train-auc:0.985143	test-auc:0.863703
[785]	train-auc:0.985156	test-auc:0.863713
[786]	train-auc:0.985169	test-auc:0.863701
[787]	train-auc:0.985185	test-auc:0.863709
[788]	train-auc:0.985202	test-auc:0.863714
[789]	train-auc:0.985211	test-auc:0.863727
[790]	train-au

[959]	train-auc:0.987288	test-auc:0.865388
[960]	train-auc:0.987295	test-auc:0.865387
[961]	train-auc:0.987311	test-auc:0.86541
[962]	train-auc:0.987318	test-auc:0.865406
[963]	train-auc:0.987328	test-auc:0.865392
[964]	train-auc:0.987337	test-auc:0.865396
[965]	train-auc:0.987347	test-auc:0.86542
[966]	train-auc:0.987358	test-auc:0.865433
[967]	train-auc:0.98737	test-auc:0.865448
[968]	train-auc:0.987381	test-auc:0.86546
[969]	train-auc:0.987394	test-auc:0.865456
[970]	train-auc:0.987407	test-auc:0.865482
[971]	train-auc:0.987418	test-auc:0.8655
[972]	train-auc:0.987428	test-auc:0.865512
[973]	train-auc:0.987441	test-auc:0.865496
[974]	train-auc:0.987447	test-auc:0.865499
[975]	train-auc:0.987455	test-auc:0.865506
[976]	train-auc:0.987464	test-auc:0.865517
[977]	train-auc:0.987476	test-auc:0.865539
[978]	train-auc:0.987484	test-auc:0.865553
[979]	train-auc:0.987498	test-auc:0.865568
[980]	train-auc:0.98751	test-auc:0.865581
[981]	train-auc:0.98752	test-auc:0.865578
[982]	train-auc:0.9

[1148]	train-auc:0.989208	test-auc:0.867046
[1149]	train-auc:0.989213	test-auc:0.86705
[1150]	train-auc:0.989226	test-auc:0.867054
[1151]	train-auc:0.989235	test-auc:0.867066
[1152]	train-auc:0.989243	test-auc:0.867081
[1153]	train-auc:0.989251	test-auc:0.867084
[1154]	train-auc:0.989261	test-auc:0.867075
[1155]	train-auc:0.989272	test-auc:0.867098
[1156]	train-auc:0.989279	test-auc:0.8671
[1157]	train-auc:0.98929	test-auc:0.86711
[1158]	train-auc:0.989303	test-auc:0.867123
[1159]	train-auc:0.989311	test-auc:0.867113
[1160]	train-auc:0.989325	test-auc:0.867131
[1161]	train-auc:0.989337	test-auc:0.867106
[1162]	train-auc:0.989346	test-auc:0.867133
[1163]	train-auc:0.98936	test-auc:0.867141
[1164]	train-auc:0.989367	test-auc:0.867133
[1165]	train-auc:0.989376	test-auc:0.867138
[1166]	train-auc:0.989384	test-auc:0.867129
[1167]	train-auc:0.989396	test-auc:0.867154
[1168]	train-auc:0.989403	test-auc:0.867151
[1169]	train-auc:0.989415	test-auc:0.867164
[1170]	train-auc:0.989423	test-auc:0.8

[1335]	train-auc:0.99083	test-auc:0.868251
[1336]	train-auc:0.990838	test-auc:0.86826
[1337]	train-auc:0.990848	test-auc:0.868266
[1338]	train-auc:0.990857	test-auc:0.868278
[1339]	train-auc:0.990866	test-auc:0.868276
[1340]	train-auc:0.990873	test-auc:0.868287
[1341]	train-auc:0.990882	test-auc:0.868313
[1342]	train-auc:0.99089	test-auc:0.868306
[1343]	train-auc:0.990897	test-auc:0.868323
[1344]	train-auc:0.990906	test-auc:0.868336
[1345]	train-auc:0.990913	test-auc:0.868333
[1346]	train-auc:0.990922	test-auc:0.868346
[1347]	train-auc:0.99093	test-auc:0.868346
[1348]	train-auc:0.990936	test-auc:0.868355
[1349]	train-auc:0.990943	test-auc:0.868354
[1350]	train-auc:0.99095	test-auc:0.86837
[1351]	train-auc:0.99096	test-auc:0.86837
[1352]	train-auc:0.990967	test-auc:0.86838
[1353]	train-auc:0.990976	test-auc:0.868384
[1354]	train-auc:0.990984	test-auc:0.868392
[1355]	train-auc:0.990992	test-auc:0.868392
[1356]	train-auc:0.990998	test-auc:0.868404
[1357]	train-auc:0.991006	test-auc:0.8684

[1523]	train-auc:0.992277	test-auc:0.869409
[1524]	train-auc:0.992285	test-auc:0.869417
[1525]	train-auc:0.992293	test-auc:0.869422
[1526]	train-auc:0.992299	test-auc:0.869426
[1527]	train-auc:0.992307	test-auc:0.869426
[1528]	train-auc:0.992315	test-auc:0.869432
[1529]	train-auc:0.992321	test-auc:0.86944
[1530]	train-auc:0.992329	test-auc:0.869438
[1531]	train-auc:0.992338	test-auc:0.869453
[1532]	train-auc:0.992345	test-auc:0.869457
[1533]	train-auc:0.992351	test-auc:0.869459
[1534]	train-auc:0.992361	test-auc:0.869469
[1535]	train-auc:0.992364	test-auc:0.869478
[1536]	train-auc:0.992372	test-auc:0.869492
[1537]	train-auc:0.992379	test-auc:0.869497
[1538]	train-auc:0.992385	test-auc:0.869499
[1539]	train-auc:0.992391	test-auc:0.8695
[1540]	train-auc:0.9924	test-auc:0.869498
[1541]	train-auc:0.992406	test-auc:0.869501
[1542]	train-auc:0.992414	test-auc:0.869502
[1543]	train-auc:0.992422	test-auc:0.869509
[1544]	train-auc:0.992429	test-auc:0.869508
[1545]	train-auc:0.992436	test-auc:0.

[1711]	train-auc:0.993549	test-auc:0.870466
[1712]	train-auc:0.993554	test-auc:0.870477
[1713]	train-auc:0.993561	test-auc:0.870482
[1714]	train-auc:0.993567	test-auc:0.87048
[1715]	train-auc:0.993573	test-auc:0.870462
[1716]	train-auc:0.993579	test-auc:0.870465
[1717]	train-auc:0.993588	test-auc:0.870478
[1718]	train-auc:0.993592	test-auc:0.870483
[1719]	train-auc:0.993598	test-auc:0.870505
[1720]	train-auc:0.993603	test-auc:0.870496
[1721]	train-auc:0.993609	test-auc:0.870493
[1722]	train-auc:0.993616	test-auc:0.870498
[1723]	train-auc:0.993621	test-auc:0.870507
[1724]	train-auc:0.993626	test-auc:0.870507
[1725]	train-auc:0.993632	test-auc:0.870506
[1726]	train-auc:0.993639	test-auc:0.870505
[1727]	train-auc:0.993645	test-auc:0.870519
[1728]	train-auc:0.99365	test-auc:0.870524
[1729]	train-auc:0.993656	test-auc:0.870524
[1730]	train-auc:0.993661	test-auc:0.870539
[1731]	train-auc:0.993668	test-auc:0.870549
[1732]	train-auc:0.993674	test-auc:0.870548
[1733]	train-auc:0.993678	test-auc

[1899]	train-auc:0.994615	test-auc:0.871445
[1900]	train-auc:0.99462	test-auc:0.871454
[1901]	train-auc:0.994626	test-auc:0.87146
[1902]	train-auc:0.994631	test-auc:0.871461
[1903]	train-auc:0.994637	test-auc:0.871471
[1904]	train-auc:0.994643	test-auc:0.871467
[1905]	train-auc:0.994648	test-auc:0.871467
[1906]	train-auc:0.994654	test-auc:0.871484
[1907]	train-auc:0.994659	test-auc:0.871481
[1908]	train-auc:0.994665	test-auc:0.871481
[1909]	train-auc:0.994671	test-auc:0.871488
[1910]	train-auc:0.994676	test-auc:0.871511
[1911]	train-auc:0.99468	test-auc:0.871505
[1912]	train-auc:0.994685	test-auc:0.871505
[1913]	train-auc:0.994692	test-auc:0.871504
[1914]	train-auc:0.994698	test-auc:0.871497
[1915]	train-auc:0.994704	test-auc:0.871488
[1916]	train-auc:0.994709	test-auc:0.871508
[1917]	train-auc:0.994713	test-auc:0.871521
[1918]	train-auc:0.994717	test-auc:0.871524
[1919]	train-auc:0.994723	test-auc:0.871529
[1920]	train-auc:0.994727	test-auc:0.87154
[1921]	train-auc:0.994732	test-auc:0

[2086]	train-auc:0.995525	test-auc:0.872148
[2087]	train-auc:0.995528	test-auc:0.872149
[2088]	train-auc:0.995533	test-auc:0.872167
[2089]	train-auc:0.995538	test-auc:0.872174
[2090]	train-auc:0.995543	test-auc:0.872182
[2091]	train-auc:0.995548	test-auc:0.872173
[2092]	train-auc:0.995553	test-auc:0.872172
[2093]	train-auc:0.995558	test-auc:0.87218
[2094]	train-auc:0.995563	test-auc:0.872171
[2095]	train-auc:0.995566	test-auc:0.872174
[2096]	train-auc:0.99557	test-auc:0.872184
[2097]	train-auc:0.995574	test-auc:0.872185
[2098]	train-auc:0.995579	test-auc:0.8722
[2099]	train-auc:0.995584	test-auc:0.872202
[2100]	train-auc:0.995587	test-auc:0.872196
[2101]	train-auc:0.995591	test-auc:0.872197
[2102]	train-auc:0.995598	test-auc:0.872203
[2103]	train-auc:0.995602	test-auc:0.872204
[2104]	train-auc:0.995605	test-auc:0.872204
[2105]	train-auc:0.995612	test-auc:0.872209
[2106]	train-auc:0.995617	test-auc:0.872223
[2107]	train-auc:0.995623	test-auc:0.872223
[2108]	train-auc:0.995627	test-auc:0

[2274]	train-auc:0.996301	test-auc:0.872674
[2275]	train-auc:0.996305	test-auc:0.872679
[2276]	train-auc:0.996308	test-auc:0.872667
[2277]	train-auc:0.996312	test-auc:0.872675
[2278]	train-auc:0.996316	test-auc:0.872681
[2279]	train-auc:0.996319	test-auc:0.872685
[2280]	train-auc:0.996322	test-auc:0.872682
[2281]	train-auc:0.996326	test-auc:0.872675
[2282]	train-auc:0.996329	test-auc:0.87268
[2283]	train-auc:0.996333	test-auc:0.872688
[2284]	train-auc:0.996337	test-auc:0.872692
[2285]	train-auc:0.996342	test-auc:0.872696
[2286]	train-auc:0.996346	test-auc:0.872714
[2287]	train-auc:0.99635	test-auc:0.872719
[2288]	train-auc:0.996354	test-auc:0.872726
[2289]	train-auc:0.996357	test-auc:0.872718
[2290]	train-auc:0.996361	test-auc:0.87272
[2291]	train-auc:0.996365	test-auc:0.872719
[2292]	train-auc:0.996369	test-auc:0.872716
[2293]	train-auc:0.996372	test-auc:0.872725
[2294]	train-auc:0.996373	test-auc:0.87272
[2295]	train-auc:0.996377	test-auc:0.872724
[2296]	train-auc:0.99638	test-auc:0.

[2462]	train-auc:0.996951	test-auc:0.873265
[2463]	train-auc:0.996955	test-auc:0.873268
[2464]	train-auc:0.996957	test-auc:0.873277
[2465]	train-auc:0.996961	test-auc:0.873267
[2466]	train-auc:0.996964	test-auc:0.873273
[2467]	train-auc:0.996968	test-auc:0.873281
[2468]	train-auc:0.996971	test-auc:0.873273
[2469]	train-auc:0.996975	test-auc:0.87328
[2470]	train-auc:0.996979	test-auc:0.873281
[2471]	train-auc:0.996982	test-auc:0.873288
[2472]	train-auc:0.996986	test-auc:0.873305
[2473]	train-auc:0.996989	test-auc:0.873296
[2474]	train-auc:0.996994	test-auc:0.873305
[2475]	train-auc:0.996996	test-auc:0.87332
[2476]	train-auc:0.996999	test-auc:0.873321
[2477]	train-auc:0.997003	test-auc:0.873321
[2478]	train-auc:0.997006	test-auc:0.873338
[2479]	train-auc:0.997009	test-auc:0.873343
[2480]	train-auc:0.997012	test-auc:0.873347
[2481]	train-auc:0.997016	test-auc:0.873354
[2482]	train-auc:0.997018	test-auc:0.873353
[2483]	train-auc:0.997021	test-auc:0.873354
[2484]	train-auc:0.997024	test-auc

[2650]	train-auc:0.997497	test-auc:0.873715
[2651]	train-auc:0.997499	test-auc:0.873716
[2652]	train-auc:0.997501	test-auc:0.873728
[2653]	train-auc:0.997504	test-auc:0.873723
[2654]	train-auc:0.997508	test-auc:0.873727
[2655]	train-auc:0.997511	test-auc:0.873733
[2656]	train-auc:0.997513	test-auc:0.87372
[2657]	train-auc:0.997516	test-auc:0.873721
[2658]	train-auc:0.99752	test-auc:0.873713
[2659]	train-auc:0.997523	test-auc:0.87372
[2660]	train-auc:0.997526	test-auc:0.873731
[2661]	train-auc:0.997528	test-auc:0.873723
[2662]	train-auc:0.997531	test-auc:0.873729
[2663]	train-auc:0.997534	test-auc:0.87374
[2664]	train-auc:0.997537	test-auc:0.873737
[2665]	train-auc:0.997538	test-auc:0.873737
[2666]	train-auc:0.997541	test-auc:0.873731
[2667]	train-auc:0.997545	test-auc:0.873741
[2668]	train-auc:0.997548	test-auc:0.87374
[2669]	train-auc:0.997551	test-auc:0.873751
[2670]	train-auc:0.997554	test-auc:0.873747
[2671]	train-auc:0.997556	test-auc:0.873745
[2672]	train-auc:0.997559	test-auc:0.

[2838]	train-auc:0.99795	test-auc:0.873963
[2839]	train-auc:0.997952	test-auc:0.873968
[2840]	train-auc:0.997953	test-auc:0.873961
[2841]	train-auc:0.997956	test-auc:0.873967
[2842]	train-auc:0.997959	test-auc:0.873963
[2843]	train-auc:0.99796	test-auc:0.873963
[2844]	train-auc:0.997963	test-auc:0.873962
[2845]	train-auc:0.997965	test-auc:0.87397
[2846]	train-auc:0.997967	test-auc:0.873972
[2847]	train-auc:0.997969	test-auc:0.873986
[2848]	train-auc:0.997971	test-auc:0.873993
[2849]	train-auc:0.997973	test-auc:0.873994
[2850]	train-auc:0.997975	test-auc:0.873996
[2851]	train-auc:0.997977	test-auc:0.874001
[2852]	train-auc:0.997979	test-auc:0.874
[2853]	train-auc:0.997981	test-auc:0.874001
[2854]	train-auc:0.997984	test-auc:0.874006
[2855]	train-auc:0.997986	test-auc:0.874006
[2856]	train-auc:0.997988	test-auc:0.874012
[2857]	train-auc:0.997989	test-auc:0.874012
[2858]	train-auc:0.997991	test-auc:0.874018
[2859]	train-auc:0.997994	test-auc:0.874017
[2860]	train-auc:0.997996	test-auc:0.8

[3026]	train-auc:0.998302	test-auc:0.874376
[3027]	train-auc:0.998303	test-auc:0.874372
[3028]	train-auc:0.998305	test-auc:0.874386
[3029]	train-auc:0.998308	test-auc:0.874379
[3030]	train-auc:0.99831	test-auc:0.874384
[3031]	train-auc:0.998312	test-auc:0.874391
[3032]	train-auc:0.998313	test-auc:0.874396
[3033]	train-auc:0.998314	test-auc:0.874394
[3034]	train-auc:0.998316	test-auc:0.874392
[3035]	train-auc:0.998318	test-auc:0.874394
[3036]	train-auc:0.99832	test-auc:0.874395
[3037]	train-auc:0.998321	test-auc:0.874401
[3038]	train-auc:0.998324	test-auc:0.874402
[3039]	train-auc:0.998325	test-auc:0.874405
[3040]	train-auc:0.998327	test-auc:0.874399
[3041]	train-auc:0.998328	test-auc:0.874395
[3042]	train-auc:0.99833	test-auc:0.8744
[3043]	train-auc:0.998331	test-auc:0.874398
[3044]	train-auc:0.998333	test-auc:0.874395
[3045]	train-auc:0.998335	test-auc:0.874408
[3046]	train-auc:0.998337	test-auc:0.874418
[3047]	train-auc:0.998338	test-auc:0.874426
[3048]	train-auc:0.998341	test-auc:0.

[3213]	train-auc:0.998588	test-auc:0.874669
[3214]	train-auc:0.998589	test-auc:0.874663
[3215]	train-auc:0.99859	test-auc:0.874669
[3216]	train-auc:0.998592	test-auc:0.874671
[3217]	train-auc:0.998593	test-auc:0.87467
[3218]	train-auc:0.998594	test-auc:0.874672
[3219]	train-auc:0.998596	test-auc:0.874671
[3220]	train-auc:0.998597	test-auc:0.874669
[3221]	train-auc:0.998598	test-auc:0.874681
[3222]	train-auc:0.998599	test-auc:0.874682
[3223]	train-auc:0.9986	test-auc:0.874687
[3224]	train-auc:0.998602	test-auc:0.874685
[3225]	train-auc:0.998603	test-auc:0.874692
[3226]	train-auc:0.998604	test-auc:0.874695
[3227]	train-auc:0.998605	test-auc:0.874694
[3228]	train-auc:0.998606	test-auc:0.874694
[3229]	train-auc:0.998607	test-auc:0.874699
[3230]	train-auc:0.998609	test-auc:0.874708
[3231]	train-auc:0.99861	test-auc:0.874707
[3232]	train-auc:0.998612	test-auc:0.874707
[3233]	train-auc:0.998613	test-auc:0.874703
[3234]	train-auc:0.998614	test-auc:0.874697
[3235]	train-auc:0.998615	test-auc:0.

[3401]	train-auc:0.998805	test-auc:0.874961
[3402]	train-auc:0.998806	test-auc:0.874959
[3403]	train-auc:0.998807	test-auc:0.874958
[3404]	train-auc:0.998807	test-auc:0.874965
[3405]	train-auc:0.998808	test-auc:0.874969
[3406]	train-auc:0.998809	test-auc:0.874967
[3407]	train-auc:0.99881	test-auc:0.874974
[3408]	train-auc:0.998812	test-auc:0.874974
[3409]	train-auc:0.998813	test-auc:0.874975
[3410]	train-auc:0.998815	test-auc:0.874981
[3411]	train-auc:0.998816	test-auc:0.874979
[3412]	train-auc:0.998816	test-auc:0.874983
[3413]	train-auc:0.998818	test-auc:0.874989
[3414]	train-auc:0.998818	test-auc:0.874989
[3415]	train-auc:0.998819	test-auc:0.874995
[3416]	train-auc:0.99882	test-auc:0.874992
[3417]	train-auc:0.998821	test-auc:0.874988
[3418]	train-auc:0.998821	test-auc:0.874991
[3419]	train-auc:0.998822	test-auc:0.874993
[3420]	train-auc:0.998824	test-auc:0.875005
[3421]	train-auc:0.998825	test-auc:0.874991
[3422]	train-auc:0.998826	test-auc:0.874979
[3423]	train-auc:0.998828	test-auc

[3588]	train-auc:0.998987	test-auc:0.875156
[3589]	train-auc:0.998988	test-auc:0.875153
[3590]	train-auc:0.998989	test-auc:0.875162
[3591]	train-auc:0.99899	test-auc:0.87516
[3592]	train-auc:0.99899	test-auc:0.875162
[3593]	train-auc:0.998992	test-auc:0.87516
[3594]	train-auc:0.998993	test-auc:0.875164
[3595]	train-auc:0.998994	test-auc:0.875161
[3596]	train-auc:0.998995	test-auc:0.875157
[3597]	train-auc:0.998996	test-auc:0.87516
[3598]	train-auc:0.998997	test-auc:0.87515
[3599]	train-auc:0.998998	test-auc:0.875153
[3600]	train-auc:0.998999	test-auc:0.87515
[3601]	train-auc:0.998999	test-auc:0.875156
[3602]	train-auc:0.999	test-auc:0.875158
[3603]	train-auc:0.999001	test-auc:0.875164
[3604]	train-auc:0.999002	test-auc:0.875163
[3605]	train-auc:0.999003	test-auc:0.875168
[3606]	train-auc:0.999003	test-auc:0.875168
[3607]	train-auc:0.999004	test-auc:0.875171
[3608]	train-auc:0.999005	test-auc:0.875166
[3609]	train-auc:0.999007	test-auc:0.875162
[3610]	train-auc:0.999007	test-auc:0.87517

Unnamed: 0,model_id,train_score,validate_score,test_score
0,0,0.928578,0.837681,0.846773
1,1,0.985017,0.84341,0.853352
2,2,1.0,0.86335,0.870644
3,3,1.0,0.864455,0.871583
4,4,1.0,0.869935,0.881892
5,5,1.0,0.869017,0.881892
6,6,1.0,0.869017,0.881892
7,7,0.998971,0.873659,0.886452
