In [None]:
from  tunning import Tunning

In [None]:
from __future__ import print_function
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
from tianjikit.tunning import Tunning


# Build a synthetic binary-classification data set: 10000 rows, 20 features
# (6 of them informative), generated with a fixed seed.
data, label = datasets.make_classification(
    n_samples=10000, n_features=20, n_informative=6,
    n_classes=2, n_clusters_per_class=10, random_state=0)

# Wrap the feature matrix in a DataFrame with columns f0..f19 and append the target.
dfdata = pd.DataFrame(data, columns=[u'f' + str(i) for i in range(data.shape[1])])
dfdata['label'] = label

# Seed the split as well: without random_state the train/test partition changes
# on every run, so tuning scores could not be reproduced after a kernel restart.
dftrain, dftest = train_test_split(dfdata, random_state=0)

In [None]:
# Initial XGBoost parameters handed to the tuner.
# Regularisation term for reference:
#   Omega(f) = gamma*T + reg_alpha*sum(abs(wj)) + reg_lambda*sum(wj**2)
params_dict = {
    # --- parameters that are meant to be tuned ---
    # booster parameters
    'learning_rate': 0.1,        # learning rate; start at 0.1, smaller is usually better
    'n_estimators': 60,          # number of trees in the additive model; usually settled via CV
    # tree parameters
    'max_depth': 3,              # tree depth; usually within [3,10], initial value often in [3,6]
    'min_child_weight': 10,      # minimum sum of instance weights in a leaf; larger = more conservative
    'gamma': 0,                  # minimum loss reduction required to split; larger = more conservative
    'subsample': 0.8,            # row sampling ratio, usually within [0.5,1]
    'colsample_bytree': 1.0,     # feature (column) sampling ratio, usually within [0.5,1]
    # regularisation parameters
    'reg_alpha': 0,              # L1 penalty weight; larger = more conservative, usually within [0,1]
    'reg_lambda': 1,             # L2 penalty weight; larger = more conservative, usually within [1,100]
    # --- parameters that normally stay fixed ---
    'objective': 'binary:logistic',
    'tree_method': 'hist',       # tree construction strategy: auto, exact, approx or hist
    'eval_metric': 'auc',
    'silent': 1,
    'scale_pos_weight': 1,       # for imbalanced samples a positive value can speed up convergence
    'seed': 0,
}

In [None]:
# step0: initialise the tuner with the train/test frames, the KS score function,
# a score-gap limit of 0.05, the initial parameters and 4 parallel jobs.
tune = Tunning(dftrain, dftest, score_func='ks', score_gap_limit=0.05,
               params_dict=params_dict, n_jobs=4)

# step1: tune n_estimators for relatively high learning_rate
params_test1 = {'learning_rate': [0.1], 'n_estimators': [50]}
tune.gridsearch_cv(params_test1, cv=5, verbose_eval=10)

# step2: tune max_depth & min_child_weight
params_test2 = {'max_depth': [3], 'min_child_weight': [50, 100, 200]}
tune.gridsearch_cv(params_test2, cv=5, verbose_eval=10)

# Steps 3-7 are optional and skipped by default. They sit behind a flag instead of
# inside a triple-quoted string: a trailing string literal is echoed by the notebook
# as the cell's output, and code hidden in a string is never syntax-checked.
RUN_OPTIONAL_STEPS = False

if RUN_OPTIONAL_STEPS:
    # step3: tune gamma
    params_test3 = {'gamma': [0.1, 0.5, 1]}
    tune.gridsearch_cv(params_test3, cv=5, verbose_eval=10)

    # step4: tune subsample & colsample_bytree
    params_test4 = {'subsample': [0.9, 1.0], 'colsample_bytree': [1.0]}
    tune.gridsearch_cv(params_test4, cv=5, verbose_eval=10)

    # step5: tune reg_alpha
    params_test5 = {'reg_alpha': [0.1, 1]}
    tune.gridsearch_cv(params_test5, cv=5, verbose_eval=10)

    # step6: tune reg_lambda
    params_test6 = {'reg_lambda': [0, 0.1]}
    tune.gridsearch_cv(params_test6, cv=5, verbose_eval=10)

    # step7: lower learning_rate and raise n_estimators
    params_test7 = {'learning_rate': [0.08, 0.09], 'n_estimators': [100]}
    tune.gridsearch_cv(params_test7, cv=5)

In [None]:
# step8: fit the final model with the tuned parameters on the full training set.
# train_best() hands back two objects: the fitted model and an importance table.
bst, dfimportance = tune.train_best()

# Persist both artefacts in the working directory; the importance table is
# written tab-separated.
bst.save_model('./bst.model')
dfimportance.to_csv('./dfimportance.csv', sep='\t')

In [None]:
# Display the importance table returned by tune.train_best() as the cell output.
dfimportance

In [1]:
# Execute the runpipeline.py script inside this kernel; the log printed below
# shows it running a feature-analysis part followed by an XGBoost tuning part.
%run runpipeline.py


task_name: example 


PART 1:START ANALYSIS FEATURES...


start basic_analysises...
[total|done|todo]
[31|31|0]


start ks_analysis...
[total|done|todo]
[31|31|0]


start psi_analysises...
[total|done|todo]
[31|31|0]
save results... 


PART 2:START TUNNING XGBOOT...


train set size: 426
test set size: 143
feature number: 32
score func: ks
score gap limit: 0.03
n_jobs: 16


step1: tune n_estimators for relatively high learning_rate...


{'n_estimators': 50, 'learning_rate': 0.1}


k = 1
[19:22:49] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.928826	valid-auc:0.943396	test-auc:0.866758	train-ks:0.857651	valid-ks:0.886792	test-ks:0.733516
[20]	train-auc:0.965761	valid-auc:0.970224	test-auc:0.930156	train-ks:0.867114	valid-ks:0.924528	test-ks:0.791209
[40]	train-auc:0.965761	valid-auc:0.970224	test-auc:0.930156	train-ks:0.867114	valid-ks:0.924528	test-ks:0.791209
[49]	train-auc:0.965761	valid-auc:0.970224	test-auc:0.930156	train-k

[40]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[49]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0


k = 3
[19:22:53] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[20]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[40]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[49]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0


k = 4
[19:22:54] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[20]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[40]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0
[49]	train-auc:0.5	valid-auc:0.5	test-auc:0.5	train-ks:0	valid-ks:0	test-ks:0


k = 5
[1

[0]	train-auc:0.935868	valid-auc:0.931014	test-auc:0.868132	train-ks:0.871736	valid-ks:0.862028	test-ks:0.736264
[20]	train-auc:0.970272	valid-auc:0.966686	test-auc:0.915152	train-ks:0.888938	valid-ks:0.862028	test-ks:0.760989
[40]	train-auc:0.970272	valid-auc:0.966686	test-auc:0.915152	train-ks:0.888938	valid-ks:0.862028	test-ks:0.760989
[49]	train-auc:0.970272	valid-auc:0.966686	test-auc:0.915152	train-ks:0.888938	valid-ks:0.862028	test-ks:0.760989


k = 2
[19:23:02] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.910835	valid-auc:0.912146	test-auc:0.85989	train-ks:0.82167	valid-ks:0.824292	test-ks:0.71978
[20]	train-auc:0.972785	valid-auc:0.997642	test-auc:0.932904	train-ks:0.879621	valid-ks:0.949882	test-ks:0.793956
[40]	train-auc:0.972785	valid-auc:0.997642	test-auc:0.932904	train-ks:0.879621	valid-ks:0.949882	test-ks:0.793956
[49]	train-auc:0.972785	valid-auc:0.997642	test-auc:0.932904	train-ks:0.879621	valid-ks:0.949882	test

[20]	train-auc:0.957233	valid-auc:0.990566	test-auc:0.924662	train-ks:0.856074	valid-ks:0.981132	test-ks:0.788462
[40]	train-auc:0.957233	valid-auc:0.990566	test-auc:0.924662	train-ks:0.856074	valid-ks:0.981132	test-ks:0.788462
[49]	train-auc:0.957233	valid-auc:0.990566	test-auc:0.924662	train-ks:0.856074	valid-ks:0.981132	test-ks:0.788462


k = 4
[19:23:09] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.932732	valid-auc:0.934257	test-auc:0.862637	train-ks:0.865464	valid-ks:0.868514	test-ks:0.725275
[20]	train-auc:0.980047	valid-auc:0.975531	test-auc:0.941991	train-ks:0.899941	valid-ks:0.899764	test-ks:0.785714
[40]	train-auc:0.980047	valid-auc:0.975531	test-auc:0.941991	train-ks:0.899941	valid-ks:0.899764	test-ks:0.785714
[49]	train-auc:0.980047	valid-auc:0.975531	test-auc:0.941991	train-ks:0.899941	valid-ks:0.899764	test-ks:0.785714


k = 5
[19:23:09] Tree method is selected to be 'hist', which uses a single updater grow_fast_hi

[20]	train-auc:0.979405	valid-auc:0.982311	test-auc:0.940617	train-ks:0.890515	valid-ks:0.924528	test-ks:0.802198
[40]	train-auc:0.979405	valid-auc:0.982311	test-auc:0.940617	train-ks:0.890515	valid-ks:0.924528	test-ks:0.802198
[49]	train-auc:0.979405	valid-auc:0.982311	test-auc:0.940617	train-ks:0.890515	valid-ks:0.924528	test-ks:0.802198


k = 5
[19:23:12] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.939637	valid-auc:0.887731	test-auc:0.854396	train-ks:0.879275	valid-ks:0.775463	test-ks:0.708791
[20]	train-auc:0.98373	valid-auc:0.980035	test-auc:0.94421	train-ks:0.89033	valid-ks:0.925926	test-ks:0.807692
[40]	train-auc:0.98373	valid-auc:0.980035	test-auc:0.94421	train-ks:0.89033	valid-ks:0.925926	test-ks:0.807692
[49]	train-auc:0.98373	valid-auc:0.980035	test-auc:0.94421	train-ks:0.89033	valid-ks:0.925926	test-ks:0.807692


{'subsample': 1.0, 'colsample_bytree': 1.0}


k = 1
[19:23:13] Tree method is selected to be 'hist', whi

[0]	train-auc:0.93275	valid-auc:0.943396	test-auc:0.868132	train-ks:0.8655	valid-ks:0.886792	test-ks:0.736264
[20]	train-auc:0.977681	valid-auc:0.99941	test-auc:0.94104	train-ks:0.881125	valid-ks:0.981132	test-ks:0.78022
[40]	train-auc:0.977681	valid-auc:0.99941	test-auc:0.94104	train-ks:0.881125	valid-ks:0.981132	test-ks:0.78022
[49]	train-auc:0.977681	valid-auc:0.99941	test-auc:0.94104	train-ks:0.881125	valid-ks:0.981132	test-ks:0.78022


k = 2
[19:23:17] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.931191	valid-auc:0.956073	test-auc:0.854396	train-ks:0.862383	valid-ks:0.912146	test-ks:0.708791
[20]	train-auc:0.981624	valid-auc:0.975236	test-auc:0.945478	train-ks:0.888938	valid-ks:0.912146	test-ks:0.777473
[40]	train-auc:0.981624	valid-auc:0.975236	test-auc:0.945478	train-ks:0.888938	valid-ks:0.912146	test-ks:0.777473
[49]	train-auc:0.981624	valid-auc:0.975236	test-auc:0.945478	train-ks:0.888938	valid-ks:0.912146	test-ks:0.777

[0]	train-auc:0.945257	valid-auc:0.899764	test-auc:0.854396	train-ks:0.890515	valid-ks:0.799528	test-ks:0.708791
[20]	train-auc:0.96941	valid-auc:0.958432	test-auc:0.921915	train-ks:0.893633	valid-ks:0.805425	test-ks:0.752747
[40]	train-auc:0.96941	valid-auc:0.958432	test-auc:0.921915	train-ks:0.893633	valid-ks:0.805425	test-ks:0.752747
[49]	train-auc:0.96941	valid-auc:0.958432	test-auc:0.921915	train-ks:0.893633	valid-ks:0.805425	test-ks:0.752747


k = 2
[19:23:24] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.929632	valid-auc:0.962264	test-auc:0.854396	train-ks:0.859265	valid-ks:0.924528	test-ks:0.708791
[20]	train-auc:0.979057	valid-auc:0.978479	test-auc:0.953297	train-ks:0.87489	valid-ks:0.943396	test-ks:0.793956
[40]	train-auc:0.979057	valid-auc:0.978479	test-auc:0.953297	train-ks:0.87489	valid-ks:0.943396	test-ks:0.793956
[49]	train-auc:0.979057	valid-auc:0.978479	test-auc:0.953297	train-ks:0.87489	valid-ks:0.943396	test-ks

[19:23:30] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.935098	valid-auc:0.884139	test-auc:0.877747	train-ks:0.870195	valid-ks:0.768278	test-ks:0.755495
[20]	train-auc:0.983165	valid-auc:0.970224	test-auc:0.948964	train-ks:0.906213	valid-ks:0.886792	test-ks:0.813187
[40]	train-auc:0.983165	valid-auc:0.970224	test-auc:0.948964	train-ks:0.906213	valid-ks:0.886792	test-ks:0.813187
[60]	train-auc:0.983165	valid-auc:0.970224	test-auc:0.948964	train-ks:0.906213	valid-ks:0.886792	test-ks:0.813187
[80]	train-auc:0.983165	valid-auc:0.970224	test-auc:0.948964	train-ks:0.906213	valid-ks:0.886792	test-ks:0.813187
[99]	train-auc:0.983165	valid-auc:0.970224	test-auc:0.948964	train-ks:0.906213	valid-ks:0.886792	test-ks:0.813187


k = 2
[19:23:31] Tree method is selected to be 'hist', which uses a single updater grow_fast_histmaker.
[0]	train-auc:0.933539	valid-auc:0.937205	test-auc:0.873626	train-ks:0.867077	valid-ks:0.87441	test-ks:0.747253
[

[20]	train-auc:0.979584	valid-auc:0.972222	test-auc:0.938715	train-ks:0.905808	valid-ks:0.876157	test-ks:0.791209
[40]	train-auc:0.979584	valid-auc:0.972222	test-auc:0.938715	train-ks:0.905808	valid-ks:0.876157	test-ks:0.791209
[60]	train-auc:0.979584	valid-auc:0.972222	test-auc:0.938715	train-ks:0.905808	valid-ks:0.876157	test-ks:0.791209
[80]	train-auc:0.979584	valid-auc:0.972222	test-auc:0.938715	train-ks:0.905808	valid-ks:0.876157	test-ks:0.791209
[99]	train-auc:0.979584	valid-auc:0.972222	test-auc:0.938715	train-ks:0.905808	valid-ks:0.876157	test-ks:0.791209


Tested params:
+----+----------+---------------+--------------+-----------+------------------+-------+-----------+------------------+-----------+------------+
|    | model_id | learning_rate | n_estimators | max_depth | min_child_weight | gamma | subsample | colsample_bytree | reg_alpha | reg_lambda |
+----+----------+---------------+--------------+-----------+------------------+-------+-----------+------------------+-------

[0]	train-auc:0.944487	valid-auc:0.893278	train-ks:0.888974	valid-ks:0.786557
[20]	train-auc:0.983935	valid-auc:0.954304	train-ks:0.924919	valid-ks:0.798939
[40]	train-auc:0.983935	valid-auc:0.954304	train-ks:0.924919	valid-ks:0.798939
[49]	train-auc:0.983935	valid-auc:0.954304	train-ks:0.924919	valid-ks:0.798939

train: ks = 0.92492 	 auc = 0.983971537559 
+---+-------------------+-----------+-------------+-------------+---------------+----------+
|   |  feature_interval | order_num | order_ratio | overdue_num | overdue_ratio | ks_value |
+---+-------------------+-----------+-------------+-------------+---------------+----------+
| 0 |  [0.33262,0.5616) |    136    |     0.4     |      11     |    0.08088    | 0.92492  |
| 1 |  [0.5616,0.77253) |     33    |     0.1     |      31     |    0.93939    | 0.79501  |
| 2 | [0.77253,0.83485] |    172    |     0.5     |     171     |    0.99419    |   0.0    |
+---+-------------------+-----------+-------------+-------------+---------------+-