In [1]:
import time
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import re
import gc
from sklearn import preprocessing
import matplotlib as mpl
from functools import wraps
import os
from sklearn.preprocessing import LabelEncoder
# from sklearn.externals import joblib
from joblib import Parallel, delayed
import multiprocessing
from datetime import timedelta
from io import StringIO
# from sklearn.externals import joblib
import lightgbm as lgb
from sklearn.metrics import roc_auc_score

pd.set_option('display.max_columns',None) 
pd.set_option('expand_frame_repr', False)

#我的多核函数，可以提高训练速度，考虑到总行复现环境不确定性，就没有用
def applyParallel(dfGrouped, func):
    ret = Parallel(n_jobs=multiprocessing.cpu_count()-6)(delayed(func)(name,group) for name, group in dfGrouped)
    return pd.concat(ret)

#调参类定义，传入多个参数组成的一个字典，返回一个迭代器，感觉比sklearn中的调参函数好用，个人习惯吧
class params_iter:
    def getPlans(self,lis,jude=True):
        if jude: 
            lis = [[[i] for i in lis[0]]] + lis[1:]
        if len(lis) > 2:
            for i in lis[0]:
                for j in lis[1]:
                    self.getPlans([[i + [j]]] + lis[2:], False)
        elif len(lis) == 2:
            for i in lis[0]:
                for j in lis[1]:
                    self.param_list.append(i + [j])
                
    def __init__(self,params):
        self.params=params
        self.cur_index=0
        self.param_list=[]
        val=list(params.values())
        keys=list(params.keys())
        self.getPlans(val)
        self.df=pd.DataFrame(param_list,columns=keys)
        
    def __iter__(self):
        return self
 
    def __next__(self):
        x = self.df.iloc[self.cur_index,:]
        self.cur_index += 1
        return x.to_dict()
    
def get_sample(dataset,target_0=0.33,target_1=0.33,seed=None):
    dataset=dataset.copy()
    x0=dataset[dataset['target']==0]
    x1=dataset[dataset['target']==1]
    x0_selected=x0.sample(frac=target_0,random_state=seed)
    x1_selected=x1.sample(frac=target_1,random_state=seed)
    dataset=pd.concat([x0_selected,x1_selected]).sample(frac=1,random_state=seed)
    y_selected=dataset['target']
    x_selected=dataset.drop(columns =['target'])
    return x_selected,y_selected

dataset_train=pd.read_hdf('dataset/2_插入产品相除列_填入空值_加入index.h5', key='TRAIN')
dataset_test=pd.read_hdf('dataset/2_插入产品相除列_填入空值_加入index.h5', key='PREDICT')

feature=['crcd_is_overdue', 'cus_sex', 'crcd_is_pc', 'crcd_is_sup', 'cus_type', 'crcd_is_gec', 'crcd_bachange_cnt',
         'cus_intvtime_trans', 'cus_edu', 'cus_marr_stat', 'cus_occu', 'cus_oact_pla', 'cus_os_dist', 'crcd_is_gc',
         'crcd_bill_mcnt',
         
         'is_nan_a_mavg_tranbal', 'is_nan_a_mmax_tranbal', 'is_nan_b_mavg_tranbal', 'is_nan_b_mmax_tranbal', 'is_nan_c_mavg_tranbal',
         'is_nan_c_mmax_tranbal', 'is_nan_crcd_bill_mcnt', 'is_nan_crcd_due_amount', 'is_nan_crcd_is_gc', 'is_nan_crcd_is_gec', 
         'is_nan_crcd_is_pc', 'is_nan_crcd_is_sup', 'is_nan_crcd_lmon_due_amount', 'is_nan_crcd_mavg_balance',
         'is_nan_crcd_mavg_balind', 'is_nan_crcd_max_quota', 'is_nan_crcd_points', 'is_nan_cus_active_ind', 
         'is_nan_cus_asset_bal', 'is_nan_cus_hisloan_cnt', 'is_nan_cus_marr_stat', 'is_nan_cus_mbank_lcnt', 'is_nan_cus_occu', 
         'is_nan_cus_region_move_ind', 'is_nan_cus_reloan_cap_ind', 'is_nan_d_mavg_tranbal', 'is_nan_d_mmax_tranbal',
         'is_nan_e_mavg_tranbal', 'is_nan_e_mmax_tranbal', 'is_nan_f_mavg_tranbal', 'is_nan_f_mmax_tranbal', 'is_nan_g_mavg_tranbal',
         'is_nan_g_mmax_tranbal', 'is_nan_h_mavg_tranbal', 'is_nan_h_mmax_tranbal', 'is_nan_i_mavg_tranbal', 'is_nan_i_mmax_tranbal',
         'is_nan_interval_date_1stc', 'is_nan_interval_date_1stc12', 'is_nan_interval_mon_count', 'is_nan_j_mavg_tranbal',
         'is_nan_j_mmax_tranbal', 'is_nan_k_mavg_tranbal', 'is_nan_k_mmax_tranbal', 'is_nan_m_mavg_tranbal', 'is_nan_m_mmax_tranbal',
         'is_nan_n_mavg_tranbal', 'is_nan_n_mavg_trancount',
        
        'cus_age', 'cus_mbank_lcnt', 'cus_active_ind', 'pass_date', 'interval_mon_count', 'interval_date_1stc', 'interval_date_1stc12',
         'cus_hisloan_cnt', 'crcd_mavg_balind', 'cus_region_move_ind', 'cus_reloan_cap_ind', 'cus_asset_bal', 'crcd_max_quota',
         'crcd_lmon_due_amount', 'crcd_due_amount', 'crcd_mavg_balance', 'crcd_points', 'a_mmax_tranbal', 'a_mavg_tranbal', 
         'b_mmax_tranbal', 'b_mavg_tranbal', 'c_mmax_tranbal', 'c_mavg_tranbal', 'd_mmax_tranbal', 'd_mavg_tranbal', 'e_mmax_tranbal',
         'e_mavg_tranbal', 'f_mmax_tranbal', 'f_mavg_tranbal', 'g_mmax_tranbal', 'g_mavg_tranbal', 'h_mmax_tranbal', 'h_mavg_tranbal',
         'i_mmax_tranbal', 'i_mavg_tranbal', 'j_mmax_tranbal', 'j_mavg_tranbal', 'k_mmax_tranbal', 'k_mavg_tranbal', 'm_mmax_tranbal',
         'm_mavg_tranbal', 'n_mavg_trancount', 'n_mavg_tranbal', 'a_mdivision_tranbal', 'b_mdivision_tranbal', 'c_mdivision_tranbal', 
         'd_mdivision_tranbal', 'e_mdivision_tranbal', 'f_mdivision_tranbal', 'g_mdivision_tranbal', 'h_mdivision_tranbal', 
         'i_mdivision_tranbal', 'j_mdivision_tranbal', 'k_mdivision_tranbal', 'm_mdivision_tranbal', 'n_mdivision_tranbal'
        ]
# feature=['crcd_is_overdue', 'cus_sex', 'crcd_is_pc', 'crcd_is_sup', 'cus_type', 'crcd_is_gec', 'crcd_bachange_cnt', 'cus_intvtime_trans',
#          'cus_edu', 'cus_marr_stat', 'cus_occu', 'cus_oact_pla', 'cus_os_dist', 'crcd_is_gc', 'crcd_bill_mcnt',
        
#         'cus_age', 'cus_mbank_lcnt', 'cus_active_ind', 'pass_date', 'interval_mon_count', 'interval_date_1stc', 'interval_date_1stc12',
#          'cus_hisloan_cnt', 'crcd_mavg_balind', 'cus_region_move_ind', 'cus_reloan_cap_ind', 'cus_asset_bal', 'crcd_max_quota',
#          'crcd_lmon_due_amount', 'crcd_due_amount', 'crcd_mavg_balance', 'crcd_points', 'a_mmax_tranbal', 'a_mavg_tranbal', 
#          'b_mmax_tranbal', 'b_mavg_tranbal', 'c_mmax_tranbal', 'c_mavg_tranbal', 'd_mmax_tranbal', 'd_mavg_tranbal', 'e_mmax_tranbal',
#          'e_mavg_tranbal', 'f_mmax_tranbal', 'f_mavg_tranbal', 'g_mmax_tranbal', 'g_mavg_tranbal', 'h_mmax_tranbal', 'h_mavg_tranbal',
#          'i_mmax_tranbal', 'i_mavg_tranbal', 'j_mmax_tranbal', 'j_mavg_tranbal', 'k_mmax_tranbal', 'k_mavg_tranbal', 'm_mmax_tranbal',
#          'm_mavg_tranbal', 'n_mavg_trancount', 'n_mavg_tranbal', 'a_mdivision_tranbal', 'b_mdivision_tranbal', 'c_mdivision_tranbal', 
#          'd_mdivision_tranbal', 'e_mdivision_tranbal', 'f_mdivision_tranbal', 'g_mdivision_tranbal', 'h_mdivision_tranbal', 
#          'i_mdivision_tranbal', 'j_mdivision_tranbal', 'k_mdivision_tranbal', 'm_mdivision_tranbal', 'n_mdivision_tranbal'
#         ]

In [66]:
dataset_train.shape

(0, 7141)

In [2]:

dataset_train=pd.read_hdf('dataset/3_0_插入产品相除列_填入空值_加入index.h5', key='TRAIN')
dataset_test=pd.read_hdf('dataset/3_0_插入产品相除列_填入空值_加入index.h5', key='PREDICT')

feature=list(dataset_train.columns)
feature.remove('target')

feature_train=feature+['target']
feature_test=feature
dataset_train=dataset_train[feature_train]
dataset_test=dataset_test[feature_test]

cago_feature='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14'
cago_feature=''
params={'reg_alpha': 1, 'reg_lambda': 0.8, 'subsample': 1, 'colsample_bytree': 1,
        'max_depth': 4, 'num_leaves': 18, 'min_data_in_leaf': 20, 
        'bagging_freq': 6, 'bagging_fraction': 0.65,
        'feature_fraction': 0.55, 'min_gain_to_split': 0.2, 
        'min_child_weight': 19.428902804238373, 'objective': 'binary', 'bagging_seed': 11,
        'random_state': 42,
        'learning_rate': 0.01,'boosting': 'gbdt', 'verbosity': -1, 'metric': 'auc', 
        'num_threads': 40,
        'is_unbalance': True, 'categorical_feature': cago_feature}

dataset_train_x,dataset_train_y=get_sample(dataset_train,target_0=1,target_1=1,seed=0)
X_train,X_test,Y_train,Y_test=train_test_split(dataset_train_x[feature],dataset_train_y,test_size=0.2,random_state=2)



lgb_train = lgb.Dataset(X_train,Y_train)
lgb_eval = lgb.Dataset(X_test,Y_test,reference=lgb_train)

model=lgb.train(params,lgb_train,num_boost_round=10000,valid_sets=lgb_eval,early_stopping_rounds=1000)#,init_model=model)


# for i in range(10):
#     dataset_train_x,dataset_train_y=get_sample(dataset_train,target_0=1,target_1=1)
#     X_train,X_test,Y_train,Y_test=train_test_split(dataset_train_x,dataset_train_y,test_size=0.20)
    
#     lgb_train = lgb.Dataset(X_train,Y_train)
#     lgb_eval = lgb.Dataset(X_test,Y_test,reference=lgb_train)

#     model=lgb.train(params,lgb_train,num_boost_round=3000,valid_sets=lgb_eval,early_stopping_rounds=200)#,init_model=model)
#     models.append(model)


Please use categorical_feature argument of the Dataset constructor to pass this parameter.
  .format(key))


[1]	valid_0's auc: 0.629534
Training until validation scores don't improve for 1000 rounds
[2]	valid_0's auc: 0.661589
[3]	valid_0's auc: 0.66683
[4]	valid_0's auc: 0.672048
[5]	valid_0's auc: 0.67395
[6]	valid_0's auc: 0.674983
[7]	valid_0's auc: 0.678632
[8]	valid_0's auc: 0.678441
[9]	valid_0's auc: 0.680142
[10]	valid_0's auc: 0.682254
[11]	valid_0's auc: 0.683402
[12]	valid_0's auc: 0.683895
[13]	valid_0's auc: 0.685786
[14]	valid_0's auc: 0.686866
[15]	valid_0's auc: 0.687436
[16]	valid_0's auc: 0.688054
[17]	valid_0's auc: 0.688682
[18]	valid_0's auc: 0.688899
[19]	valid_0's auc: 0.689199
[20]	valid_0's auc: 0.689771
[21]	valid_0's auc: 0.68966
[22]	valid_0's auc: 0.690252
[23]	valid_0's auc: 0.690757
[24]	valid_0's auc: 0.691077
[25]	valid_0's auc: 0.691688
[26]	valid_0's auc: 0.691957
[27]	valid_0's auc: 0.692005
[28]	valid_0's auc: 0.692545
[29]	valid_0's auc: 0.692539
[30]	valid_0's auc: 0.692505
[31]	valid_0's auc: 0.693177
[32]	valid_0's auc: 0.693402
[33]	valid_0's auc: 0

[278]	valid_0's auc: 0.716095
[279]	valid_0's auc: 0.716168
[280]	valid_0's auc: 0.716245
[281]	valid_0's auc: 0.716187
[282]	valid_0's auc: 0.716233
[283]	valid_0's auc: 0.716212
[284]	valid_0's auc: 0.716278
[285]	valid_0's auc: 0.716307
[286]	valid_0's auc: 0.716414
[287]	valid_0's auc: 0.716514
[288]	valid_0's auc: 0.71661
[289]	valid_0's auc: 0.71662
[290]	valid_0's auc: 0.71669
[291]	valid_0's auc: 0.716705
[292]	valid_0's auc: 0.716745
[293]	valid_0's auc: 0.716786
[294]	valid_0's auc: 0.716784
[295]	valid_0's auc: 0.716834
[296]	valid_0's auc: 0.716857
[297]	valid_0's auc: 0.716946
[298]	valid_0's auc: 0.716996
[299]	valid_0's auc: 0.717027
[300]	valid_0's auc: 0.71706
[301]	valid_0's auc: 0.717095
[302]	valid_0's auc: 0.717143
[303]	valid_0's auc: 0.717226
[304]	valid_0's auc: 0.717268
[305]	valid_0's auc: 0.717308
[306]	valid_0's auc: 0.71734
[307]	valid_0's auc: 0.717392
[308]	valid_0's auc: 0.717389
[309]	valid_0's auc: 0.717447
[310]	valid_0's auc: 0.71746
[311]	valid_0's 

[554]	valid_0's auc: 0.72277
[555]	valid_0's auc: 0.722781
[556]	valid_0's auc: 0.72281
[557]	valid_0's auc: 0.722813
[558]	valid_0's auc: 0.722794
[559]	valid_0's auc: 0.722827
[560]	valid_0's auc: 0.7229
[561]	valid_0's auc: 0.722952
[562]	valid_0's auc: 0.72301
[563]	valid_0's auc: 0.72304
[564]	valid_0's auc: 0.723065
[565]	valid_0's auc: 0.723132
[566]	valid_0's auc: 0.723244
[567]	valid_0's auc: 0.723365
[568]	valid_0's auc: 0.723347
[569]	valid_0's auc: 0.723343
[570]	valid_0's auc: 0.723444
[571]	valid_0's auc: 0.72345
[572]	valid_0's auc: 0.723449
[573]	valid_0's auc: 0.723464
[574]	valid_0's auc: 0.723461
[575]	valid_0's auc: 0.723507
[576]	valid_0's auc: 0.723525
[577]	valid_0's auc: 0.72353
[578]	valid_0's auc: 0.723542
[579]	valid_0's auc: 0.723576
[580]	valid_0's auc: 0.723607
[581]	valid_0's auc: 0.723655
[582]	valid_0's auc: 0.723679
[583]	valid_0's auc: 0.723703
[584]	valid_0's auc: 0.72372
[585]	valid_0's auc: 0.723759
[586]	valid_0's auc: 0.723742
[587]	valid_0's auc

[830]	valid_0's auc: 0.726603
[831]	valid_0's auc: 0.726628
[832]	valid_0's auc: 0.726618
[833]	valid_0's auc: 0.726673
[834]	valid_0's auc: 0.726668
[835]	valid_0's auc: 0.72668
[836]	valid_0's auc: 0.726668
[837]	valid_0's auc: 0.726678
[838]	valid_0's auc: 0.726673
[839]	valid_0's auc: 0.726668
[840]	valid_0's auc: 0.726666
[841]	valid_0's auc: 0.726646
[842]	valid_0's auc: 0.726652
[843]	valid_0's auc: 0.726695
[844]	valid_0's auc: 0.726713
[845]	valid_0's auc: 0.726754
[846]	valid_0's auc: 0.726776
[847]	valid_0's auc: 0.726821
[848]	valid_0's auc: 0.726845
[849]	valid_0's auc: 0.726878
[850]	valid_0's auc: 0.726906
[851]	valid_0's auc: 0.726946
[852]	valid_0's auc: 0.726917
[853]	valid_0's auc: 0.726926
[854]	valid_0's auc: 0.726962
[855]	valid_0's auc: 0.726953
[856]	valid_0's auc: 0.726932
[857]	valid_0's auc: 0.726878
[858]	valid_0's auc: 0.726905
[859]	valid_0's auc: 0.726919
[860]	valid_0's auc: 0.72695
[861]	valid_0's auc: 0.726979
[862]	valid_0's auc: 0.727008
[863]	valid_

[1103]	valid_0's auc: 0.728362
[1104]	valid_0's auc: 0.728359
[1105]	valid_0's auc: 0.728369
[1106]	valid_0's auc: 0.728365
[1107]	valid_0's auc: 0.728385
[1108]	valid_0's auc: 0.728395
[1109]	valid_0's auc: 0.72839
[1110]	valid_0's auc: 0.728357
[1111]	valid_0's auc: 0.728394
[1112]	valid_0's auc: 0.72838
[1113]	valid_0's auc: 0.728397
[1114]	valid_0's auc: 0.728383
[1115]	valid_0's auc: 0.728376
[1116]	valid_0's auc: 0.728355
[1117]	valid_0's auc: 0.728361
[1118]	valid_0's auc: 0.728368
[1119]	valid_0's auc: 0.728348
[1120]	valid_0's auc: 0.728351
[1121]	valid_0's auc: 0.728356
[1122]	valid_0's auc: 0.728319
[1123]	valid_0's auc: 0.728308
[1124]	valid_0's auc: 0.728308
[1125]	valid_0's auc: 0.728299
[1126]	valid_0's auc: 0.728307
[1127]	valid_0's auc: 0.72832
[1128]	valid_0's auc: 0.728315
[1129]	valid_0's auc: 0.728318
[1130]	valid_0's auc: 0.728329
[1131]	valid_0's auc: 0.728322
[1132]	valid_0's auc: 0.728325
[1133]	valid_0's auc: 0.72833
[1134]	valid_0's auc: 0.728307
[1135]	valid

[1370]	valid_0's auc: 0.729386
[1371]	valid_0's auc: 0.729401
[1372]	valid_0's auc: 0.729422
[1373]	valid_0's auc: 0.729445
[1374]	valid_0's auc: 0.729444
[1375]	valid_0's auc: 0.729415
[1376]	valid_0's auc: 0.729433
[1377]	valid_0's auc: 0.729437
[1378]	valid_0's auc: 0.729437
[1379]	valid_0's auc: 0.729438
[1380]	valid_0's auc: 0.72944
[1381]	valid_0's auc: 0.729432
[1382]	valid_0's auc: 0.729433
[1383]	valid_0's auc: 0.729433
[1384]	valid_0's auc: 0.729428
[1385]	valid_0's auc: 0.729431
[1386]	valid_0's auc: 0.729416
[1387]	valid_0's auc: 0.729419
[1388]	valid_0's auc: 0.729448
[1389]	valid_0's auc: 0.729447
[1390]	valid_0's auc: 0.729414
[1391]	valid_0's auc: 0.729432
[1392]	valid_0's auc: 0.729446
[1393]	valid_0's auc: 0.729422
[1394]	valid_0's auc: 0.729409
[1395]	valid_0's auc: 0.729422
[1396]	valid_0's auc: 0.729429
[1397]	valid_0's auc: 0.729425
[1398]	valid_0's auc: 0.729451
[1399]	valid_0's auc: 0.729466
[1400]	valid_0's auc: 0.729474
[1401]	valid_0's auc: 0.729444
[1402]	va

[1637]	valid_0's auc: 0.729552
[1638]	valid_0's auc: 0.729543
[1639]	valid_0's auc: 0.729535
[1640]	valid_0's auc: 0.729542
[1641]	valid_0's auc: 0.72953
[1642]	valid_0's auc: 0.729528
[1643]	valid_0's auc: 0.729522
[1644]	valid_0's auc: 0.729493
[1645]	valid_0's auc: 0.729494
[1646]	valid_0's auc: 0.729497
[1647]	valid_0's auc: 0.729513
[1648]	valid_0's auc: 0.729513
[1649]	valid_0's auc: 0.729507
[1650]	valid_0's auc: 0.7295
[1651]	valid_0's auc: 0.729496
[1652]	valid_0's auc: 0.729497
[1653]	valid_0's auc: 0.729487
[1654]	valid_0's auc: 0.72945
[1655]	valid_0's auc: 0.729449
[1656]	valid_0's auc: 0.729456
[1657]	valid_0's auc: 0.729451
[1658]	valid_0's auc: 0.729465
[1659]	valid_0's auc: 0.729442
[1660]	valid_0's auc: 0.72943
[1661]	valid_0's auc: 0.729407
[1662]	valid_0's auc: 0.729372
[1663]	valid_0's auc: 0.729342
[1664]	valid_0's auc: 0.72936
[1665]	valid_0's auc: 0.729345
[1666]	valid_0's auc: 0.729352
[1667]	valid_0's auc: 0.72934
[1668]	valid_0's auc: 0.729344
[1669]	valid_0'

[1905]	valid_0's auc: 0.729586
[1906]	valid_0's auc: 0.729574
[1907]	valid_0's auc: 0.72958
[1908]	valid_0's auc: 0.729593
[1909]	valid_0's auc: 0.729587
[1910]	valid_0's auc: 0.729588
[1911]	valid_0's auc: 0.729589
[1912]	valid_0's auc: 0.729572
[1913]	valid_0's auc: 0.729565
[1914]	valid_0's auc: 0.729566
[1915]	valid_0's auc: 0.729585
[1916]	valid_0's auc: 0.729582
[1917]	valid_0's auc: 0.729602
[1918]	valid_0's auc: 0.729613
[1919]	valid_0's auc: 0.729622
[1920]	valid_0's auc: 0.729621
[1921]	valid_0's auc: 0.729613
[1922]	valid_0's auc: 0.729608
[1923]	valid_0's auc: 0.729637
[1924]	valid_0's auc: 0.729632
[1925]	valid_0's auc: 0.72963
[1926]	valid_0's auc: 0.729622
[1927]	valid_0's auc: 0.72963
[1928]	valid_0's auc: 0.729637
[1929]	valid_0's auc: 0.729615
[1930]	valid_0's auc: 0.729644
[1931]	valid_0's auc: 0.729664
[1932]	valid_0's auc: 0.729681
[1933]	valid_0's auc: 0.729681
[1934]	valid_0's auc: 0.729649
[1935]	valid_0's auc: 0.72964
[1936]	valid_0's auc: 0.729634
[1937]	valid

[2173]	valid_0's auc: 0.729832
[2174]	valid_0's auc: 0.729822
[2175]	valid_0's auc: 0.729833
[2176]	valid_0's auc: 0.729836
[2177]	valid_0's auc: 0.729825
[2178]	valid_0's auc: 0.729828
[2179]	valid_0's auc: 0.729819
[2180]	valid_0's auc: 0.72981
[2181]	valid_0's auc: 0.729833
[2182]	valid_0's auc: 0.729822
[2183]	valid_0's auc: 0.729832
[2184]	valid_0's auc: 0.729845
[2185]	valid_0's auc: 0.729838
[2186]	valid_0's auc: 0.729822
[2187]	valid_0's auc: 0.729813
[2188]	valid_0's auc: 0.7298
[2189]	valid_0's auc: 0.729815
[2190]	valid_0's auc: 0.72983
[2191]	valid_0's auc: 0.729829
[2192]	valid_0's auc: 0.729816
[2193]	valid_0's auc: 0.72982
[2194]	valid_0's auc: 0.729834
[2195]	valid_0's auc: 0.729835
[2196]	valid_0's auc: 0.729828
[2197]	valid_0's auc: 0.729814
[2198]	valid_0's auc: 0.729785
[2199]	valid_0's auc: 0.729776
[2200]	valid_0's auc: 0.729766
[2201]	valid_0's auc: 0.729767
[2202]	valid_0's auc: 0.729754
[2203]	valid_0's auc: 0.729731
[2204]	valid_0's auc: 0.729717
[2205]	valid_

[2440]	valid_0's auc: 0.730083
[2441]	valid_0's auc: 0.730083
[2442]	valid_0's auc: 0.730096
[2443]	valid_0's auc: 0.730087
[2444]	valid_0's auc: 0.730085
[2445]	valid_0's auc: 0.73008
[2446]	valid_0's auc: 0.730085
[2447]	valid_0's auc: 0.730103
[2448]	valid_0's auc: 0.730122
[2449]	valid_0's auc: 0.730126
[2450]	valid_0's auc: 0.730171
[2451]	valid_0's auc: 0.730167
[2452]	valid_0's auc: 0.730171
[2453]	valid_0's auc: 0.73016
[2454]	valid_0's auc: 0.730167
[2455]	valid_0's auc: 0.730173
[2456]	valid_0's auc: 0.730181
[2457]	valid_0's auc: 0.730187
[2458]	valid_0's auc: 0.7302
[2459]	valid_0's auc: 0.730205
[2460]	valid_0's auc: 0.730199
[2461]	valid_0's auc: 0.730213
[2462]	valid_0's auc: 0.730199
[2463]	valid_0's auc: 0.730199
[2464]	valid_0's auc: 0.730201
[2465]	valid_0's auc: 0.730193
[2466]	valid_0's auc: 0.730208
[2467]	valid_0's auc: 0.730207
[2468]	valid_0's auc: 0.730225
[2469]	valid_0's auc: 0.730238
[2470]	valid_0's auc: 0.730239
[2471]	valid_0's auc: 0.730243
[2472]	valid

[2706]	valid_0's auc: 0.729958
[2707]	valid_0's auc: 0.729964
[2708]	valid_0's auc: 0.729959
[2709]	valid_0's auc: 0.729975
[2710]	valid_0's auc: 0.72997
[2711]	valid_0's auc: 0.729985
[2712]	valid_0's auc: 0.729993
[2713]	valid_0's auc: 0.729989
[2714]	valid_0's auc: 0.729988
[2715]	valid_0's auc: 0.729982
[2716]	valid_0's auc: 0.729975
[2717]	valid_0's auc: 0.729973
[2718]	valid_0's auc: 0.729974
[2719]	valid_0's auc: 0.729965
[2720]	valid_0's auc: 0.729944
[2721]	valid_0's auc: 0.729946
[2722]	valid_0's auc: 0.729938
[2723]	valid_0's auc: 0.729931
[2724]	valid_0's auc: 0.729934
[2725]	valid_0's auc: 0.729941
[2726]	valid_0's auc: 0.729919
[2727]	valid_0's auc: 0.729915
[2728]	valid_0's auc: 0.729914
[2729]	valid_0's auc: 0.729891
[2730]	valid_0's auc: 0.729875
[2731]	valid_0's auc: 0.72989
[2732]	valid_0's auc: 0.72991
[2733]	valid_0's auc: 0.729918
[2734]	valid_0's auc: 0.729915
[2735]	valid_0's auc: 0.72991
[2736]	valid_0's auc: 0.729908
[2737]	valid_0's auc: 0.729928
[2738]	valid

[2973]	valid_0's auc: 0.729707
[2974]	valid_0's auc: 0.7297
[2975]	valid_0's auc: 0.729708
[2976]	valid_0's auc: 0.729714
[2977]	valid_0's auc: 0.72971
[2978]	valid_0's auc: 0.729705
[2979]	valid_0's auc: 0.729699
[2980]	valid_0's auc: 0.72968
[2981]	valid_0's auc: 0.729662
[2982]	valid_0's auc: 0.729636
[2983]	valid_0's auc: 0.72967
[2984]	valid_0's auc: 0.729652
[2985]	valid_0's auc: 0.729632
[2986]	valid_0's auc: 0.729631
[2987]	valid_0's auc: 0.729624
[2988]	valid_0's auc: 0.729629
[2989]	valid_0's auc: 0.72963
[2990]	valid_0's auc: 0.729629
[2991]	valid_0's auc: 0.729622
[2992]	valid_0's auc: 0.72961
[2993]	valid_0's auc: 0.729609
[2994]	valid_0's auc: 0.7296
[2995]	valid_0's auc: 0.729619
[2996]	valid_0's auc: 0.729629
[2997]	valid_0's auc: 0.729641
[2998]	valid_0's auc: 0.729678
[2999]	valid_0's auc: 0.729683
[3000]	valid_0's auc: 0.729704
[3001]	valid_0's auc: 0.729714
[3002]	valid_0's auc: 0.729711
[3003]	valid_0's auc: 0.729704
[3004]	valid_0's auc: 0.729703
[3005]	valid_0's 

[3240]	valid_0's auc: 0.729263
[3241]	valid_0's auc: 0.729258
[3242]	valid_0's auc: 0.72924
[3243]	valid_0's auc: 0.729213
[3244]	valid_0's auc: 0.729199
[3245]	valid_0's auc: 0.729178
[3246]	valid_0's auc: 0.729137
[3247]	valid_0's auc: 0.729145
[3248]	valid_0's auc: 0.729134
[3249]	valid_0's auc: 0.729135
[3250]	valid_0's auc: 0.729144
[3251]	valid_0's auc: 0.729159
[3252]	valid_0's auc: 0.729158
[3253]	valid_0's auc: 0.729153
[3254]	valid_0's auc: 0.729139
[3255]	valid_0's auc: 0.729136
[3256]	valid_0's auc: 0.729146
[3257]	valid_0's auc: 0.729139
[3258]	valid_0's auc: 0.729135
[3259]	valid_0's auc: 0.729128
[3260]	valid_0's auc: 0.729129
[3261]	valid_0's auc: 0.729112
[3262]	valid_0's auc: 0.729104
[3263]	valid_0's auc: 0.72908
[3264]	valid_0's auc: 0.729094
[3265]	valid_0's auc: 0.729097
[3266]	valid_0's auc: 0.729092
[3267]	valid_0's auc: 0.729095
[3268]	valid_0's auc: 0.729104
[3269]	valid_0's auc: 0.729103
[3270]	valid_0's auc: 0.729116
[3271]	valid_0's auc: 0.729115
[3272]	val

In [14]:
dataset_x,dataset_y=get_sample(dataset_train,target_0=1,target_1=1)
y_pr=model.predict(dataset_x)
offline_test_auc=roc_auc_score(dataset_y,y_pr)
print('线下测试集AUC:%f'%offline_test_auc)

线下测试集AUC:0.543993


In [53]:
feature.remove('cus_sex')

In [56]:
dataset_train=pd.read_hdf('dataset/3_0_插入产品相除列_填入空值_加入index.h5', key='TRAIN')
dataset_test=pd.read_hdf('dataset/3_0_插入产品相除列_填入空值_加入index.h5', key='PREDICT')


NameError: name 'dataset' is not defined

In [13]:
#model.save_model('j')
# model=lgb.Booster(model_file='models/lgb_191130_0.7645228479096401')




dataset_x,dataset_y=get_sample(dataset_train,target_0=1,target_1=1)
y_pr=model.predict(dataset_x)
offline_test_auc=roc_auc_score(dataset_y,y_pr)
print('线下测试集AUC:%f'%offline_test_auc)

# y_pr=model.predict(dataset_test)
# y_pr=y_pr.astype(np.float16)
# y_pr=pd.Series(y_pr,index=dataset_test.index)
# y_pr.to_csv('online/LOAN_JS_FH_SX_04_20191130_03.csv',header=['target'])

FileNotFoundError: File dataset/2_插入产品相除列并加入index.h5 does not exist

In [8]:


dataset_x,dataset_y=get_sample(dataset_train,target_0=0.0931,target_1=1)
y_pres=pd.DataFrame([])
for i,model in enumerate(models):
    y_pres[i]=model.predict(dataset_x)
y_pr=y_pres.mean(axis=1)
offline_test_auc=roc_auc_score(dataset_y,y_pr)
print('线下测试集AUC:%f'%offline_test_auc)




NameError: name 'models' is not defined

In [66]:
y_pres=pd.DataFrame([])
for i,model in enumerate(models):
    y_pres[i]=model.predict(dataset_test)
y_pr=y_pres.mean(axis=1)
y_pr=y_pr.astype(np.float16)
y_pr.index=dataset_test.index
y_pr.to_csv('online/LOAN_JS_FH_SX_04_20191129_01.csv',header=['target'])

In [46]:
y_pr.index=dataset_test.index
y_pr.to_csv('online/LOAN_JS_FH_SX_04_20191128_03.csv',header=['target'])

nimabi
