In [1]:
import time
import datetime
import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
from dateutil.parser import parse
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from lightgbm import Booster as lgbm_Booster
from sklearn.metrics import f1_score


data_path = './data/tc/'

train = pd.read_csv(data_path + 'f_train_20180204.csv', encoding='gb2312')
test = pd.read_csv(data_path + 'f_test_a_20180204.csv', encoding='gb2312')



In [9]:
# 线下 0.650 线上0.718
def make_feat1(train, test):
    merge = pd.concat([train, test])
    n_train = len(train)
    train_y = merge['label']
    merge = merge.drop(['label'], axis=1)
    merge = merge.drop(['id', 'SNP21', 'SNP22', 'SNP23', 'RBP4', 'SNP54', 'SNP55', 'ACEID'], axis=1)
    merge.fillna(merge.median(axis=0), inplace=True)

    # 年龄
    def age_level(line):
        age = line['年龄']
        if age < 30:
            return "age_0_30"
        elif age >= 30 and age < 45:
            return "age_30_45"
        elif age >= 45 and age < 60:
            return "age_45_60"
        else:
            return "age_60_100"

    merge['年龄_LEVEL'] = merge.apply(age_level, axis=1)
    d_age = pd.get_dummies(merge['年龄_LEVEL'], prefix="年龄")
    merge = pd.concat([d_age, merge], axis=1)
    merge = merge.drop(['年龄_LEVEL'], axis=1)
    
    
    X, y = merge[:n_train], train_y[:n_train]
    test_X = merge[n_train:]

    return X, y, test_X

X, y, test_X = make_feat1(train, test)

In [14]:
#  0.644 线上0.724
def make_feat2(train, test):
    merge = pd.concat([train, test])
    n_train = len(train)
    train_y = merge['label']
    merge = merge.drop(['label'], axis=1)
    merge = merge.drop(['id', 'SNP21', 'SNP22', 'SNP23', 'RBP4', 'SNP54', 'SNP55', 'ACEID'], axis=1)

    for i in ['孕次', '产次', 'DM家族史','BMI分类']:  #众数填充
        a = merge[i].mode()[0]
        merge[i] = merge[i].fillna(a)   ##means na
        

    merge.fillna(merge.median(axis=0), inplace=True)

    ##血压和BMI
    merge['bp*BMI'] = (merge['舒张压']+merge['收缩压'])*(merge['BMI分类']+1)/2

    ##孕产
    merge['孕产数'] = merge['孕次'] + merge['产次']
    merge['孕产差'] = merge['孕次'] - merge['产次']

    ##血生化
    #/肝脏类
    merge['肝脏']  = (merge['AST']+merge['ALT'])/2
    #/肾脏类别
    merge['肾脏'] = (merge['BUN'] + merge['Cr'])/2
    #/心血管类别
    merge['心血管'] = (merge['HDLC'] + merge['LDLC']+merge['CHO'] + merge['TG'])/4
    #/炎症
    merge['炎症'] = (np.log(merge['wbc'])+merge['hsCRP'])/2

    # 年龄
    def age_level(line):
        age = line['年龄']
        if age < 25:
            return "age_0_25"
        elif age >= 25 and age < 30:
            return "age_25_30"
        elif age >= 30 and age < 40:
            return "age_30_40"
        else:
            return "age_40_"

    merge['年龄_LEVEL'] = merge.apply(age_level, axis=1)
    d_age = pd.get_dummies(merge['年龄_LEVEL'], prefix="年龄")
    merge = pd.concat([d_age, merge], axis=1)
    merge = merge.drop(['年龄_LEVEL'], axis=1)

    def pg_level(line):
        pg = line['孕次']
        if pg < 3:
            return "pg_0_3"
        else:
            return "pg_3_"

    merge['pg_LEVEL'] = merge.apply(pg_level, axis=1)
    d_pg = pd.get_dummies(merge['pg_LEVEL'])
    merge = pd.concat([d_pg, merge], axis=1)
    merge = merge.drop(['pg_LEVEL'], axis=1)

    merge.columns = [np.arange(89)]

    X, y = merge[:n_train], train_y[:n_train]
    test_X = merge[n_train:]
    return X, y, test_X  #0.6561

X, y, test_X = make_feat2(train, test)

In [2]:
#小洁特征 线下0.665 线上0.734
def make_feat3(train, test):
    merge = pd.concat([train, test])
    n_train = len(train)
    train_y = merge['label']
    merge = merge.drop(['label','id'], axis=1)
    merge.loc['Row_sum'] = merge.isnull().apply(lambda x: x.sum())  ##计算每列有多少缺失值
    merge.loc[:, merge.loc['Row_sum'] > 600].columns  ##缺失值大于600的columns
    merge = merge.drop(merge.loc[:, merge.loc['Row_sum'] > 600].columns, axis=1)
    merge = merge.drop(['Row_sum'],axis=0)
    #merge = merge.sort_index(axis=1, ascending=False)

    for i in merge.columns[12:62]:   ##for all snp
        merge[i] =  merge[i].fillna(0)   ##0 means NaN
        d =  pd.get_dummies(merge[i], prefix=i)
        merge = pd.concat([merge,d],axis=1)
        merge = merge.drop([i], axis=1)

    for i in ['孕次', '产次', 'DM家族史','BMI分类']:  #众数填充
        a = merge[i].mode()[0]
        merge[i] = merge[i].fillna(a)   ##means na

    #np.sum(merge['产次'].isnull())

    merge.fillna(merge.median(axis=0), inplace=True)

    ##血压和BMI
    merge['bp*BMI'] = (merge['舒张压']+merge['收缩压'])*(merge['BMI分类']+1)/2

    ##孕产
    merge['孕产数'] = merge['孕次'] + merge['产次']
    merge['孕产差'] = merge['孕次'] - merge['产次']

    ##血生化
    #/肝脏类
    merge['肝脏']  = (merge['AST']+merge['ALT'])/2
    #/肾脏类别
    merge['肾脏'] = (merge['BUN'] + merge['Cr'])/2
    #/心血管类别
    merge['心血管'] = (merge['HDLC'] + merge['LDLC']+merge['CHO'] + merge['TG'])/4
    #/炎症
    merge['炎症'] = (np.log(merge['wbc'])+merge['hsCRP'])/2

    # 年龄
    def age_level(line):
        age = line['年龄']
        if age < 25:
            return "age_0_25"
        elif age >= 25 and age < 30:
            return "age_25_30"
        elif age >= 30 and age < 40:
            return "age_30_40"
        else:
            return "age_40_"

    merge['年龄_LEVEL'] = merge.apply(age_level, axis=1)
    d_age = pd.get_dummies(merge['年龄_LEVEL'], prefix="年龄")
    merge = pd.concat([d_age, merge], axis=1)
    merge = merge.drop(['年龄_LEVEL'], axis=1)

    def pg_level(line):
        pg = line['孕次']
        if pg < 3:
            return "pg_0_3"
        else:
            return "pg_3_"

    merge['pg_LEVEL'] = merge.apply(pg_level, axis=1)
    d_pg = pd.get_dummies(merge['pg_LEVEL'], prefix='孕次')
    merge = pd.concat([d_pg, merge], axis=1)
    merge = merge.drop(['pg_LEVEL'], axis=1)

#     merge.columns = [np.arange(285)]

    X, y = merge[:n_train], train_y[:n_train]
    test_X = merge[n_train:]

    return X, y, test_X  #0.6561

X, y, test_X = make_feat3(train, test)

## 20170227 模型测试

In [13]:
##lgb
clf_lgb_params1 = {
    'learning_rate': 0.01,
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'mse',
    'sub_feature': 0.7,
    'num_leaves': 60,
    'colsample_bytree': 0.7,
    'feature_fraction': 0.7,
    'min_data': 100,
    'min_hessian': 1,
    'verbose': -1,
}

clf_lgb_params2 = {
    'learning_rate': 0.005,
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'mse',
    'sub_feature': 0.5,
    'num_leaves': 70,
    'colsample_bytree': 0.7,
    'feature_fraction': 0.7,
    'min_data': 120,
    'min_hessian': 1,
    'verbose': -1,
}

# xgboost
clf_xgb_params1 = {
    'booster': 'gbtree',
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'max_depth': 9,
    'eta': 0.02,
    'min_child_weight': 4,
    'colsample': 0.8,
    'gamma': 2,
    'n_thread': 4,
    'silent': 1
}

# K折交叉验证
print('开始CV 5折训练...')
t0 = time.time()
X_preds = np.zeros(X.shape[0])
kf = KFold(len(X), n_folds=5, shuffle=True,random_state=520)
for i, (train_index, valid_index) in enumerate(kf):
    print('第{}次训练...'.format(i))
    
    train_X, train_y = X.iloc[train_index], y.iloc[train_index]
    valid_X, valid_y = X.iloc[valid_index], y.iloc[valid_index]
    
    #lightgbm
    lgb_train = lgb.Dataset(train_X, train_y)
    lgb_valid = lgb.Dataset(valid_X, valid_y)   
    
    clf_lgb_model1 = lgb.train(clf_lgb_params1, lgb_train, valid_sets = [lgb_valid], num_boost_round = 3000, early_stopping_rounds = 100)
    lgb1_pred = np.where(clf_lgb_model1.predict(valid_X) > 0.5, 1, 0)
    
    clf_lgb_model2 = lgb.train(clf_lgb_params2, lgb_train, valid_sets = [lgb_valid], num_boost_round = 3000, early_stopping_rounds = 100)
    lgb2_pred = np.where(clf_lgb_model2.predict(valid_X) > 0.5, 1, 0)
    
    #xgboost
    xgb_train = xgb.DMatrix(train_X, train_y)
    xgb_valid = xgb.DMatrix(valid_X, valid_y)
    
    watchlist = [(xgb_train,'train')]
    clf_xgb_model1 = xgb.train(clf_xgb_params1, xgb_train, num_boost_round=1000, verbose_eval=200, evals=watchlist)
    xgb1_pred = np.where(clf_xgb_model1.predict(xgb.DMatrix(valid_X), ntree_limit=clf_xgb_model1.best_ntree_limit + 20)>0.5,1,0)
    
    print('lgb1第{}次得分:{}'.format(i, f1_score(valid_y, lgb1_pred)))
    print('lgb2第{}次得分:{}'.format(i, f1_score(valid_y, lgb2_pred)))
    print('xgb1 第{}次得分:{}'.format(i, f1_score(valid_y, xgb1_pred)))
    
    valid_X_pred = 0.333 * xgb1_pred + 0.333 * xgb1_pred + 0.333 * lgb2_pred
    valid_X_pred = np.where(np.array(valid_X_pred) > 0.5, 1, 0)
    print('融合后第{}次得分:{}'.format(i, f1_score(valid_y, valid_X_pred)))
    X_preds[valid_index] += valid_X_pred
    
#分类预测结果
print('线下得分：{}'.format(f1_score(y, X_preds)))
print('CV训练用时{}秒'.format(time.time() - t0))

开始CV 5折训练...
第0次训练...
[1]	valid_0's l2: 0.249484
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's l2: 0.248793
[3]	valid_0's l2: 0.247983
[4]	valid_0's l2: 0.247792
[5]	valid_0's l2: 0.247045
[6]	valid_0's l2: 0.246261
[7]	valid_0's l2: 0.24566
[8]	valid_0's l2: 0.244791
[9]	valid_0's l2: 0.244363
[10]	valid_0's l2: 0.243858
[11]	valid_0's l2: 0.242875
[12]	valid_0's l2: 0.242093
[13]	valid_0's l2: 0.241131
[14]	valid_0's l2: 0.240563
[15]	valid_0's l2: 0.239628
[16]	valid_0's l2: 0.239269
[17]	valid_0's l2: 0.23871
[18]	valid_0's l2: 0.237832
[19]	valid_0's l2: 0.236992
[20]	valid_0's l2: 0.236627
[21]	valid_0's l2: 0.236129
[22]	valid_0's l2: 0.235279
[23]	valid_0's l2: 0.234739
[24]	valid_0's l2: 0.233919
[25]	valid_0's l2: 0.23336
[26]	valid_0's l2: 0.232963
[27]	valid_0's l2: 0.23254
[28]	valid_0's l2: 0.231792
[29]	valid_0's l2: 0.231068
[30]	valid_0's l2: 0.230673
[31]	valid_0's l2: 0.229922
[32]	valid_0's l2: 0.229449
[33]	valid_0's l2: 0.228717
[34]

[26]	valid_0's l2: 0.241322
[27]	valid_0's l2: 0.241079
[28]	valid_0's l2: 0.240625
[29]	valid_0's l2: 0.240172
[30]	valid_0's l2: 0.239945
[31]	valid_0's l2: 0.239527
[32]	valid_0's l2: 0.239269
[33]	valid_0's l2: 0.238791
[34]	valid_0's l2: 0.238532
[35]	valid_0's l2: 0.238306
[36]	valid_0's l2: 0.237931
[37]	valid_0's l2: 0.237508
[38]	valid_0's l2: 0.237083
[39]	valid_0's l2: 0.236808
[40]	valid_0's l2: 0.236549
[41]	valid_0's l2: 0.236362
[42]	valid_0's l2: 0.236004
[43]	valid_0's l2: 0.235599
[44]	valid_0's l2: 0.235293
[45]	valid_0's l2: 0.234895
[46]	valid_0's l2: 0.234658
[47]	valid_0's l2: 0.234281
[48]	valid_0's l2: 0.234018
[49]	valid_0's l2: 0.233812
[50]	valid_0's l2: 0.233423
[51]	valid_0's l2: 0.233042
[52]	valid_0's l2: 0.232822
[53]	valid_0's l2: 0.232497
[54]	valid_0's l2: 0.23224
[55]	valid_0's l2: 0.231919
[56]	valid_0's l2: 0.231654
[57]	valid_0's l2: 0.231319
[58]	valid_0's l2: 0.230982
[59]	valid_0's l2: 0.230746
[60]	valid_0's l2: 0.230515
[61]	valid_0's l2: 0.

[399]	valid_0's l2: 0.20172
[400]	valid_0's l2: 0.201705
[401]	valid_0's l2: 0.2017
[402]	valid_0's l2: 0.201677
[403]	valid_0's l2: 0.201692
[404]	valid_0's l2: 0.201695
[405]	valid_0's l2: 0.201726
[406]	valid_0's l2: 0.201723
[407]	valid_0's l2: 0.201684
[408]	valid_0's l2: 0.20168
[409]	valid_0's l2: 0.201676
[410]	valid_0's l2: 0.201698
[411]	valid_0's l2: 0.201688
[412]	valid_0's l2: 0.201649
[413]	valid_0's l2: 0.201635
[414]	valid_0's l2: 0.201637
[415]	valid_0's l2: 0.201621
[416]	valid_0's l2: 0.201633
[417]	valid_0's l2: 0.201638
[418]	valid_0's l2: 0.201661
[419]	valid_0's l2: 0.201664
[420]	valid_0's l2: 0.201655
[421]	valid_0's l2: 0.201653
[422]	valid_0's l2: 0.201625
[423]	valid_0's l2: 0.201637
[424]	valid_0's l2: 0.201636
[425]	valid_0's l2: 0.201627
[426]	valid_0's l2: 0.201619
[427]	valid_0's l2: 0.201597
[428]	valid_0's l2: 0.20159
[429]	valid_0's l2: 0.20153
[430]	valid_0's l2: 0.201528
[431]	valid_0's l2: 0.201523
[432]	valid_0's l2: 0.201516
[433]	valid_0's l2: 

[81]	valid_0's l2: 0.210767
[82]	valid_0's l2: 0.210531
[83]	valid_0's l2: 0.21027
[84]	valid_0's l2: 0.21007
[85]	valid_0's l2: 0.20987
[86]	valid_0's l2: 0.209686
[87]	valid_0's l2: 0.209377
[88]	valid_0's l2: 0.209252
[89]	valid_0's l2: 0.209066
[90]	valid_0's l2: 0.208796
[91]	valid_0's l2: 0.208546
[92]	valid_0's l2: 0.208374
[93]	valid_0's l2: 0.208181
[94]	valid_0's l2: 0.207977
[95]	valid_0's l2: 0.207803
[96]	valid_0's l2: 0.207621
[97]	valid_0's l2: 0.207385
[98]	valid_0's l2: 0.20715
[99]	valid_0's l2: 0.206974
[100]	valid_0's l2: 0.206799
[101]	valid_0's l2: 0.206652
[102]	valid_0's l2: 0.206487
[103]	valid_0's l2: 0.206272
[104]	valid_0's l2: 0.206081
[105]	valid_0's l2: 0.205968
[106]	valid_0's l2: 0.205722
[107]	valid_0's l2: 0.205589
[108]	valid_0's l2: 0.20548
[109]	valid_0's l2: 0.205371
[110]	valid_0's l2: 0.205267
[111]	valid_0's l2: 0.20515
[112]	valid_0's l2: 0.204973
[113]	valid_0's l2: 0.204751
[114]	valid_0's l2: 0.204557
[115]	valid_0's l2: 0.204357
[116]	vali

[495]	valid_0's l2: 0.18809
[496]	valid_0's l2: 0.188038
[497]	valid_0's l2: 0.188003
[498]	valid_0's l2: 0.18803
[499]	valid_0's l2: 0.188027
[500]	valid_0's l2: 0.188019
[501]	valid_0's l2: 0.188058
[502]	valid_0's l2: 0.188002
[503]	valid_0's l2: 0.188016
[504]	valid_0's l2: 0.187989
[505]	valid_0's l2: 0.187988
[506]	valid_0's l2: 0.18795
[507]	valid_0's l2: 0.187965
[508]	valid_0's l2: 0.187945
[509]	valid_0's l2: 0.187876
[510]	valid_0's l2: 0.187895
[511]	valid_0's l2: 0.18791
[512]	valid_0's l2: 0.187949
[513]	valid_0's l2: 0.187952
[514]	valid_0's l2: 0.187927
[515]	valid_0's l2: 0.187874
[516]	valid_0's l2: 0.18788
[517]	valid_0's l2: 0.187872
[518]	valid_0's l2: 0.187844
[519]	valid_0's l2: 0.187858
[520]	valid_0's l2: 0.187841
[521]	valid_0's l2: 0.187821
[522]	valid_0's l2: 0.187788
[523]	valid_0's l2: 0.187759
[524]	valid_0's l2: 0.18782
[525]	valid_0's l2: 0.187793
[526]	valid_0's l2: 0.18775
[527]	valid_0's l2: 0.187791
[528]	valid_0's l2: 0.187784
[529]	valid_0's l2: 0

[783]	valid_0's l2: 0.186072
[784]	valid_0's l2: 0.186073
[785]	valid_0's l2: 0.186002
[786]	valid_0's l2: 0.185937
[787]	valid_0's l2: 0.185933
[788]	valid_0's l2: 0.185926
[789]	valid_0's l2: 0.185906
[790]	valid_0's l2: 0.185932
[791]	valid_0's l2: 0.185965
[792]	valid_0's l2: 0.186013
[793]	valid_0's l2: 0.185965
[794]	valid_0's l2: 0.18602
[795]	valid_0's l2: 0.18595
[796]	valid_0's l2: 0.185945
[797]	valid_0's l2: 0.185947
[798]	valid_0's l2: 0.185976
[799]	valid_0's l2: 0.186045
[800]	valid_0's l2: 0.186005
[801]	valid_0's l2: 0.186005
[802]	valid_0's l2: 0.186015
[803]	valid_0's l2: 0.185997
[804]	valid_0's l2: 0.186026
[805]	valid_0's l2: 0.186021
[806]	valid_0's l2: 0.186053
[807]	valid_0's l2: 0.186018
[808]	valid_0's l2: 0.186008
[809]	valid_0's l2: 0.186048
[810]	valid_0's l2: 0.186039
[811]	valid_0's l2: 0.186069
[812]	valid_0's l2: 0.186044
[813]	valid_0's l2: 0.186054
[814]	valid_0's l2: 0.186107
[815]	valid_0's l2: 0.18609
[816]	valid_0's l2: 0.186132
[817]	valid_0's l

[269]	valid_0's l2: 0.204923
[270]	valid_0's l2: 0.204878
[271]	valid_0's l2: 0.204822
[272]	valid_0's l2: 0.204739
[273]	valid_0's l2: 0.204708
[274]	valid_0's l2: 0.204658
[275]	valid_0's l2: 0.20461
[276]	valid_0's l2: 0.204563
[277]	valid_0's l2: 0.204514
[278]	valid_0's l2: 0.204461
[279]	valid_0's l2: 0.204407
[280]	valid_0's l2: 0.204356
[281]	valid_0's l2: 0.204283
[282]	valid_0's l2: 0.204244
[283]	valid_0's l2: 0.204168
[284]	valid_0's l2: 0.204131
[285]	valid_0's l2: 0.204087
[286]	valid_0's l2: 0.204057
[287]	valid_0's l2: 0.20402
[288]	valid_0's l2: 0.20395
[289]	valid_0's l2: 0.203909
[290]	valid_0's l2: 0.20386
[291]	valid_0's l2: 0.203825
[292]	valid_0's l2: 0.203785
[293]	valid_0's l2: 0.203743
[294]	valid_0's l2: 0.203704
[295]	valid_0's l2: 0.203661
[296]	valid_0's l2: 0.203591
[297]	valid_0's l2: 0.203514
[298]	valid_0's l2: 0.203478
[299]	valid_0's l2: 0.203452
[300]	valid_0's l2: 0.203418
[301]	valid_0's l2: 0.203367
[302]	valid_0's l2: 0.203345
[303]	valid_0's l2

[902]	valid_0's l2: 0.196301
[903]	valid_0's l2: 0.196311
[904]	valid_0's l2: 0.196316
[905]	valid_0's l2: 0.196326
[906]	valid_0's l2: 0.196324
[907]	valid_0's l2: 0.196298
[908]	valid_0's l2: 0.19632
[909]	valid_0's l2: 0.196337
[910]	valid_0's l2: 0.196327
[911]	valid_0's l2: 0.196323
[912]	valid_0's l2: 0.196317
[913]	valid_0's l2: 0.196314
[914]	valid_0's l2: 0.196322
[915]	valid_0's l2: 0.196311
[916]	valid_0's l2: 0.196309
[917]	valid_0's l2: 0.196256
[918]	valid_0's l2: 0.196241
[919]	valid_0's l2: 0.196235
[920]	valid_0's l2: 0.196291
[921]	valid_0's l2: 0.196272
[922]	valid_0's l2: 0.196256
[923]	valid_0's l2: 0.196203
[924]	valid_0's l2: 0.1962
[925]	valid_0's l2: 0.196188
[926]	valid_0's l2: 0.196189
[927]	valid_0's l2: 0.19623
[928]	valid_0's l2: 0.196194
[929]	valid_0's l2: 0.196199
[930]	valid_0's l2: 0.196198
[931]	valid_0's l2: 0.196215
[932]	valid_0's l2: 0.196163
[933]	valid_0's l2: 0.196194
[934]	valid_0's l2: 0.196169
[935]	valid_0's l2: 0.196163
[936]	valid_0's l2

[268]	valid_0's l2: 0.192242
[269]	valid_0's l2: 0.192248
[270]	valid_0's l2: 0.192261
[271]	valid_0's l2: 0.192164
[272]	valid_0's l2: 0.192155
[273]	valid_0's l2: 0.192115
[274]	valid_0's l2: 0.191992
[275]	valid_0's l2: 0.191921
[276]	valid_0's l2: 0.191815
[277]	valid_0's l2: 0.191741
[278]	valid_0's l2: 0.191649
[279]	valid_0's l2: 0.191681
[280]	valid_0's l2: 0.191591
[281]	valid_0's l2: 0.191595
[282]	valid_0's l2: 0.191537
[283]	valid_0's l2: 0.191527
[284]	valid_0's l2: 0.191538
[285]	valid_0's l2: 0.191503
[286]	valid_0's l2: 0.191497
[287]	valid_0's l2: 0.191372
[288]	valid_0's l2: 0.191296
[289]	valid_0's l2: 0.191183
[290]	valid_0's l2: 0.191118
[291]	valid_0's l2: 0.191046
[292]	valid_0's l2: 0.191101
[293]	valid_0's l2: 0.191055
[294]	valid_0's l2: 0.191028
[295]	valid_0's l2: 0.190933
[296]	valid_0's l2: 0.1909
[297]	valid_0's l2: 0.190752
[298]	valid_0's l2: 0.190786
[299]	valid_0's l2: 0.19081
[300]	valid_0's l2: 0.190834
[301]	valid_0's l2: 0.19083
[302]	valid_0's l2

[607]	valid_0's l2: 0.18443
[608]	valid_0's l2: 0.184444
[609]	valid_0's l2: 0.184359
[610]	valid_0's l2: 0.18434
[611]	valid_0's l2: 0.184342
[612]	valid_0's l2: 0.184355
[613]	valid_0's l2: 0.18427
[614]	valid_0's l2: 0.184259
[615]	valid_0's l2: 0.184159
[616]	valid_0's l2: 0.184165
[617]	valid_0's l2: 0.184176
[618]	valid_0's l2: 0.184172
[619]	valid_0's l2: 0.184099
[620]	valid_0's l2: 0.184104
[621]	valid_0's l2: 0.184161
[622]	valid_0's l2: 0.184119
[623]	valid_0's l2: 0.184149
[624]	valid_0's l2: 0.184137
[625]	valid_0's l2: 0.184167
[626]	valid_0's l2: 0.184175
[627]	valid_0's l2: 0.184168
[628]	valid_0's l2: 0.184158
[629]	valid_0's l2: 0.18414
[630]	valid_0's l2: 0.184152
[631]	valid_0's l2: 0.184069
[632]	valid_0's l2: 0.184013
[633]	valid_0's l2: 0.183988
[634]	valid_0's l2: 0.183971
[635]	valid_0's l2: 0.183965
[636]	valid_0's l2: 0.183926
[637]	valid_0's l2: 0.183877
[638]	valid_0's l2: 0.183847
[639]	valid_0's l2: 0.18384
[640]	valid_0's l2: 0.183831
[641]	valid_0's l2:

[982]	valid_0's l2: 0.182501
[983]	valid_0's l2: 0.182511
[984]	valid_0's l2: 0.182547
[985]	valid_0's l2: 0.182536
[986]	valid_0's l2: 0.182542
[987]	valid_0's l2: 0.182555
[988]	valid_0's l2: 0.182562
[989]	valid_0's l2: 0.182608
[990]	valid_0's l2: 0.182559
[991]	valid_0's l2: 0.182552
[992]	valid_0's l2: 0.182591
[993]	valid_0's l2: 0.182621
[994]	valid_0's l2: 0.182579
[995]	valid_0's l2: 0.18255
[996]	valid_0's l2: 0.182546
[997]	valid_0's l2: 0.182564
[998]	valid_0's l2: 0.182583
[999]	valid_0's l2: 0.182608
[1000]	valid_0's l2: 0.182606
[1001]	valid_0's l2: 0.182576
[1002]	valid_0's l2: 0.182596
[1003]	valid_0's l2: 0.182587
[1004]	valid_0's l2: 0.182605
[1005]	valid_0's l2: 0.182605
[1006]	valid_0's l2: 0.182569
[1007]	valid_0's l2: 0.182583
[1008]	valid_0's l2: 0.182546
[1009]	valid_0's l2: 0.182516
[1010]	valid_0's l2: 0.18255
[1011]	valid_0's l2: 0.182547
[1012]	valid_0's l2: 0.182615
[1013]	valid_0's l2: 0.182609
[1014]	valid_0's l2: 0.182665
[1015]	valid_0's l2: 0.182634


[259]	valid_0's l2: 0.204763
[260]	valid_0's l2: 0.204704
[261]	valid_0's l2: 0.204674
[262]	valid_0's l2: 0.204635
[263]	valid_0's l2: 0.204579
[264]	valid_0's l2: 0.204512
[265]	valid_0's l2: 0.204436
[266]	valid_0's l2: 0.204353
[267]	valid_0's l2: 0.204293
[268]	valid_0's l2: 0.204234
[269]	valid_0's l2: 0.204167
[270]	valid_0's l2: 0.204088
[271]	valid_0's l2: 0.204057
[272]	valid_0's l2: 0.203974
[273]	valid_0's l2: 0.203946
[274]	valid_0's l2: 0.203888
[275]	valid_0's l2: 0.203814
[276]	valid_0's l2: 0.203746
[277]	valid_0's l2: 0.20368
[278]	valid_0's l2: 0.203623
[279]	valid_0's l2: 0.203598
[280]	valid_0's l2: 0.203529
[281]	valid_0's l2: 0.203483
[282]	valid_0's l2: 0.203419
[283]	valid_0's l2: 0.203383
[284]	valid_0's l2: 0.203325
[285]	valid_0's l2: 0.203322
[286]	valid_0's l2: 0.203269
[287]	valid_0's l2: 0.203206
[288]	valid_0's l2: 0.203183
[289]	valid_0's l2: 0.203135
[290]	valid_0's l2: 0.203072
[291]	valid_0's l2: 0.203044
[292]	valid_0's l2: 0.202993
[293]	valid_0's

[578]	valid_0's l2: 0.197758
[579]	valid_0's l2: 0.197766
[580]	valid_0's l2: 0.197722
[581]	valid_0's l2: 0.19768
[582]	valid_0's l2: 0.197684
[583]	valid_0's l2: 0.197691
[584]	valid_0's l2: 0.197692
[585]	valid_0's l2: 0.197699
[586]	valid_0's l2: 0.197694
[587]	valid_0's l2: 0.197627
[588]	valid_0's l2: 0.197633
[589]	valid_0's l2: 0.197653
[590]	valid_0's l2: 0.197657
[591]	valid_0's l2: 0.197653
[592]	valid_0's l2: 0.197688
[593]	valid_0's l2: 0.197727
[594]	valid_0's l2: 0.197762
[595]	valid_0's l2: 0.19775
[596]	valid_0's l2: 0.197741
[597]	valid_0's l2: 0.197692
[598]	valid_0's l2: 0.197704
[599]	valid_0's l2: 0.197701
[600]	valid_0's l2: 0.197706
[601]	valid_0's l2: 0.197714
[602]	valid_0's l2: 0.197683
[603]	valid_0's l2: 0.197643
[604]	valid_0's l2: 0.197607
[605]	valid_0's l2: 0.197598
[606]	valid_0's l2: 0.197603
[607]	valid_0's l2: 0.197561
[608]	valid_0's l2: 0.197555
[609]	valid_0's l2: 0.197532
[610]	valid_0's l2: 0.197534
[611]	valid_0's l2: 0.197538
[612]	valid_0's 

[994]	valid_0's l2: 0.195021
[995]	valid_0's l2: 0.195032
[996]	valid_0's l2: 0.195009
[997]	valid_0's l2: 0.195043
[998]	valid_0's l2: 0.195056
[999]	valid_0's l2: 0.195071
[1000]	valid_0's l2: 0.195081
[1001]	valid_0's l2: 0.195098
[1002]	valid_0's l2: 0.195096
[1003]	valid_0's l2: 0.19506
[1004]	valid_0's l2: 0.195082
[1005]	valid_0's l2: 0.195098
[1006]	valid_0's l2: 0.195087
[1007]	valid_0's l2: 0.195101
[1008]	valid_0's l2: 0.195078
[1009]	valid_0's l2: 0.195044
[1010]	valid_0's l2: 0.194991
[1011]	valid_0's l2: 0.194969
[1012]	valid_0's l2: 0.194929
[1013]	valid_0's l2: 0.194897
[1014]	valid_0's l2: 0.194891
[1015]	valid_0's l2: 0.194917
[1016]	valid_0's l2: 0.194898
[1017]	valid_0's l2: 0.194907
[1018]	valid_0's l2: 0.194918
[1019]	valid_0's l2: 0.194897
[1020]	valid_0's l2: 0.194875
[1021]	valid_0's l2: 0.194872
[1022]	valid_0's l2: 0.194887
[1023]	valid_0's l2: 0.194841
[1024]	valid_0's l2: 0.194825
[1025]	valid_0's l2: 0.194813
[1026]	valid_0's l2: 0.194844
[1027]	valid_0's 

[200]	train-auc:0.999382
[400]	train-auc:0.999773
[600]	train-auc:0.999773
[800]	train-auc:0.999773
lgb1第2次得分:0.723404255319149
lgb2第2次得分:0.644808743169399
xgb1 第2次得分:0.6740331491712708
融合后第2次得分:0.6740331491712708
第3次训练...
[1]	valid_0's l2: 0.249511
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's l2: 0.248719
[3]	valid_0's l2: 0.247848
[4]	valid_0's l2: 0.24753
[5]	valid_0's l2: 0.246725
[6]	valid_0's l2: 0.245827
[7]	valid_0's l2: 0.245158
[8]	valid_0's l2: 0.244328
[9]	valid_0's l2: 0.243884
[10]	valid_0's l2: 0.243447
[11]	valid_0's l2: 0.242601
[12]	valid_0's l2: 0.24184
[13]	valid_0's l2: 0.241088
[14]	valid_0's l2: 0.240514
[15]	valid_0's l2: 0.239786
[16]	valid_0's l2: 0.239387
[17]	valid_0's l2: 0.238785
[18]	valid_0's l2: 0.238183
[19]	valid_0's l2: 0.237503
[20]	valid_0's l2: 0.2371
[21]	valid_0's l2: 0.236554
[22]	valid_0's l2: 0.235892
[23]	valid_0's l2: 0.235414
[24]	valid_0's l2: 0.234804
[25]	valid_0's l2: 0.234426
[26]	valid_0's l2: 0.234103

[535]	valid_0's l2: 0.192264
[536]	valid_0's l2: 0.192302
[537]	valid_0's l2: 0.192239
[538]	valid_0's l2: 0.192166
[539]	valid_0's l2: 0.192086
[540]	valid_0's l2: 0.192067
[541]	valid_0's l2: 0.192044
[542]	valid_0's l2: 0.192043
[543]	valid_0's l2: 0.19209
[544]	valid_0's l2: 0.192127
[545]	valid_0's l2: 0.192128
[546]	valid_0's l2: 0.192052
[547]	valid_0's l2: 0.192128
[548]	valid_0's l2: 0.19209
[549]	valid_0's l2: 0.192103
[550]	valid_0's l2: 0.192126
Early stopping, best iteration is:
[450]	valid_0's l2: 0.191794
[1]	valid_0's l2: 0.249778
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's l2: 0.249396
[3]	valid_0's l2: 0.248968
[4]	valid_0's l2: 0.248829
[5]	valid_0's l2: 0.248411
[6]	valid_0's l2: 0.247933
[7]	valid_0's l2: 0.247587
[8]	valid_0's l2: 0.247151
[9]	valid_0's l2: 0.24696
[10]	valid_0's l2: 0.246743
[11]	valid_0's l2: 0.246301
[12]	valid_0's l2: 0.245902
[13]	valid_0's l2: 0.245512
[14]	valid_0's l2: 0.245273
[15]	valid_0's l2: 0.244884
[

[313]	valid_0's l2: 0.205637
[314]	valid_0's l2: 0.205584
[315]	valid_0's l2: 0.205561
[316]	valid_0's l2: 0.205569
[317]	valid_0's l2: 0.205525
[318]	valid_0's l2: 0.205457
[319]	valid_0's l2: 0.205463
[320]	valid_0's l2: 0.205408
[321]	valid_0's l2: 0.205357
[322]	valid_0's l2: 0.205372
[323]	valid_0's l2: 0.205347
[324]	valid_0's l2: 0.205304
[325]	valid_0's l2: 0.205239
[326]	valid_0's l2: 0.205202
[327]	valid_0's l2: 0.20516
[328]	valid_0's l2: 0.20513
[329]	valid_0's l2: 0.205039
[330]	valid_0's l2: 0.205034
[331]	valid_0's l2: 0.204993
[332]	valid_0's l2: 0.20499
[333]	valid_0's l2: 0.204906
[334]	valid_0's l2: 0.204854
[335]	valid_0's l2: 0.204747
[336]	valid_0's l2: 0.204756
[337]	valid_0's l2: 0.204721
[338]	valid_0's l2: 0.204667
[339]	valid_0's l2: 0.204632
[340]	valid_0's l2: 0.204643
[341]	valid_0's l2: 0.20461
[342]	valid_0's l2: 0.204619
[343]	valid_0's l2: 0.204598
[344]	valid_0's l2: 0.204519
[345]	valid_0's l2: 0.204486
[346]	valid_0's l2: 0.204496
[347]	valid_0's l2

[770]	valid_0's l2: 0.197764
[771]	valid_0's l2: 0.197766
[772]	valid_0's l2: 0.197743
[773]	valid_0's l2: 0.197715
[774]	valid_0's l2: 0.197762
[775]	valid_0's l2: 0.197743
[776]	valid_0's l2: 0.19769
[777]	valid_0's l2: 0.197638
[778]	valid_0's l2: 0.197616
[779]	valid_0's l2: 0.197598
[780]	valid_0's l2: 0.197613
[781]	valid_0's l2: 0.197631
[782]	valid_0's l2: 0.19759
[783]	valid_0's l2: 0.197596
[784]	valid_0's l2: 0.197577
[785]	valid_0's l2: 0.197584
[786]	valid_0's l2: 0.197611
[787]	valid_0's l2: 0.197559
[788]	valid_0's l2: 0.197554
[789]	valid_0's l2: 0.197593
[790]	valid_0's l2: 0.197608
[791]	valid_0's l2: 0.197566
[792]	valid_0's l2: 0.197537
[793]	valid_0's l2: 0.197514
[794]	valid_0's l2: 0.19751
[795]	valid_0's l2: 0.197469
[796]	valid_0's l2: 0.197417
[797]	valid_0's l2: 0.197392
[798]	valid_0's l2: 0.197396
[799]	valid_0's l2: 0.197444
[800]	valid_0's l2: 0.197451
[801]	valid_0's l2: 0.197377
[802]	valid_0's l2: 0.197368
[803]	valid_0's l2: 0.197339
[804]	valid_0's l

[1105]	valid_0's l2: 0.194567
[1106]	valid_0's l2: 0.194573
[1107]	valid_0's l2: 0.194588
[1108]	valid_0's l2: 0.194597
[1109]	valid_0's l2: 0.194562
[1110]	valid_0's l2: 0.194586
[1111]	valid_0's l2: 0.194629
[1112]	valid_0's l2: 0.194636
[1113]	valid_0's l2: 0.194608
[1114]	valid_0's l2: 0.194629
[1115]	valid_0's l2: 0.194622
[1116]	valid_0's l2: 0.194614
[1117]	valid_0's l2: 0.194615
[1118]	valid_0's l2: 0.194653
[1119]	valid_0's l2: 0.19465
[1120]	valid_0's l2: 0.194652
[1121]	valid_0's l2: 0.194673
[1122]	valid_0's l2: 0.194705
[1123]	valid_0's l2: 0.194665
[1124]	valid_0's l2: 0.194638
[1125]	valid_0's l2: 0.19466
[1126]	valid_0's l2: 0.194699
[1127]	valid_0's l2: 0.194666
[1128]	valid_0's l2: 0.194632
[1129]	valid_0's l2: 0.194645
[1130]	valid_0's l2: 0.194683
[1131]	valid_0's l2: 0.19471
[1132]	valid_0's l2: 0.194703
[1133]	valid_0's l2: 0.194742
[1134]	valid_0's l2: 0.194739
[1135]	valid_0's l2: 0.194737
[1136]	valid_0's l2: 0.194723
[1137]	valid_0's l2: 0.194719
[1138]	valid_

[96]	valid_0's l2: 0.210793
[97]	valid_0's l2: 0.210725
[98]	valid_0's l2: 0.210421
[99]	valid_0's l2: 0.210166
[100]	valid_0's l2: 0.210066
[101]	valid_0's l2: 0.210034
[102]	valid_0's l2: 0.20977
[103]	valid_0's l2: 0.20965
[104]	valid_0's l2: 0.209616
[105]	valid_0's l2: 0.209573
[106]	valid_0's l2: 0.209323
[107]	valid_0's l2: 0.209282
[108]	valid_0's l2: 0.209054
[109]	valid_0's l2: 0.208991
[110]	valid_0's l2: 0.208796
[111]	valid_0's l2: 0.208559
[112]	valid_0's l2: 0.208467
[113]	valid_0's l2: 0.208236
[114]	valid_0's l2: 0.208054
[115]	valid_0's l2: 0.207964
[116]	valid_0's l2: 0.207786
[117]	valid_0's l2: 0.207528
[118]	valid_0's l2: 0.20733
[119]	valid_0's l2: 0.207278
[120]	valid_0's l2: 0.207101
[121]	valid_0's l2: 0.206911
[122]	valid_0's l2: 0.206702
[123]	valid_0's l2: 0.206518
[124]	valid_0's l2: 0.206321
[125]	valid_0's l2: 0.20617
[126]	valid_0's l2: 0.206102
[127]	valid_0's l2: 0.205856
[128]	valid_0's l2: 0.205867
[129]	valid_0's l2: 0.205602
[130]	valid_0's l2: 0.

[1]	valid_0's l2: 0.249742
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's l2: 0.249254
[3]	valid_0's l2: 0.248807
[4]	valid_0's l2: 0.248361
[5]	valid_0's l2: 0.247858
[6]	valid_0's l2: 0.247359
[7]	valid_0's l2: 0.246865
[8]	valid_0's l2: 0.246733
[9]	valid_0's l2: 0.246246
[10]	valid_0's l2: 0.245822
[11]	valid_0's l2: 0.245369
[12]	valid_0's l2: 0.245221
[13]	valid_0's l2: 0.244758
[14]	valid_0's l2: 0.244343
[15]	valid_0's l2: 0.243897
[16]	valid_0's l2: 0.24349
[17]	valid_0's l2: 0.243272
[18]	valid_0's l2: 0.242878
[19]	valid_0's l2: 0.242486
[20]	valid_0's l2: 0.242274
[21]	valid_0's l2: 0.241846
[22]	valid_0's l2: 0.24141
[23]	valid_0's l2: 0.240991
[24]	valid_0's l2: 0.240627
[25]	valid_0's l2: 0.240197
[26]	valid_0's l2: 0.239841
[27]	valid_0's l2: 0.239675
[28]	valid_0's l2: 0.239302
[29]	valid_0's l2: 0.238906
[30]	valid_0's l2: 0.238781
[31]	valid_0's l2: 0.238709
[32]	valid_0's l2: 0.238585
[33]	valid_0's l2: 0.238456
[34]	valid_0's l2: 0.238

[384]	valid_0's l2: 0.199429
[385]	valid_0's l2: 0.199461
[386]	valid_0's l2: 0.199434
[387]	valid_0's l2: 0.19944
[388]	valid_0's l2: 0.199438
[389]	valid_0's l2: 0.199411
[390]	valid_0's l2: 0.199382
[391]	valid_0's l2: 0.199391
[392]	valid_0's l2: 0.199296
[393]	valid_0's l2: 0.199239
[394]	valid_0's l2: 0.199242
[395]	valid_0's l2: 0.199209
[396]	valid_0's l2: 0.199231
[397]	valid_0's l2: 0.199206
[398]	valid_0's l2: 0.199136
[399]	valid_0's l2: 0.199081
[400]	valid_0's l2: 0.19911
[401]	valid_0's l2: 0.199091
[402]	valid_0's l2: 0.199065
[403]	valid_0's l2: 0.198973
[404]	valid_0's l2: 0.199007
[405]	valid_0's l2: 0.199015
[406]	valid_0's l2: 0.198991
[407]	valid_0's l2: 0.19897
[408]	valid_0's l2: 0.198952
[409]	valid_0's l2: 0.198957
[410]	valid_0's l2: 0.198922
[411]	valid_0's l2: 0.198886
[412]	valid_0's l2: 0.19887
[413]	valid_0's l2: 0.198786
[414]	valid_0's l2: 0.19879
[415]	valid_0's l2: 0.198775
[416]	valid_0's l2: 0.198709
[417]	valid_0's l2: 0.198658
[418]	valid_0's l2:

In [10]:
# xgboost
clf_xgb_params1 = {
    'booster': 'gbtree',
    'eval_metric': 'auc',
    'objective': 'binary:logistic',
    'max_depth': 9,
    'eta': 0.02,
    'min_child_weight': 4,
    'colsample': 0.8,
    'gamma': 2,
    'n_thread': 4,
    'silent': 1
}

clf_xgb_params2 = {
    'booster':'gbtree',
    'eval_metric': 'auc',
    'objective':'binary:logistic',
    'max_depth': 8,
    'eta':0.01,
    'min_child_weight':3,
    'colsample':0.8,
    'scale_pos_weight':1,
    'gamma':1,
    'n_thread':4,
    'silent':1
 }

##lgb
clf_lgb_params1 = {
    'learning_rate': 0.01,
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'mse',
    'sub_feature': 0.7,
    'num_leaves': 60,
    'colsample_bytree': 0.7,
    'feature_fraction': 0.7,
    'min_data': 100,
    'min_hessian': 1,
    'verbose': -1,
}

clf_lgb_params2 = {
    'application': 'binary',
    'objective': 'binary',
    'metric': 'auc',
    'is_unbalance': 'true',
    'boosting': 'gbdt',
    'num_leaves': 15,
    'feature_fraction': 0.5,
    'bagging_fraction': 0.5,
    'bagging_freq': 20,
    'learning_rate': 0.05,
    'verbose': -1
}

# K折交叉验证
print('开始CV 5折训练...')
t0 = time.time()
X_preds = np.zeros(X.shape[0])
kf = KFold(len(X), n_folds=5, shuffle=True,random_state=520)
for i, (train_index, valid_index) in enumerate(kf):
    print('第{}次训练...'.format(i))
    train_X, train_y = X.iloc[train_index], y.iloc[train_index]
    valid_X, valid_y = X.iloc[valid_index], y.iloc[valid_index]
    
    # xgboost
    xgb_train = xgb.DMatrix(train_X, train_y)
    xgb_valid = xgb.DMatrix(valid_X, valid_y)

    watchlist = [(xgb_train,'train')]
    clf_xgb_model1 = xgb.train(clf_xgb_params1, xgb_train, num_boost_round=1000,
                              verbose_eval=200, evals=watchlist)
#     clf_xgb_model2 = xgb.train(clf_xgb_params2, xgb_train, num_boost_round=1000,
#                           verbose_eval=200, evals=watchlist)
    
    xgb1_pred = np.where(clf_xgb_model1.predict(
                        xgb.DMatrix(valid_X),  ntree_limit=clf_xgb_model1.best_ntree_limit + 20)>0.5,1,0)
#     xgb2_pred = np.where(clf_xgb_model2.predict(xgb.DMatrix(valid_X))>0.5,1,0)
    
    # lightgbm
#     lgb_train = lgb.Dataset(train_X, train_y)
#     lgb_valid = lgb.Dataset(valid_X, valid_y)
    
#     clf_lgb_model1 = lgb.train(clf_lgb_params1, lgb_train, valid_sets = [lgb_valid],
#                        num_boost_round = 3000, early_stopping_rounds = 100)
#     clf_lgb_model2 = lgb.train(clf_lgb_params2, lgb_train, valid_sets = [lgb_valid],
#                        num_boost_round=3000, early_stopping_rounds=100)
    
#     lgb1_pred = np.where(clf_lgb_model1.predict(xgb.DMatrix(valid_X))>0.5, 1, 0)
#     lgb2_pred = np.where(clf_lgb_model2.predict(valid_X)>0.5, 1, 0) 
    
#     # 随机森林
#     rf_clf = RandomForestClassifier(n_estimators = 1000, max_depth=8, random_state = 521)
#     rf_clf.fit(train_X, train_y)
#     rf_pred = rf_clf.predict(valid_X)
    
#     # svm
#     svm_clf = SVC(kernel='rbf', C=1, random_state = 521,verbose = 1)
#     svm_clf.fit(train_X, train_y)
#     svm_pred = svm_clf.predict(valid_X)
    
    print('xgb1 第{}次得分:{}'.format(i, f1_score(valid_y, xgb1_pred)))
#     print('xgb2第{}次得分:{}'.format(i, f1_score(valid_y, xgb2_pred)))
#     print('lgb1第{}次得分:{}'.format(i, f1_score(valid_y, lgb1_pred)))
#     print('lgb2第{}次得分:{}'.format(i, f1_score(valid_y, lgb2_pred)))
#     print('rf第{}次得分:{}'.format(i, f1_score(valid_y, rf_pred)))
#     print('svm第{}次得分:{}'.format(i, f1_score(valid_y, svm_pred)))
    
    # 模型融合
#     valid_X_pred = 0.2 * xgb1_pred + 0.2 * xgb2_pred + 0.2 * lgb1_pred + 
#                    0.2 * lgb_pred + 0.2 * rf_pred + 0.2 * svm_pred
    valid_X_pred = 1 * xgb1_pred 
    valid_X_pred = np.where(np.array(valid_X_pred) > 0.5, 1, 0)
    
    print('融合后第{}次得分:{}'.format(i, f1_score(valid_y, valid_X_pred)))
    
    X_preds[valid_index] += valid_X_pred
#分类预测结果
print('线下得分：{}'.format(f1_score(y, X_preds)))
print('CV训练用时{}秒'.format(time.time() - t0))

开始CV 5折训练...
第0次训练...
[0]	train-auc:0.877161
[200]	train-auc:0.998589
[400]	train-auc:0.99941
[600]	train-auc:0.99941
[800]	train-auc:0.99941
xgb1 第0次得分:0.6742857142857142
融合后第0次得分:0.6742857142857142
第1次训练...
[0]	train-auc:0.876252
[200]	train-auc:0.999052
[400]	train-auc:0.99951
[600]	train-auc:0.99951
[800]	train-auc:0.99951
xgb1 第1次得分:0.7093023255813953
融合后第1次得分:0.7093023255813953
第2次训练...
[0]	train-auc:0.877574
[200]	train-auc:0.999382
[400]	train-auc:0.999773
[600]	train-auc:0.999773
[800]	train-auc:0.999773
xgb1 第2次得分:0.6740331491712708
融合后第2次得分:0.6740331491712708
第3次训练...
[0]	train-auc:0.876199
[200]	train-auc:0.999261
[400]	train-auc:0.999612
[600]	train-auc:0.999612
[800]	train-auc:0.999612
xgb1 第3次得分:0.6627906976744184
融合后第3次得分:0.6627906976744184
第4次训练...
[0]	train-auc:0.874692
[200]	train-auc:0.99943
[400]	train-auc:0.9998
[600]	train-auc:0.9998
[800]	train-auc:0.9998
xgb1 第4次得分:0.6012269938650306
融合后第4次得分:0.6012269938650306
线下得分：0.6651216685979142
CV训练用时19.82067346572876秒


In [14]:
pd.Series(clf_xgb_model1.get_fscore()).sort_values(ascending = False)

VAR00007        347
年龄              202
孕前BMI           152
TG              139
SNP37_2.0       129
LDLC            107
炎症              105
HDLC            103
心血管             103
AST             103
wbc             101
hsCRP           100
肾脏               88
SNP34_1.0        85
Lpa              75
ApoA1            74
Cr               69
SNP34_2.0        69
糖筛孕周             66
SNP20_0.0        64
ApoB             62
SNP48_2.0        59
孕前体重             58
SNP37_1.0        54
SNP28_2.0        49
身高               49
SNP37_3.0        46
SNP46_0.0        45
SNP49_2.0        44
收缩压              42
               ... 
SNP33_1.0         4
SNP16_2.0         4
SNP2_1            4
SNP19_2.0         3
SNP30_1.0         3
SNP25_2.0         3
SNP50_2.0         3
SNP48_3.0         3
SNP39_1.0         3
SNP52_2.0         3
SNP16_3.0         3
SNP15_1.0         2
SNP44_2.0         2
SNP26_2.0         2
SNP4_1.0          2
SNP24_2.0         2
DM家族史             2
SNP36_3.0         2
SNP3_2.0          2


In [15]:
# 测试集结果
xgb1_test_pred = np.where(clf_xgb_model1.predict(xgb.DMatrix(test_X), ntree_limit=clf_xgb_model1.best_ntree_limit + 20)>0.5,1,0)
lgb1_test_pred = np.where(clf_lgb_model1.predict(test_X, num_iteration=clf_lgb_model1.best_iteration + 20)>0.5,1,0)
lgb2_test_pred = np.where(clf_lgb_model2.predict(test_X, num_iteration=clf_lgb_model2.best_iteration + 20)>0.5,1,0)

final_pred = 0.333 * xgb1_test_pred + 0.333 * lgb1_test_pred + 0.333 * lgb2_test_pred 
final_pred = np.where(np.array(final_pred) > 0.5, 1, 0)

final_pred = pd.DataFrame({'final_pred' : final_pred})
final_pred.to_csv(data_path + r'sub{}.csv'.format(datetime.datetime.now().strftime('%Y%m%d_%H%M%S')), header=None,
                  index=False, float_format='%.4f')