In [227]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, mean_absolute_error, f1_score
import os
import json
import gc
from numba import jit
from tqdm import tqdm_notebook
from tqdm import tqdm
import catboost as cbt
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold, RepeatedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.kernel_ridge import KernelRidge
from scipy.signal import hilbert, hann, convolve
from scipy import stats
import scipy.spatial.distance as dist
from collections import Counter
from statistics import mode
import warnings
import math
from itertools import product
import ast

In [228]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
warnings.filterwarnings('ignore')

In [229]:
path = r'./data/'
train = pd.read_csv(filepath_or_buffer=path + 'train.csv')
test = pd.read_csv(filepath_or_buffer=path + 'test_noLabel.csv')

In [230]:
print(train.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 32 columns):
ID                          1100 non-null int64
Age                         1100 non-null int64
BusinessTravel              1100 non-null object
Department                  1100 non-null object
DistanceFromHome            1100 non-null int64
Education                   1100 non-null int64
EducationField              1100 non-null object
EmployeeNumber              1100 non-null int64
EnvironmentSatisfaction     1100 non-null int64
Gender                      1100 non-null object
JobInvolvement              1100 non-null int64
JobLevel                    1100 non-null int64
JobRole                     1100 non-null object
JobSatisfaction             1100 non-null int64
MaritalStatus               1100 non-null object
MonthlyIncome               1100 non-null int64
NumCompaniesWorked          1100 non-null int64
Over18                      1100 non-null object
OverTime              

In [231]:
train.describe()

Unnamed: 0,ID,Age,DistanceFromHome,Education,EmployeeNumber,EnvironmentSatisfaction,JobInvolvement,JobLevel,JobSatisfaction,MonthlyIncome,NumCompaniesWorked,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,Label
count,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0
mean,549.5,36.999091,9.427273,2.922727,1028.157273,2.725455,2.730909,2.054545,2.732727,6483.620909,2.683636,15.235455,3.152727,2.696364,80.0,0.788182,11.221818,2.807273,2.746364,7.011818,4.207273,2.226364,4.123636,0.161818
std,317.686953,9.03723,8.196694,1.022242,598.915204,1.098053,0.706366,1.107805,1.109731,4715.293419,2.510017,3.628571,0.359888,1.095356,0.0,0.843347,7.825548,1.291514,0.701121,6.223093,3.618115,3.31383,3.597996,0.368451
min,0.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1009.0,0.0,11.0,3.0,1.0,80.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
25%,274.75,30.0,2.0,2.0,504.25,2.0,2.0,1.0,2.0,2924.5,1.0,12.0,3.0,2.0,80.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,2.0,0.0
50%,549.5,36.0,7.0,3.0,1026.5,3.0,3.0,2.0,3.0,4857.0,2.0,14.0,3.0,3.0,80.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0,0.0
75%,824.25,43.0,15.0,4.0,1556.5,4.0,3.0,3.0,4.0,8354.5,4.0,18.0,3.0,4.0,80.0,1.0,15.0,3.0,3.0,9.0,7.0,3.0,7.0,0.0
max,1099.0,60.0,29.0,5.0,2065.0,4.0,4.0,5.0,4.0,19999.0,9.0,25.0,4.0,4.0,80.0,3.0,40.0,6.0,4.0,37.0,18.0,15.0,17.0,1.0


In [232]:
# train.corr()

In [233]:
train.EnvironmentSatisfaction.value_counts()

4    338
3    337
1    215
2    210
Name: EnvironmentSatisfaction, dtype: int64

In [234]:
print(len(test.columns))

31


In [235]:
# data = pd.concat([train, test])
# print(len(data.columns))

In [236]:
test['Label'] = -1  # 新加了一列
print(len(test.columns))

32


In [237]:
data = train.append(test).reset_index(drop=True)

In [238]:
data.head()

Unnamed: 0,ID,Age,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeNumber,EnvironmentSatisfaction,Gender,JobInvolvement,JobLevel,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,NumCompaniesWorked,Over18,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,Label
0,0,37,Travel_Rarely,Research & Development,1,4,Life Sciences,77,1,Male,2,2,Manufacturing Director,3,Divorced,5993,1,Y,No,18,3,3,80,1,7,2,4,7,5,0,7,0
1,1,54,Travel_Frequently,Research & Development,1,4,Life Sciences,1245,4,Female,3,3,Manufacturing Director,3,Divorced,10502,7,Y,No,17,3,1,80,1,33,2,1,5,4,1,4,0
2,2,34,Travel_Frequently,Research & Development,7,3,Life Sciences,147,1,Male,1,2,Laboratory Technician,3,Single,6074,1,Y,Yes,24,4,4,80,0,9,3,3,9,7,0,6,1
3,3,39,Travel_Rarely,Research & Development,1,1,Life Sciences,1026,4,Female,2,4,Manufacturing Director,4,Married,12742,1,Y,No,16,3,3,80,1,21,3,3,21,6,11,8,0
4,4,28,Travel_Frequently,Research & Development,1,3,Medical,1111,1,Male,2,1,Laboratory Technician,2,Divorced,2596,1,Y,No,15,3,1,80,2,1,2,3,1,0,0,0,1


In [239]:
# 筛选出类别特征做Label Encoder

In [240]:
# cat_col = [i for i in data.select_dtypes(object).columns if i not in ['ID', 'Label']]
# for i in cat_col:
#     data[i] = data[i].astype('category')

In [241]:
cat_col = [i for i in data.select_dtypes(object).columns if i not in ['Label']]
for i in tqdm_notebook(cat_col):
    lbl = LabelEncoder()
    data[i] = lbl.fit_transform(data[i].astype(str))

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




In [242]:
# data.head()

In [243]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1450 entries, 0 to 1449
Data columns (total 32 columns):
ID                          1450 non-null int64
Age                         1450 non-null int64
BusinessTravel              1450 non-null int32
Department                  1450 non-null int32
DistanceFromHome            1450 non-null int64
Education                   1450 non-null int64
EducationField              1450 non-null int32
EmployeeNumber              1450 non-null int64
EnvironmentSatisfaction     1450 non-null int64
Gender                      1450 non-null int32
JobInvolvement              1450 non-null int64
JobLevel                    1450 non-null int64
JobRole                     1450 non-null int32
JobSatisfaction             1450 non-null int64
MaritalStatus               1450 non-null int32
MonthlyIncome               1450 non-null int64
NumCompaniesWorked          1450 non-null int64
Over18                      1450 non-null int32
OverTime                    1

In [244]:
feats = [i for i in data.columns if i not in ['ID', 'Label']]
feats

['Age',
 'BusinessTravel',
 'Department',
 'DistanceFromHome',
 'Education',
 'EducationField',
 'EmployeeNumber',
 'EnvironmentSatisfaction',
 'Gender',
 'JobInvolvement',
 'JobLevel',
 'JobRole',
 'JobSatisfaction',
 'MaritalStatus',
 'MonthlyIncome',
 'NumCompaniesWorked',
 'Over18',
 'OverTime',
 'PercentSalaryHike',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StandardHours',
 'StockOptionLevel',
 'TotalWorkingYears',
 'TrainingTimesLastYear',
 'WorkLifeBalance',
 'YearsAtCompany',
 'YearsInCurrentRole',
 'YearsSinceLastPromotion',
 'YearsWithCurrManager']

In [245]:
len(feats)

30

In [246]:
data[data['Label'] == -1][['ID']]

Unnamed: 0,ID
1100,1100
1101,1101
1102,1102
1103,1103
1104,1104
1105,1105
1106,1106
1107,1107
1108,1108
1109,1109


In [247]:
# 建模

In [248]:
model = lgb.LGBMClassifier(boosting_type='gbdt',
                           num_leaves=30,
                           reg_alpha=0,
                           reg_lambda=0.,
                           max_depth=-1,
                           n_estimators=1500,
                           objective='binary',
                           metric='auc',
                           subsample=0.95,
                           colsample_bytree=0.7,
                           subsample_freq=1,
                           learning_rate=0.02,
                           random_state=2019)

In [249]:
data.Label.value_counts()

 0    922
-1    350
 1    178
Name: Label, dtype: int64

In [250]:
# 5折交叉验证

In [255]:
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
train_x = data[data['Label'] != -1][feats]
train_y = data[data['Label'] != -1]['Label']
res = data[data['Label'] == -1][['ID']]
test_x = data[data['Label'] == -1][feats]
res['pred'] = 0

for train_idx, val_idx in kfold.split(train_x):
    model.random_state = model.random_state + 1
    train_x1 = train_x.loc[train_idx]
    train_y1 = train_y.loc[train_idx]
    test_x1 = train_x.loc[val_idx]
    test_y1 = train_y.loc[val_idx]
    model.fit(train_x1, train_y1, eval_set=[(train_x1, train_y1), (test_x1, test_y1)], eval_metric='auc', early_stopping_rounds=100)
    res['pred'] += model.predict_proba(test_x)[:, 1]
    
res['pred'] = res['pred'] / n_splits
res['Label'] = res['pred']
res.loc[res['Label'] >= 0.5] = 1
res.loc[res['Label'] <= 0.5] = 0

[1]	training's auc: 0.877812	valid_1's auc: 0.688254
Training until validation scores don't improve for 100 rounds
[2]	training's auc: 0.909488	valid_1's auc: 0.730222
[3]	training's auc: 0.916055	valid_1's auc: 0.743746
[4]	training's auc: 0.925617	valid_1's auc: 0.753016
[5]	training's auc: 0.932109	valid_1's auc: 0.761524
[6]	training's auc: 0.933061	valid_1's auc: 0.763429
[7]	training's auc: 0.936649	valid_1's auc: 0.761968
[8]	training's auc: 0.935954	valid_1's auc: 0.763746
[9]	training's auc: 0.937776	valid_1's auc: 0.76
[10]	training's auc: 0.937736	valid_1's auc: 0.76673
[11]	training's auc: 0.944444	valid_1's auc: 0.765778
[12]	training's auc: 0.945894	valid_1's auc: 0.761079
[13]	training's auc: 0.9484	valid_1's auc: 0.762222
[14]	training's auc: 0.948657	valid_1's auc: 0.765143
[15]	training's auc: 0.949422	valid_1's auc: 0.764762
[16]	training's auc: 0.951998	valid_1's auc: 0.768317
[17]	training's auc: 0.952995	valid_1's auc: 0.765524
[18]	training's auc: 0.953433	valid_

[151]	training's auc: 0.999316	valid_1's auc: 0.77981
[152]	training's auc: 0.999396	valid_1's auc: 0.778413
[153]	training's auc: 0.999446	valid_1's auc: 0.779302
[154]	training's auc: 0.999467	valid_1's auc: 0.778286
[155]	training's auc: 0.999507	valid_1's auc: 0.776381
[156]	training's auc: 0.999557	valid_1's auc: 0.775873
[157]	training's auc: 0.999557	valid_1's auc: 0.774603
[158]	training's auc: 0.999577	valid_1's auc: 0.774857
[159]	training's auc: 0.999607	valid_1's auc: 0.77473
[160]	training's auc: 0.999628	valid_1's auc: 0.775873
[161]	training's auc: 0.999668	valid_1's auc: 0.776127
[162]	training's auc: 0.999678	valid_1's auc: 0.776381
[163]	training's auc: 0.999688	valid_1's auc: 0.776508
[164]	training's auc: 0.999708	valid_1's auc: 0.777016
[165]	training's auc: 0.999708	valid_1's auc: 0.776508
[166]	training's auc: 0.999708	valid_1's auc: 0.776254
[167]	training's auc: 0.999708	valid_1's auc: 0.776
[168]	training's auc: 0.999708	valid_1's auc: 0.775619
[169]	training'

[108]	training's auc: 0.995353	valid_1's auc: 0.866093
[109]	training's auc: 0.995582	valid_1's auc: 0.86564
[110]	training's auc: 0.995782	valid_1's auc: 0.864583
[111]	training's auc: 0.995744	valid_1's auc: 0.865338
[112]	training's auc: 0.99584	valid_1's auc: 0.864885
[113]	training's auc: 0.995954	valid_1's auc: 0.865791
[114]	training's auc: 0.996059	valid_1's auc: 0.865338
[115]	training's auc: 0.996126	valid_1's auc: 0.866093
[116]	training's auc: 0.996298	valid_1's auc: 0.867754
[117]	training's auc: 0.996307	valid_1's auc: 0.867301
[118]	training's auc: 0.99646	valid_1's auc: 0.866697
[119]	training's auc: 0.996584	valid_1's auc: 0.866093
[120]	training's auc: 0.996794	valid_1's auc: 0.867301
[121]	training's auc: 0.996985	valid_1's auc: 0.868207
[122]	training's auc: 0.99708	valid_1's auc: 0.866848
[123]	training's auc: 0.997242	valid_1's auc: 0.868961
[124]	training's auc: 0.997261	valid_1's auc: 0.869414
[125]	training's auc: 0.997462	valid_1's auc: 0.869867
[126]	training

[260]	training's auc: 1	valid_1's auc: 0.889946
[261]	training's auc: 1	valid_1's auc: 0.890248
[262]	training's auc: 1	valid_1's auc: 0.8907
[263]	training's auc: 1	valid_1's auc: 0.891153
[264]	training's auc: 1	valid_1's auc: 0.890248
[265]	training's auc: 1	valid_1's auc: 0.889795
[266]	training's auc: 1	valid_1's auc: 0.890851
[267]	training's auc: 1	valid_1's auc: 0.891002
[268]	training's auc: 1	valid_1's auc: 0.890399
[269]	training's auc: 1	valid_1's auc: 0.891002
[270]	training's auc: 1	valid_1's auc: 0.890851
[271]	training's auc: 1	valid_1's auc: 0.890399
[272]	training's auc: 1	valid_1's auc: 0.891153
[273]	training's auc: 1	valid_1's auc: 0.891757
[274]	training's auc: 1	valid_1's auc: 0.892361
[275]	training's auc: 1	valid_1's auc: 0.892361
[276]	training's auc: 1	valid_1's auc: 0.892814
[277]	training's auc: 1	valid_1's auc: 0.891606
[278]	training's auc: 1	valid_1's auc: 0.890851
[279]	training's auc: 1	valid_1's auc: 0.8907
[280]	training's auc: 1	valid_1's auc: 0.890

[430]	training's auc: 1	valid_1's auc: 0.897192
[431]	training's auc: 1	valid_1's auc: 0.897041
[432]	training's auc: 1	valid_1's auc: 0.897343
[433]	training's auc: 1	valid_1's auc: 0.897041
[434]	training's auc: 1	valid_1's auc: 0.89689
[435]	training's auc: 1	valid_1's auc: 0.897192
[436]	training's auc: 1	valid_1's auc: 0.897645
[437]	training's auc: 1	valid_1's auc: 0.897947
[438]	training's auc: 1	valid_1's auc: 0.8984
[439]	training's auc: 1	valid_1's auc: 0.897796
[440]	training's auc: 1	valid_1's auc: 0.897796
[441]	training's auc: 1	valid_1's auc: 0.897494
[442]	training's auc: 1	valid_1's auc: 0.897645
[443]	training's auc: 1	valid_1's auc: 0.897796
[444]	training's auc: 1	valid_1's auc: 0.898551
[445]	training's auc: 1	valid_1's auc: 0.8984
[446]	training's auc: 1	valid_1's auc: 0.898098
[447]	training's auc: 1	valid_1's auc: 0.898098
[448]	training's auc: 1	valid_1's auc: 0.897947
[449]	training's auc: 1	valid_1's auc: 0.897796
[450]	training's auc: 1	valid_1's auc: 0.8980

[601]	training's auc: 1	valid_1's auc: 0.899909
[602]	training's auc: 1	valid_1's auc: 0.90006
[603]	training's auc: 1	valid_1's auc: 0.900513
[604]	training's auc: 1	valid_1's auc: 0.899909
[605]	training's auc: 1	valid_1's auc: 0.900211
[606]	training's auc: 1	valid_1's auc: 0.900211
[607]	training's auc: 1	valid_1's auc: 0.900513
[608]	training's auc: 1	valid_1's auc: 0.900211
[609]	training's auc: 1	valid_1's auc: 0.900513
[610]	training's auc: 1	valid_1's auc: 0.900362
[611]	training's auc: 1	valid_1's auc: 0.899909
[612]	training's auc: 1	valid_1's auc: 0.899909
[613]	training's auc: 1	valid_1's auc: 0.90006
[614]	training's auc: 1	valid_1's auc: 0.90006
[615]	training's auc: 1	valid_1's auc: 0.900211
[616]	training's auc: 1	valid_1's auc: 0.899457
[617]	training's auc: 1	valid_1's auc: 0.899758
[618]	training's auc: 1	valid_1's auc: 0.899607
[619]	training's auc: 1	valid_1's auc: 0.900513
[620]	training's auc: 1	valid_1's auc: 0.900362
[621]	training's auc: 1	valid_1's auc: 0.90

[43]	training's auc: 0.97072	valid_1's auc: 0.777778
[44]	training's auc: 0.971462	valid_1's auc: 0.778119
[45]	training's auc: 0.972947	valid_1's auc: 0.777607
[46]	training's auc: 0.973365	valid_1's auc: 0.779826
[47]	training's auc: 0.973912	valid_1's auc: 0.778119
[48]	training's auc: 0.9751	valid_1's auc: 0.778631
[49]	training's auc: 0.975202	valid_1's auc: 0.778973
[50]	training's auc: 0.975945	valid_1's auc: 0.776242
[51]	training's auc: 0.976613	valid_1's auc: 0.777607
[52]	training's auc: 0.976947	valid_1's auc: 0.775559
[53]	training's auc: 0.97742	valid_1's auc: 0.773682
[54]	training's auc: 0.978135	valid_1's auc: 0.774535
[55]	training's auc: 0.978348	valid_1's auc: 0.774364
[56]	training's auc: 0.978924	valid_1's auc: 0.771975
[57]	training's auc: 0.979471	valid_1's auc: 0.774023
[58]	training's auc: 0.980093	valid_1's auc: 0.774023
[59]	training's auc: 0.980344	valid_1's auc: 0.770268
[60]	training's auc: 0.98091	valid_1's auc: 0.771121
[61]	training's auc: 0.981494	val

[195]	training's auc: 1	valid_1's auc: 0.783922
[196]	training's auc: 1	valid_1's auc: 0.783581
[197]	training's auc: 1	valid_1's auc: 0.783751
[198]	training's auc: 1	valid_1's auc: 0.784434
[199]	training's auc: 1	valid_1's auc: 0.785117
[200]	training's auc: 1	valid_1's auc: 0.786482
[201]	training's auc: 1	valid_1's auc: 0.785288
[202]	training's auc: 1	valid_1's auc: 0.786312
[203]	training's auc: 1	valid_1's auc: 0.786141
[204]	training's auc: 1	valid_1's auc: 0.786312
[205]	training's auc: 1	valid_1's auc: 0.7858
[206]	training's auc: 1	valid_1's auc: 0.785458
[207]	training's auc: 1	valid_1's auc: 0.785458
[208]	training's auc: 1	valid_1's auc: 0.786141
[209]	training's auc: 1	valid_1's auc: 0.786482
[210]	training's auc: 1	valid_1's auc: 0.787336
[211]	training's auc: 1	valid_1's auc: 0.786312
[212]	training's auc: 1	valid_1's auc: 0.787848
[213]	training's auc: 1	valid_1's auc: 0.787165
[214]	training's auc: 1	valid_1's auc: 0.788701
[215]	training's auc: 1	valid_1's auc: 0.7

[43]	training's auc: 0.966447	valid_1's auc: 0.778421
[44]	training's auc: 0.966853	valid_1's auc: 0.778772
[45]	training's auc: 0.967149	valid_1's auc: 0.776316
[46]	training's auc: 0.967508	valid_1's auc: 0.781404
[47]	training's auc: 0.968284	valid_1's auc: 0.781228
[48]	training's auc: 0.969318	valid_1's auc: 0.782456
[49]	training's auc: 0.970287	valid_1's auc: 0.780877
[50]	training's auc: 0.971071	valid_1's auc: 0.780702
[51]	training's auc: 0.971755	valid_1's auc: 0.780526
[52]	training's auc: 0.972364	valid_1's auc: 0.781754
[53]	training's auc: 0.973047	valid_1's auc: 0.782632
[54]	training's auc: 0.973351	valid_1's auc: 0.782456
[55]	training's auc: 0.973998	valid_1's auc: 0.782456
[56]	training's auc: 0.974653	valid_1's auc: 0.782982
[57]	training's auc: 0.975041	valid_1's auc: 0.784386
[58]	training's auc: 0.97553	valid_1's auc: 0.781228
[59]	training's auc: 0.976822	valid_1's auc: 0.779298
[60]	training's auc: 0.977376	valid_1's auc: 0.776491
[61]	training's auc: 0.978004

[193]	training's auc: 0.999972	valid_1's auc: 0.793333
[194]	training's auc: 0.999982	valid_1's auc: 0.792982
[195]	training's auc: 0.999982	valid_1's auc: 0.794211
[196]	training's auc: 0.999982	valid_1's auc: 0.794737
[197]	training's auc: 0.999972	valid_1's auc: 0.794386
[198]	training's auc: 0.999982	valid_1's auc: 0.792632
[199]	training's auc: 0.999982	valid_1's auc: 0.792807
[200]	training's auc: 0.999982	valid_1's auc: 0.792632
[201]	training's auc: 0.999982	valid_1's auc: 0.793684
[202]	training's auc: 0.999982	valid_1's auc: 0.794561
[203]	training's auc: 0.999982	valid_1's auc: 0.795439
[204]	training's auc: 0.999991	valid_1's auc: 0.796316
[205]	training's auc: 0.999991	valid_1's auc: 0.796491
[206]	training's auc: 0.999991	valid_1's auc: 0.796491
[207]	training's auc: 0.999991	valid_1's auc: 0.796491
[208]	training's auc: 0.999991	valid_1's auc: 0.796316
[209]	training's auc: 0.999991	valid_1's auc: 0.795614
[210]	training's auc: 0.999991	valid_1's auc: 0.79614
[211]	train

[361]	training's auc: 1	valid_1's auc: 0.800351
[362]	training's auc: 1	valid_1's auc: 0.800175
[363]	training's auc: 1	valid_1's auc: 0.799474
[364]	training's auc: 1	valid_1's auc: 0.8
[365]	training's auc: 1	valid_1's auc: 0.799649
[366]	training's auc: 1	valid_1's auc: 0.799825
[367]	training's auc: 1	valid_1's auc: 0.799825
[368]	training's auc: 1	valid_1's auc: 0.800877
[369]	training's auc: 1	valid_1's auc: 0.801053
[370]	training's auc: 1	valid_1's auc: 0.799474
[371]	training's auc: 1	valid_1's auc: 0.800175
[372]	training's auc: 1	valid_1's auc: 0.800702
[373]	training's auc: 1	valid_1's auc: 0.799474
[374]	training's auc: 1	valid_1's auc: 0.798947
[375]	training's auc: 1	valid_1's auc: 0.799474
[376]	training's auc: 1	valid_1's auc: 0.79807
[377]	training's auc: 1	valid_1's auc: 0.798421
[378]	training's auc: 1	valid_1's auc: 0.798246
[379]	training's auc: 1	valid_1's auc: 0.798947
[380]	training's auc: 1	valid_1's auc: 0.797719
[381]	training's auc: 1	valid_1's auc: 0.79736

[86]	training's auc: 0.992586	valid_1's auc: 0.789855
[87]	training's auc: 0.992786	valid_1's auc: 0.791214
[88]	training's auc: 0.993072	valid_1's auc: 0.791516
[89]	training's auc: 0.993397	valid_1's auc: 0.791818
[90]	training's auc: 0.993645	valid_1's auc: 0.791063
[91]	training's auc: 0.993683	valid_1's auc: 0.790912
[92]	training's auc: 0.993893	valid_1's auc: 0.790761
[93]	training's auc: 0.994227	valid_1's auc: 0.791063
[94]	training's auc: 0.994351	valid_1's auc: 0.793176
[95]	training's auc: 0.994742	valid_1's auc: 0.794535
[96]	training's auc: 0.995057	valid_1's auc: 0.794686
[97]	training's auc: 0.995095	valid_1's auc: 0.794988
[98]	training's auc: 0.995257	valid_1's auc: 0.795441
[99]	training's auc: 0.995572	valid_1's auc: 0.795592
[100]	training's auc: 0.995754	valid_1's auc: 0.794837
[101]	training's auc: 0.99603	valid_1's auc: 0.795139
[102]	training's auc: 0.996183	valid_1's auc: 0.795139
[103]	training's auc: 0.996355	valid_1's auc: 0.793478
[104]	training's auc: 0.9

In [256]:
res[['ID', 'Label']].to_csv('./result/lgb_baseline.csv', index=False)

In [224]:
# 需要拿来融合的概率文件

In [253]:
res[['ID', 'pred']].to_csv('./result/lgb_proba.csv', index=False)

In [209]:
from sklearn.ensemble import GradientBoostingClassifier

In [210]:
model = GradientBoostingClassifier()
testX = test_x.values
trainX = train_x.values
trainY = train_y.values
model.fit(trainX, trainY)
predictY = model.predict_proba(testX)[:, 1]
rs = res[['ID']]
rs['pred'] = predictY
rs[['ID', 'pred']].to_csv('./result/gbdt_proba.csv', index=False)

In [212]:
rs.loc[rs['pred'] >= 0.5, 'pred'] = 1
rs.loc[rs['pred'] < 0.5, 'pred'] = 0
rs.columns = ['ID', 'Label']
# rs['Label'] = res['Label'].astype(int)
rs

Unnamed: 0,ID,Label
1100,0,0.0
1101,0,0.0
1102,0,0.0
1103,0,0.0
1104,0,0.0
1105,0,0.0
1106,0,0.0
1107,0,0.0
1108,0,0.0
1109,0,0.0
