In [29]:
# -*- coding:utf-8 -*-
'''
@Author:

@Date: 10/17

@Description:

'''

import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score


def lgb_f1_score_sk(y_hat, y_true):
    """Custom F1 evaluation metric for LightGBM's scikit-learn interface.

    Both inputs are rounded to the nearest integer before scoring, because
    scikit-learn's ``f1_score`` works on hard class labels rather than on
    probabilities.

    NOTE(review): LightGBM's sklearn wrapper documents the custom metric
    signature as ``func(y_true, y_pred)``, so the parameter names here look
    swapped relative to the order in which values arrive -- confirm. Binary
    F1 does not change when the two arrays trade places, so the reported
    score is the same either way.

    Returns:
        tuple: ``('f1', score, True)`` -- the metric name, its value, and a
        flag telling LightGBM that larger values are better.
    """
    hard_true, hard_hat = (np.round(values) for values in (y_true, y_hat))
    score = f1_score(hard_true, hard_hat)
    return 'f1', score, True


# ---------------------------------------------------------------------------
# Initial configuration
# ---------------------------------------------------------------------------
# Print wide DataFrames on a single line instead of wrapping their columns.
pd.set_option('display.expand_frame_repr', False)

# ---------------------------------------------------------------------------
# Data reading
# ---------------------------------------------------------------------------
# The raw files are read without a header row, so column names are supplied
# here. The test file uses the same layout minus the trailing 'label' column.
# Every value is cast to str, which means 'label' is text at this point.
_RAW_COLUMNS = ['prefix', 'query_prediction', 'title', 'tag', 'label']

train_data = pd.read_table('../data/oppo_round1_train_20180929.txt',
                           names=_RAW_COLUMNS, header=None, encoding='utf-8')
train_data = train_data.astype(str)

val_data = pd.read_table('../data/oppo_round1_vali_20180929.txt',
                         names=_RAW_COLUMNS, header=None, encoding='utf-8')
val_data = val_data.astype(str)

test_data = pd.read_table('../data/oppo_round1_test_A_20180929.txt',
                          names=_RAW_COLUMNS[:-1], header=None, encoding='utf-8')
test_data = test_data.astype(str)

  interactivity=interactivity, compiler=compiler, result=result)


In [30]:
# Precomputed feature tables, one file per split (train / val / test).
# The "*_31_3" files are later attached as the "semantic feature - 31_3" block.
new_train_data, new_val_data, new_test_data = map(pd.read_csv, (
    '../data/train_31_3.csv',
    '../data/val_31_3.csv',
    '../data/test_31_3.csv',
))

# The "*_vec_4" files are later attached as the "semantic feature - 4" block.
train_data_dict_df, val_data_dict_df, test_data_dict_df = map(pd.read_csv, (
    '../data/train_vec_4.csv',
    '../data/val_vec_4.csv',
    '../data/test_vec_4.csv',
))

In [31]:
# ---------------------------------------------------------------------------
# Data preprocessing (cleaning)
# ---------------------------------------------------------------------------

# Report the positions of training rows whose 'label' field holds the text
# '音乐' instead of a numeric label, then remove those rows.
print(np.where(train_data['label'].values == '音乐'))

# .copy() makes the filtered frame independent of the original, so the column
# assignment below writes to a real frame rather than to a slice (this avoids
# pandas' SettingWithCopyWarning and its ambiguous semantics).
train_data = train_data[train_data['label'] != '音乐'].copy()

# The index is intentionally not reset here: the removed row leaves a gap in
# the index labels of train_data.

# Labels were read as strings; convert them to 64-bit integers in one
# vectorised step instead of calling int() row by row.
train_data['label'] = train_data['label'].astype(np.int64)
val_data['label'] = val_data['label'].astype(np.int64)


(array([1815101], dtype=int64),)


In [32]:
# ---------------------------------------------------------------------------
# Feature engineering
# ---------------------------------------------------------------------------
# For every combination of the key columns, compute on the TRAINING labels:
#   <keys>_click : sum of labels in the group
#   <keys>_count : number of rows in the group
#   <keys>_ctr   : click / count
# and left-join those statistics onto train, validation and test.
#
# NOTE(review): the statistics are computed from train_data's labels and then
# merged back onto train_data itself, so every training row's features include
# its own label. Validation/test rows do not have that advantage. Consider
# out-of-fold statistics if the train/validation gap looks suspicious.


def _add_ctr_features(train_df, val_df, test_df, keys):
    """Attach click/count/ctr statistics for one key combination.

    Args:
        train_df: training frame; must contain ``keys`` and a numeric 'label'.
        val_df: validation frame; must contain ``keys``.
        test_df: test frame; must contain ``keys``.
        keys: list of column names to group by.

    Returns:
        tuple: ``(train_df, val_df, test_df)`` with three new columns each.
        Groups absent from the training data yield NaN after the left join.
    """
    name = '_'.join(keys)
    # agg(['sum', 'count']) followed by rename replaces the dict-renaming form
    # of SeriesGroupBy.agg, which pandas deprecated and later removed.
    stats = (train_df.groupby(keys)['label']
             .agg(['sum', 'count'])
             .reset_index()
             .rename(columns={'sum': name + '_click', 'count': name + '_count'}))
    stats[name + '_ctr'] = stats[name + '_click'] / stats[name + '_count']
    return tuple(pd.merge(frame, stats, on=keys, how='left')
                 for frame in (train_df, val_df, test_df))


items = ['prefix', 'title', 'tag']

# Order matters for the resulting column layout: the full triple first, then
# each single column, then every pair.
key_groups = [list(items)]
key_groups += [[item] for item in items]
key_groups += [[items[i], items[j]]
               for i in range(len(items))
               for j in range(i + 1, len(items))]

for keys in key_groups:
    train_data, val_data, test_data = _add_ctr_features(train_data, val_data, test_data, keys)


In [33]:
def _attach_feature_columns(base, *feature_frames):
    """Append precomputed feature columns to ``base`` row by row.

    ``pd.concat(axis=1)`` aligns on index labels and silently pads with NaN
    when the indexes differ. The feature tables are loaded from CSV with a
    fresh 0..n-1 index, so the join is made explicitly positional and the row
    counts are verified first.

    Args:
        base: frame to extend.
        *feature_frames: frames with exactly as many rows as ``base``.

    Returns:
        pandas.DataFrame: ``base`` columns followed by each feature frame's
        columns, in the order given, on a fresh 0..n-1 index.

    Raises:
        ValueError: if any feature frame has a different number of rows.
    """
    for extra in feature_frames:
        if len(extra) != len(base):
            raise ValueError(
                'feature table has %d rows but the data frame has %d rows'
                % (len(extra), len(base)))
    aligned = [base.reset_index(drop=True)]
    aligned.extend(extra.reset_index(drop=True) for extra in feature_frames)
    return pd.concat(aligned, axis=1)


# semantic feature - 31_3, followed by semantic feature - 4
train_data = _attach_feature_columns(train_data, new_train_data, train_data_dict_df)
val_data = _attach_feature_columns(val_data, new_val_data, val_data_dict_df)
test_data = _attach_feature_columns(test_data, new_test_data, test_data_dict_df)

In [34]:
# Spot-check the rows around index label 1815101 -- the position reported by
# the earlier np.where(...) lookup for the '音乐' label -- showing 'prefix'
# next to the attached 'ws_similarity' column.
# NOTE(review): presumably a check that the feature tables line up with the
# rows after the bad row was removed -- confirm.
train_data.loc[1815100:1815104, ['prefix', 'ws_similarity']]

Unnamed: 0,prefix,ws_similarity
1815100,习仲勋,0.045656
1815101,田螺,0.39833
1815102,经期,0.101104
1815103,花样多,0.945661
1815104,本命年,0.918332


In [None]:
# One-hot encoding of 'tag'.
#
# Encoding each split independently yields whatever tag columns happen to
# occur in that split, so train / validation / test could end up with
# different feature sets. Fixing the category list to the tags seen in
# training gives all three splits the same dummy columns in the same order;
# a tag that never occurred in training becomes an all-zero row.
tag_categories = sorted(train_data['tag'].unique())


def _one_hot_tag(frame):
    """One-hot encode 'tag' against the training vocabulary.

    The 'tag_推广' indicator is dropped from every split, so the three frames
    share identical dummy columns.
    """
    encoded = frame.assign(tag=pd.Categorical(frame['tag'], categories=tag_categories))
    return pd.get_dummies(encoded, columns=['tag']).drop(['tag_推广'], axis=1)


train_data = _one_hot_tag(train_data)
val_data = _one_hot_tag(val_data)
test_data = _one_hot_tag(test_data)

# Drop the raw text columns; only numeric features (and 'label') remain.
_TEXT_COLUMNS = ['prefix', 'query_prediction', 'title']
train_data_ = train_data.drop(_TEXT_COLUMNS, axis=1)
val_data_ = val_data.drop(_TEXT_COLUMNS, axis=1)
test_data_ = test_data.drop(_TEXT_COLUMNS, axis=1)


In [None]:
# ---------------------------------------------------------------------------
# Training: build model inputs and inspect them
# ---------------------------------------------------------------------------
print('Feature: ', train_data_.columns.values)

# Count missing values per column in every split.
print('- Nan Check! -')
print('train_data_:\n', train_data_.isna().sum(axis=0))
print('val_data_:\n', val_data_.isna().sum(axis=0))
print('test_data_:\n', test_data_.isna().sum(axis=0))

# Label split: features and target as plain NumPy arrays.
X_train_data_ = np.array(train_data_.drop(['label'], axis=1))
y_train_data_ = np.array(train_data_['label'])
X_val_data_ = np.array(val_data_.drop(['label'], axis=1))
y_val_data_ = np.array(val_data_['label'])
# The test split has no 'label'. It is converted to an array like the other
# splits, so the model sees one input type throughout and X_test_data is an
# independent copy rather than an alias of test_data_.
X_test_data = np.array(test_data_)

# Data inspecting
print('train beginning')
print('================================')

print('-Training- : ')
print(X_train_data_.shape)
print(y_train_data_.shape)

# This banner previously repeated '-Training- : ' although the shapes printed
# underneath belong to the validation split.
print('-Validation- : ')
print(X_val_data_.shape)
print(y_val_data_.shape)

print('-Testing- : ')
print(X_test_data.shape)
print('================================')


In [None]:


# Algorithm: LightGBM
#
# Scheme: the validation set is split into N stratified folds. For each fold
# the model is trained on ALL training data plus the other N-1 validation
# folds, early-stopped on the held-out fold, and then used to score the test
# set. The per-fold test predictions are collected in xx_submit.
N = 5
skf = StratifiedKFold(n_splits=N, random_state=42, shuffle=True)
xx_f1 = []      # F1 on the held-out fold at the best iteration, one per fold
xx_submit = []  # predict_proba output on the test set, one array per fold
valid_f1 = []   # F1 on the whole validation set, one per fold (see note below)

# NOTE(review): 'huber' is a regression loss; with it the classifier's
# "probabilities" are raw regression outputs and are not guaranteed to lie in
# [0, 1]. 'binary' is the usual objective for a 0/1 label -- confirm intent.
# NOTE(review): subsample_freq is set without subsample < 1; per the LightGBM
# docs bagging is only active when the fraction is below 1 -- confirm.
LGBM_classify = lgb.LGBMClassifier(boosting_type='gbdt', objective='huber', num_leaves=32,
                                   learning_rate=0.05, subsample_freq=5, n_estimators=5000, silent=True)

for k, (train_loc, test_loc) in enumerate(skf.split(X_val_data_, y_val_data_)):
    print('train _K_ flod', k)
    # train_loc / test_loc index into the VALIDATION arrays: train_loc rows are
    # appended to the training data, test_loc rows are the early-stopping set.
    X_train_combine = np.vstack([X_train_data_, X_val_data_[train_loc]])
    Y_train_combine = np.hstack([y_train_data_, y_val_data_[train_loc]])

    LGBM_classify.fit(X_train_combine, Y_train_combine,
                      eval_set=(X_val_data_[test_loc], y_val_data_[test_loc]),
                      early_stopping_rounds=200, eval_sample_weight=None, eval_metric=lgb_f1_score_sk)

    best_iter = LGBM_classify.best_iteration_
    # Use the public best_score_ attribute instead of the private _best_score.
    xx_f1.append(LGBM_classify.best_score_['valid_0']['f1'])
    xx_submit.append(LGBM_classify.predict_proba(X_test_data, num_iteration=best_iter))
    # Predict with the same best iteration as the test predictions above.
    # NOTE(review): this score covers the whole validation set, most of which
    # (the train_loc rows) was part of this fold's training data, so it is
    # optimistic compared with the held-out score in xx_f1.
    valid_f1.append(f1_score(y_val_data_, LGBM_classify.predict(X_val_data_, num_iteration=best_iter)))
print('\n\n- cross validation score (f1) -:', xx_f1, '. Mean: ', np.mean(xx_f1))
print('- whole validation score (f1) -:', valid_f1, '. Mean: ', np.mean(valid_f1))

Feature:  ['label' 'prefix_title_tag_click' 'prefix_title_tag_count'
 'prefix_title_tag_ctr' 'prefix_click' 'prefix_count' 'prefix_ctr'
 'title_click' 'title_count' 'title_ctr' 'tag_click' 'tag_count' 'tag_ctr'
 'prefix_title_click' 'prefix_title_count' 'prefix_title_ctr'
 'prefix_tag_click' 'prefix_tag_count' 'prefix_tag_ctr' 'title_tag_click'
 'title_tag_count' 'title_tag_ctr' 'new_tag0' 'new_tag1' 'new_tag2'
 'new_tag3' 'new_tag4' 'new_tag5' 'new_tag6' 'new_tag7' 'new_tag8'
 'new_tag9' 'new_tag10' 'new_tag11' 'new_tag12' 'new_tag13' 'new_tag14'
 'new_tag15' 'new_tag16' 'new_tag17' 'new_tag18' 'new_tag19' 'new_tag20'
 'new_tag21' 'new_tag22' 'new_tag23' 'new_tag24' 'new_tag25' 'new_tag26'
 'new_tag27' 'new_tag28' 'new_tag29' 'new_tag30' 'new_tag31' 'new_tag32'
 'new_tag33' 'ws_similarity' 'maximum_similarity' 'median_similarity'
 'mean_similarity' 'tag_健康' 'tag_商品' 'tag_应用' 'tag_影视' 'tag_快应用' 'tag_旅游'
 'tag_景点' 'tag_歌手' 'tag_汽车' 'tag_游戏' 'tag_火车' 'tag_百科' 'tag_知道' 'tag_经验'
 'tag_网站' 

  'recall', 'true', average, warn_for)


[1]	valid_0's huber: 0.11204	valid_0's f1: 0
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's huber: 0.107792	valid_0's f1: 0
[3]	valid_0's huber: 0.103973	valid_0's f1: 0
[4]	valid_0's huber: 0.100476	valid_0's f1: 0
[5]	valid_0's huber: 0.0973341	valid_0's f1: 0.377246
[6]	valid_0's huber: 0.0945062	valid_0's f1: 0.479381
[7]	valid_0's huber: 0.0918775	valid_0's f1: 0.539474
[8]	valid_0's huber: 0.089553	valid_0's f1: 0.578431
[9]	valid_0's huber: 0.0874697	valid_0's f1: 0.62094
[10]	valid_0's huber: 0.0856297	valid_0's f1: 0.630698
[11]	valid_0's huber: 0.0839482	valid_0's f1: 0.636471
[12]	valid_0's huber: 0.0823581	valid_0's f1: 0.656178
[13]	valid_0's huber: 0.0809958	valid_0's f1: 0.671707
[14]	valid_0's huber: 0.0797724	valid_0's f1: 0.677594
[15]	valid_0's huber: 0.0786146	valid_0's f1: 0.680279
[16]	valid_0's huber: 0.0775237	valid_0's f1: 0.685293
[17]	valid_0's huber: 0.0765644	valid_0's f1: 0.688407
[18]	valid_0's huber: 0.0756765	valid_0's f1: 

[150]	valid_0's huber: 0.0675592	valid_0's f1: 0.730017
[151]	valid_0's huber: 0.0675591	valid_0's f1: 0.730017
[152]	valid_0's huber: 0.0675601	valid_0's f1: 0.730122
[153]	valid_0's huber: 0.0675594	valid_0's f1: 0.730122
[154]	valid_0's huber: 0.0675635	valid_0's f1: 0.729835
[155]	valid_0's huber: 0.0675636	valid_0's f1: 0.729835
[156]	valid_0's huber: 0.0675647	valid_0's f1: 0.729652
[157]	valid_0's huber: 0.0675632	valid_0's f1: 0.729547
[158]	valid_0's huber: 0.0675517	valid_0's f1: 0.729547
[159]	valid_0's huber: 0.0675452	valid_0's f1: 0.72973
[160]	valid_0's huber: 0.0675352	valid_0's f1: 0.72973
[161]	valid_0's huber: 0.0675398	valid_0's f1: 0.729703
[162]	valid_0's huber: 0.0675365	valid_0's f1: 0.729912
[163]	valid_0's huber: 0.067533	valid_0's f1: 0.730095
[164]	valid_0's huber: 0.0675258	valid_0's f1: 0.7302
[165]	valid_0's huber: 0.0675297	valid_0's f1: 0.7302
[166]	valid_0's huber: 0.0675202	valid_0's f1: 0.7302
[167]	valid_0's huber: 0.0675238	valid_0's f1: 0.7302
[16

[299]	valid_0's huber: 0.0673514	valid_0's f1: 0.732752
[300]	valid_0's huber: 0.0673463	valid_0's f1: 0.73301
[301]	valid_0's huber: 0.0673462	valid_0's f1: 0.733295
[302]	valid_0's huber: 0.0673424	valid_0's f1: 0.733295
[303]	valid_0's huber: 0.0673431	valid_0's f1: 0.733476
[304]	valid_0's huber: 0.0673411	valid_0's f1: 0.733476
[305]	valid_0's huber: 0.067348	valid_0's f1: 0.733476
[306]	valid_0's huber: 0.0673477	valid_0's f1: 0.733114
[307]	valid_0's huber: 0.0673416	valid_0's f1: 0.733114
[308]	valid_0's huber: 0.0673491	valid_0's f1: 0.733114
[309]	valid_0's huber: 0.0673503	valid_0's f1: 0.733114
[310]	valid_0's huber: 0.067349	valid_0's f1: 0.73301
[311]	valid_0's huber: 0.067347	valid_0's f1: 0.733086
[312]	valid_0's huber: 0.067348	valid_0's f1: 0.732571
[313]	valid_0's huber: 0.067352	valid_0's f1: 0.732571
[314]	valid_0's huber: 0.0673532	valid_0's f1: 0.732467
[315]	valid_0's huber: 0.067356	valid_0's f1: 0.732752
[316]	valid_0's huber: 0.0673543	valid_0's f1: 0.732752


[448]	valid_0's huber: 0.0674092	valid_0's f1: 0.734008
[449]	valid_0's huber: 0.067414	valid_0's f1: 0.734188
[450]	valid_0's huber: 0.0674146	valid_0's f1: 0.734188
[451]	valid_0's huber: 0.0674171	valid_0's f1: 0.734188
[452]	valid_0's huber: 0.0674169	valid_0's f1: 0.734188
[453]	valid_0's huber: 0.0674211	valid_0's f1: 0.734008
[454]	valid_0's huber: 0.0674233	valid_0's f1: 0.734008
[455]	valid_0's huber: 0.0674215	valid_0's f1: 0.734188
[456]	valid_0's huber: 0.0674201	valid_0's f1: 0.734008
[457]	valid_0's huber: 0.0674284	valid_0's f1: 0.734083
[458]	valid_0's huber: 0.0674299	valid_0's f1: 0.734264
[459]	valid_0's huber: 0.06743	valid_0's f1: 0.734264
[460]	valid_0's huber: 0.0674301	valid_0's f1: 0.734264
[461]	valid_0's huber: 0.0674295	valid_0's f1: 0.734159
[462]	valid_0's huber: 0.0674306	valid_0's f1: 0.734159
[463]	valid_0's huber: 0.0674304	valid_0's f1: 0.734159
[464]	valid_0's huber: 0.0674304	valid_0's f1: 0.734159
[465]	valid_0's huber: 0.0674357	valid_0's f1: 0.73

  if diff:


train _K_ flod 1


  'recall', 'true', average, warn_for)


[1]	valid_0's huber: 0.111899	valid_0's f1: 0
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's huber: 0.10754	valid_0's f1: 0
[3]	valid_0's huber: 0.103579	valid_0's f1: 0
[4]	valid_0's huber: 0.100052	valid_0's f1: 0
[5]	valid_0's huber: 0.0968452	valid_0's f1: 0.39524
[6]	valid_0's huber: 0.0939284	valid_0's f1: 0.499116
[7]	valid_0's huber: 0.0913473	valid_0's f1: 0.562708
[8]	valid_0's huber: 0.0888945	valid_0's f1: 0.596095
[9]	valid_0's huber: 0.0867174	valid_0's f1: 0.629291
[10]	valid_0's huber: 0.084799	valid_0's f1: 0.644539
[11]	valid_0's huber: 0.0830153	valid_0's f1: 0.656637
[12]	valid_0's huber: 0.0813498	valid_0's f1: 0.668802
[13]	valid_0's huber: 0.0799381	valid_0's f1: 0.677746
[14]	valid_0's huber: 0.0786673	valid_0's f1: 0.680113
[15]	valid_0's huber: 0.0775314	valid_0's f1: 0.682522
[16]	valid_0's huber: 0.0765027	valid_0's f1: 0.682088
[17]	valid_0's huber: 0.0755685	valid_0's f1: 0.686414
[18]	valid_0's huber: 0.0747498	valid_0's f1: 

[151]	valid_0's huber: 0.0661131	valid_0's f1: 0.730255
[152]	valid_0's huber: 0.0661053	valid_0's f1: 0.73036
[153]	valid_0's huber: 0.0661053	valid_0's f1: 0.73015
[154]	valid_0's huber: 0.066099	valid_0's f1: 0.730095
[155]	valid_0's huber: 0.066099	valid_0's f1: 0.730537
[156]	valid_0's huber: 0.0660993	valid_0's f1: 0.730615
[157]	valid_0's huber: 0.0660896	valid_0's f1: 0.730432
[158]	valid_0's huber: 0.0660899	valid_0's f1: 0.73051
[159]	valid_0's huber: 0.0660905	valid_0's f1: 0.73051
[160]	valid_0's huber: 0.0661013	valid_0's f1: 0.730118
[161]	valid_0's huber: 0.0660959	valid_0's f1: 0.730118
[162]	valid_0's huber: 0.066089	valid_0's f1: 0.730791
[163]	valid_0's huber: 0.0660852	valid_0's f1: 0.730868
[164]	valid_0's huber: 0.0660775	valid_0's f1: 0.7317
[165]	valid_0's huber: 0.0660647	valid_0's f1: 0.732168
[166]	valid_0's huber: 0.0660647	valid_0's f1: 0.731805
[167]	valid_0's huber: 0.066071	valid_0's f1: 0.7317
[168]	valid_0's huber: 0.0660688	valid_0's f1: 0.731938
[169

[299]	valid_0's huber: 0.0659994	valid_0's f1: 0.734833
[300]	valid_0's huber: 0.0660001	valid_0's f1: 0.735043
[301]	valid_0's huber: 0.0660056	valid_0's f1: 0.735147
[302]	valid_0's huber: 0.0660053	valid_0's f1: 0.734967
[303]	valid_0's huber: 0.0660052	valid_0's f1: 0.734967
[304]	valid_0's huber: 0.0660049	valid_0's f1: 0.734938
[305]	valid_0's huber: 0.0660044	valid_0's f1: 0.734938
[306]	valid_0's huber: 0.0660057	valid_0's f1: 0.734938
[307]	valid_0's huber: 0.0660034	valid_0's f1: 0.735014
[308]	valid_0's huber: 0.0660087	valid_0's f1: 0.734833
[309]	valid_0's huber: 0.0660134	valid_0's f1: 0.734938
[310]	valid_0's huber: 0.0660184	valid_0's f1: 0.734938
[311]	valid_0's huber: 0.0660127	valid_0's f1: 0.734729
[312]	valid_0's huber: 0.0660117	valid_0's f1: 0.734729
[313]	valid_0's huber: 0.0660126	valid_0's f1: 0.734804
[314]	valid_0's huber: 0.0660141	valid_0's f1: 0.734804
[315]	valid_0's huber: 0.0660137	valid_0's f1: 0.734804
[316]	valid_0's huber: 0.0660167	valid_0's f1: 0

  if diff:


train _K_ flod 2


  'recall', 'true', average, warn_for)


[1]	valid_0's huber: 0.111995	valid_0's f1: 0
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's huber: 0.107718	valid_0's f1: 0
[3]	valid_0's huber: 0.10382	valid_0's f1: 0
[4]	valid_0's huber: 0.100314	valid_0's f1: 0
[5]	valid_0's huber: 0.0971588	valid_0's f1: 0.372612
[6]	valid_0's huber: 0.0942814	valid_0's f1: 0.474051
[7]	valid_0's huber: 0.0917238	valid_0's f1: 0.562901
[8]	valid_0's huber: 0.0893999	valid_0's f1: 0.577256
[9]	valid_0's huber: 0.087301	valid_0's f1: 0.612713
[10]	valid_0's huber: 0.085404	valid_0's f1: 0.619335
[11]	valid_0's huber: 0.0837034	valid_0's f1: 0.627665
[12]	valid_0's huber: 0.0821585	valid_0's f1: 0.666558
[13]	valid_0's huber: 0.080798	valid_0's f1: 0.683036
[14]	valid_0's huber: 0.0794965	valid_0's f1: 0.686383
[15]	valid_0's huber: 0.0783768	valid_0's f1: 0.691489
[16]	valid_0's huber: 0.0773373	valid_0's f1: 0.692764
[17]	valid_0's huber: 0.0763992	valid_0's f1: 0.69615
[18]	valid_0's huber: 0.0755767	valid_0's f1: 0.

[150]	valid_0's huber: 0.0672706	valid_0's f1: 0.730218
[151]	valid_0's huber: 0.0672743	valid_0's f1: 0.730085
[152]	valid_0's huber: 0.0672737	valid_0's f1: 0.730007
[153]	valid_0's huber: 0.0672755	valid_0's f1: 0.730007
[154]	valid_0's huber: 0.067261	valid_0's f1: 0.730296
[155]	valid_0's huber: 0.0672671	valid_0's f1: 0.730636
[156]	valid_0's huber: 0.0672668	valid_0's f1: 0.730819
[157]	valid_0's huber: 0.0672606	valid_0's f1: 0.730714
[158]	valid_0's huber: 0.0672605	valid_0's f1: 0.730241
[159]	valid_0's huber: 0.0672605	valid_0's f1: 0.730241
[160]	valid_0's huber: 0.0672673	valid_0's f1: 0.730241
[161]	valid_0's huber: 0.0672605	valid_0's f1: 0.730347
[162]	valid_0's huber: 0.0672547	valid_0's f1: 0.730347
[163]	valid_0's huber: 0.0672366	valid_0's f1: 0.730897
[164]	valid_0's huber: 0.0672162	valid_0's f1: 0.730791
[165]	valid_0's huber: 0.0672113	valid_0's f1: 0.730475
[166]	valid_0's huber: 0.0672083	valid_0's f1: 0.73058
[167]	valid_0's huber: 0.0672084	valid_0's f1: 0.7

[298]	valid_0's huber: 0.0672009	valid_0's f1: 0.731728
[299]	valid_0's huber: 0.0671961	valid_0's f1: 0.731805
[300]	valid_0's huber: 0.0671938	valid_0's f1: 0.731728
[301]	valid_0's huber: 0.0671964	valid_0's f1: 0.73191
[302]	valid_0's huber: 0.0671904	valid_0's f1: 0.732092
[303]	valid_0's huber: 0.0671934	valid_0's f1: 0.732273
[304]	valid_0's huber: 0.0671958	valid_0's f1: 0.732378
[305]	valid_0's huber: 0.0671932	valid_0's f1: 0.732245
[306]	valid_0's huber: 0.0671929	valid_0's f1: 0.732245
[307]	valid_0's huber: 0.0671945	valid_0's f1: 0.73235
[308]	valid_0's huber: 0.0671923	valid_0's f1: 0.73235
[309]	valid_0's huber: 0.0671868	valid_0's f1: 0.732273
[310]	valid_0's huber: 0.0671868	valid_0's f1: 0.732273
[311]	valid_0's huber: 0.0671969	valid_0's f1: 0.732015
[312]	valid_0's huber: 0.0671928	valid_0's f1: 0.732015
[313]	valid_0's huber: 0.0672038	valid_0's f1: 0.732197
[314]	valid_0's huber: 0.0672054	valid_0's f1: 0.732197
[315]	valid_0's huber: 0.0672044	valid_0's f1: 0.73

[447]	valid_0's huber: 0.0672626	valid_0's f1: 0.733809
[448]	valid_0's huber: 0.0672648	valid_0's f1: 0.733733
[449]	valid_0's huber: 0.0672609	valid_0's f1: 0.733628
[450]	valid_0's huber: 0.0672611	valid_0's f1: 0.733628
[451]	valid_0's huber: 0.0672619	valid_0's f1: 0.733628
[452]	valid_0's huber: 0.0672641	valid_0's f1: 0.733552
[453]	valid_0's huber: 0.0672695	valid_0's f1: 0.733914
[454]	valid_0's huber: 0.0672722	valid_0's f1: 0.733628
[455]	valid_0's huber: 0.0672729	valid_0's f1: 0.733733
[456]	valid_0's huber: 0.0672726	valid_0's f1: 0.733733
[457]	valid_0's huber: 0.0672719	valid_0's f1: 0.733733
[458]	valid_0's huber: 0.0672728	valid_0's f1: 0.733733
[459]	valid_0's huber: 0.0672705	valid_0's f1: 0.733733
[460]	valid_0's huber: 0.0672768	valid_0's f1: 0.733371
[461]	valid_0's huber: 0.0672786	valid_0's f1: 0.733552
[462]	valid_0's huber: 0.0672857	valid_0's f1: 0.733552
Early stopping, best iteration is:
[262]	valid_0's huber: 0.0671596	valid_0's f1: 0.732807


  if diff:


train _K_ flod 3


  'recall', 'true', average, warn_for)


[1]	valid_0's huber: 0.111867	valid_0's f1: 0
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's huber: 0.107472	valid_0's f1: 0
[3]	valid_0's huber: 0.103435	valid_0's f1: 0
[4]	valid_0's huber: 0.0998747	valid_0's f1: 0
[5]	valid_0's huber: 0.0966454	valid_0's f1: 0.390379
[6]	valid_0's huber: 0.0936909	valid_0's f1: 0.488713
[7]	valid_0's huber: 0.0910173	valid_0's f1: 0.556693
[8]	valid_0's huber: 0.0886175	valid_0's f1: 0.593233
[9]	valid_0's huber: 0.0863939	valid_0's f1: 0.626193
[10]	valid_0's huber: 0.0844704	valid_0's f1: 0.631506
[11]	valid_0's huber: 0.0826963	valid_0's f1: 0.648557
[12]	valid_0's huber: 0.0810328	valid_0's f1: 0.670033
[13]	valid_0's huber: 0.079604	valid_0's f1: 0.681512
[14]	valid_0's huber: 0.0783263	valid_0's f1: 0.685787
[15]	valid_0's huber: 0.077185	valid_0's f1: 0.688406
[16]	valid_0's huber: 0.0760484	valid_0's f1: 0.694197
[17]	valid_0's huber: 0.0750761	valid_0's f1: 0.69826
[18]	valid_0's huber: 0.0741773	valid_0's f1:

[150]	valid_0's huber: 0.0651792	valid_0's f1: 0.734664
[151]	valid_0's huber: 0.0651788	valid_0's f1: 0.734481
[152]	valid_0's huber: 0.0651661	valid_0's f1: 0.734393
[153]	valid_0's huber: 0.065147	valid_0's f1: 0.734653
[154]	valid_0's huber: 0.0651471	valid_0's f1: 0.734729
[155]	valid_0's huber: 0.0651616	valid_0's f1: 0.734623
[156]	valid_0's huber: 0.0651538	valid_0's f1: 0.734623
[157]	valid_0's huber: 0.0651544	valid_0's f1: 0.734517
[158]	valid_0's huber: 0.0651561	valid_0's f1: 0.734806
[159]	valid_0's huber: 0.0651561	valid_0's f1: 0.735018
[160]	valid_0's huber: 0.065156	valid_0's f1: 0.735095
[161]	valid_0's huber: 0.0651404	valid_0's f1: 0.735201
[162]	valid_0's huber: 0.0651355	valid_0's f1: 0.735825
[163]	valid_0's huber: 0.0651246	valid_0's f1: 0.735795
[164]	valid_0's huber: 0.0651227	valid_0's f1: 0.735871
[165]	valid_0's huber: 0.0651247	valid_0's f1: 0.735871
[166]	valid_0's huber: 0.0651209	valid_0's f1: 0.735324
[167]	valid_0's huber: 0.0651103	valid_0's f1: 0.7

[297]	valid_0's huber: 0.0649626	valid_0's f1: 0.735244
[298]	valid_0's huber: 0.0649618	valid_0's f1: 0.735244
[299]	valid_0's huber: 0.0649651	valid_0's f1: 0.735349
[300]	valid_0's huber: 0.0649715	valid_0's f1: 0.735635
[301]	valid_0's huber: 0.0649714	valid_0's f1: 0.735635
[302]	valid_0's huber: 0.0649705	valid_0's f1: 0.735635
[303]	valid_0's huber: 0.0649643	valid_0's f1: 0.735635
[304]	valid_0's huber: 0.0649673	valid_0's f1: 0.735635
[305]	valid_0's huber: 0.0649722	valid_0's f1: 0.735244
[306]	valid_0's huber: 0.0649622	valid_0's f1: 0.735349
[307]	valid_0's huber: 0.0649624	valid_0's f1: 0.735817
[308]	valid_0's huber: 0.0649542	valid_0's f1: 0.73553
[309]	valid_0's huber: 0.0649449	valid_0's f1: 0.735711
[310]	valid_0's huber: 0.0649432	valid_0's f1: 0.735817
[311]	valid_0's huber: 0.0649509	valid_0's f1: 0.735711
[312]	valid_0's huber: 0.0649501	valid_0's f1: 0.735711
[313]	valid_0's huber: 0.0649482	valid_0's f1: 0.735968
[314]	valid_0's huber: 0.0649499	valid_0's f1: 0.

[444]	valid_0's huber: 0.0649779	valid_0's f1: 0.738479
[445]	valid_0's huber: 0.0649755	valid_0's f1: 0.738479
[446]	valid_0's huber: 0.0649785	valid_0's f1: 0.738404
[447]	valid_0's huber: 0.0649781	valid_0's f1: 0.738404
[448]	valid_0's huber: 0.0649776	valid_0's f1: 0.738404
[449]	valid_0's huber: 0.0649649	valid_0's f1: 0.738224
[450]	valid_0's huber: 0.0649606	valid_0's f1: 0.738224
[451]	valid_0's huber: 0.0649644	valid_0's f1: 0.738224
[452]	valid_0's huber: 0.0649644	valid_0's f1: 0.738404
[453]	valid_0's huber: 0.0649696	valid_0's f1: 0.738299
[454]	valid_0's huber: 0.0649749	valid_0's f1: 0.738119
[455]	valid_0's huber: 0.0649661	valid_0's f1: 0.737881
[456]	valid_0's huber: 0.0649618	valid_0's f1: 0.738167
[457]	valid_0's huber: 0.0649687	valid_0's f1: 0.738242
[458]	valid_0's huber: 0.0649761	valid_0's f1: 0.738061
[459]	valid_0's huber: 0.0649759	valid_0's f1: 0.738061
[460]	valid_0's huber: 0.0649776	valid_0's f1: 0.73782
[461]	valid_0's huber: 0.0649761	valid_0's f1: 0.

  if diff:


train _K_ flod 4


  'recall', 'true', average, warn_for)


[1]	valid_0's huber: 0.112025	valid_0's f1: 0
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's huber: 0.107746	valid_0's f1: 0
[3]	valid_0's huber: 0.103865	valid_0's f1: 0
[4]	valid_0's huber: 0.100426	valid_0's f1: 0
[5]	valid_0's huber: 0.0972539	valid_0's f1: 0.366969
[6]	valid_0's huber: 0.094418	valid_0's f1: 0.468869
[7]	valid_0's huber: 0.0918248	valid_0's f1: 0.552367
[8]	valid_0's huber: 0.0894957	valid_0's f1: 0.573511
[9]	valid_0's huber: 0.0874284	valid_0's f1: 0.607074
[10]	valid_0's huber: 0.0855035	valid_0's f1: 0.620809
[11]	valid_0's huber: 0.0838001	valid_0's f1: 0.629098
[12]	valid_0's huber: 0.0822757	valid_0's f1: 0.653452
[13]	valid_0's huber: 0.0808334	valid_0's f1: 0.666667
[14]	valid_0's huber: 0.0795893	valid_0's f1: 0.670651
[15]	valid_0's huber: 0.0784767	valid_0's f1: 0.673959
[16]	valid_0's huber: 0.0774156	valid_0's f1: 0.676245
[17]	valid_0's huber: 0.0764871	valid_0's f1: 0.682386
[18]	valid_0's huber: 0.0756026	valid_0's f1

[151]	valid_0's huber: 0.0673472	valid_0's f1: 0.715085
[152]	valid_0's huber: 0.0673537	valid_0's f1: 0.715085
[153]	valid_0's huber: 0.0673537	valid_0's f1: 0.715064
[154]	valid_0's huber: 0.0673467	valid_0's f1: 0.715275
[155]	valid_0's huber: 0.0673491	valid_0's f1: 0.715464
[156]	valid_0's huber: 0.0673449	valid_0's f1: 0.715548
[157]	valid_0's huber: 0.067331	valid_0's f1: 0.716304
[158]	valid_0's huber: 0.0673237	valid_0's f1: 0.717081
[159]	valid_0's huber: 0.0673262	valid_0's f1: 0.716976
[160]	valid_0's huber: 0.067325	valid_0's f1: 0.716976
[161]	valid_0's huber: 0.0673245	valid_0's f1: 0.717059
[162]	valid_0's huber: 0.067328	valid_0's f1: 0.717059
[163]	valid_0's huber: 0.0673311	valid_0's f1: 0.71687
[164]	valid_0's huber: 0.0673277	valid_0's f1: 0.716953
[165]	valid_0's huber: 0.0673324	valid_0's f1: 0.716826
[166]	valid_0's huber: 0.0673309	valid_0's f1: 0.71712
[167]	valid_0's huber: 0.0673363	valid_0's f1: 0.717014
[168]	valid_0's huber: 0.0673333	valid_0's f1: 0.7173

[300]	valid_0's huber: 0.0673222	valid_0's f1: 0.718568
[301]	valid_0's huber: 0.0673217	valid_0's f1: 0.718568
[302]	valid_0's huber: 0.0673113	valid_0's f1: 0.718568
[303]	valid_0's huber: 0.0673201	valid_0's f1: 0.718673
[304]	valid_0's huber: 0.0673229	valid_0's f1: 0.718673
[305]	valid_0's huber: 0.067325	valid_0's f1: 0.718673
[306]	valid_0's huber: 0.0673286	valid_0's f1: 0.718859
[307]	valid_0's huber: 0.0673249	valid_0's f1: 0.71915
[308]	valid_0's huber: 0.0673329	valid_0's f1: 0.718941
[309]	valid_0's huber: 0.0673324	valid_0's f1: 0.718836
[310]	valid_0's huber: 0.0673311	valid_0's f1: 0.719023
[311]	valid_0's huber: 0.0673312	valid_0's f1: 0.719023
[312]	valid_0's huber: 0.0673238	valid_0's f1: 0.71865
[313]	valid_0's huber: 0.0673208	valid_0's f1: 0.718732
[314]	valid_0's huber: 0.067316	valid_0's f1: 0.718545
[315]	valid_0's huber: 0.0673141	valid_0's f1: 0.718918
[316]	valid_0's huber: 0.0673158	valid_0's f1: 0.718627
[317]	valid_0's huber: 0.0673157	valid_0's f1: 0.718

In [None]:
# ---------------------------------------------------------------------------
# Save result
# ---------------------------------------------------------------------------
# Average the per-fold test predictions. In binary classification the two
# output columns correspond to classes 0 and 1, so column 1 is the score for
# the positive class.
mean_proba = np.mean(xx_submit, axis=0)

# Threshold explicitly instead of calling round(): the result is always 0 or
# 1, even if an averaged score falls outside [0, 1]. A strict '>' keeps the
# previous behaviour at exactly 0.5, where round(0.5) == 0.
test_data_['pred_label'] = (mean_proba[:, 1] > 0.5).astype(int)

# header=False is passed explicitly because the default header behaviour of
# Series.to_csv differs between pandas versions; the file holds one label per
# line and nothing else.
test_data_['pred_label'].to_csv('../data/Result_LLC_1016_pre ' + str(np.mean(xx_f1)) + '.csv',
                                index=False, header=False)
