In [1]:
import math
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt
import lightgbm as lgb
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import f1_score, fbeta_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold,KFold
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from datetime import datetime
from gensim.models.word2vec import Word2Vec
import xgboost as xgb
import json
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows',50)
pd.set_option('display.max_columns',50)

In [2]:
train = pd.read_csv('./data/new_data_B/lgbtrain.csv')
test = pd.read_csv('./data/new_data_B/lgbtest.csv')
#seq_fea = ['launch_seq','playtime_seq','duration_prefer','interact_prefer']              

In [3]:
train.columns

Index(['user_id', 'end_date', 'label', 'launch_date_len', 'launch_date',
       'launch_type', 'launch_times', 'launch_type_0', 'launch_type_1',
       'launch_type_01rate',
       ...
       'interact_prefer_1', 'interact_prefer_2', 'interact_prefer_3',
       'interact_prefer_4', 'interact_prefer_5', 'interact_prefer_6',
       'interact_prefer_7', 'interact_prefer_8', 'interact_prefer_9',
       'interact_prefer_10'],
      dtype='object', length=125)

In [4]:
train.head()

Unnamed: 0,user_id,end_date,label,launch_date_len,launch_date,launch_type,launch_times,launch_type_0,launch_type_1,launch_type_01rate,start_end_launch,launch_seq_31,launch_seq_15,launch_seq_7,launch_times_31,launch_times_15,launch_times_7,playtime_31,playtime_15,playtime_7,playtime_seq,duration_prefer,father_id_score,cast_id_score,tag_score,...,duration_prefer_2,duration_prefer_3,duration_prefer_4,duration_prefer_5,duration_prefer_6,duration_prefer_7,duration_prefer_8,duration_prefer_9,duration_prefer_10,duration_prefer_11,duration_prefer_12,duration_prefer_13,duration_prefer_14,duration_prefer_15,interact_prefer_0,interact_prefer_1,interact_prefer_2,interact_prefer_3,interact_prefer_4,interact_prefer_5,interact_prefer_6,interact_prefer_7,interact_prefer_8,interact_prefer_9,interact_prefer_10
0,10000000,211,2,16,"[131, 132, 141, 164, 179, 185, 187, 189, 191, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.849842,0.874528,-0.611652,-0.527202,2.331488,"[0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, ...","[1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",1.025232,0.331995,-0.776345,-0.346441,-0.385211,-0.373219,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.062609,1.011145,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10000001,173,0,3,"[114, 117, 118]","[0, 0, 0]",-0.410684,-0.372292,-0.611652,-0.527202,-0.738611,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.777207,-0.798152,-0.776345,0.0,0.0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10000002,128,1,1,[128],[0],-0.604611,-0.564111,-0.611652,-0.527202,-0.921901,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 1]",-0.613349,-0.515615,-0.277768,-0.38198,-0.384736,-0.372374,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.0,-1.500864,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10000003,199,0,2,"[144, 144]","[1, 0]",-0.507648,-0.564111,1.117465,1.164124,-0.921901,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.777207,-0.798152,-0.776345,0.0,0.0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10000004,167,0,4,"[154, 155, 156, 157]","[0, 0, 0, 0]",-0.31372,-0.276383,-0.611652,-0.527202,-0.784434,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.121774,0.331995,-0.776345,-0.309601,-0.259957,-0.373219,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
for i,j in zip(train,train.dtypes):
    if j == 'object':
        print(i)

launch_date
launch_type
launch_seq_31
launch_seq_15
launch_seq_7
playtime_seq
duration_prefer
interact_prefer


In [6]:
chuanyue_fea = ['start_end_launch','launch_type_0', 'launch_type_1', 'launch_type_01rate']
object_fea = ['launch_type','launch_seq_31','launch_seq_15','launch_seq_7','playtime_seq']

In [7]:
test['label']=-1

In [8]:
len(test)

35000

In [9]:
data = pd.concat([train,test],axis=0)
data

Unnamed: 0,user_id,end_date,label,launch_date_len,launch_date,launch_type,launch_times,launch_type_0,launch_type_1,launch_type_01rate,start_end_launch,launch_seq_31,launch_seq_15,launch_seq_7,launch_times_31,launch_times_15,launch_times_7,playtime_31,playtime_15,playtime_7,playtime_seq,duration_prefer,father_id_score,cast_id_score,tag_score,...,duration_prefer_2,duration_prefer_3,duration_prefer_4,duration_prefer_5,duration_prefer_6,duration_prefer_7,duration_prefer_8,duration_prefer_9,duration_prefer_10,duration_prefer_11,duration_prefer_12,duration_prefer_13,duration_prefer_14,duration_prefer_15,interact_prefer_0,interact_prefer_1,interact_prefer_2,interact_prefer_3,interact_prefer_4,interact_prefer_5,interact_prefer_6,interact_prefer_7,interact_prefer_8,interact_prefer_9,interact_prefer_10
0,10000000,211,2,16,"[131, 132, 141, 164, 179, 185, 187, 189, 191, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.849842,0.874528,-0.611652,-0.527202,2.331488,"[0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, ...","[1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",1.025232,0.331995,-0.776345,-0.346441,-0.385211,-0.373219,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.062609,1.011145,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,1.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10000001,173,0,3,"[114, 117, 118]","[0, 0, 0]",-0.410684,-0.372292,-0.611652,-0.527202,-0.738611,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.777207,-0.798152,-0.776345,0.000000,0.000000,0.000000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10000002,128,1,1,[128],[0],-0.604611,-0.564111,-0.611652,-0.527202,-0.921901,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 1]",-0.613349,-0.515615,-0.277768,-0.381980,-0.384736,-0.372374,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0.000000,-1.500864,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,1.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10000003,199,0,2,"[144, 144]","[1, 0]",-0.507648,-0.564111,1.117465,1.164124,-0.921901,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.777207,-0.798152,-0.776345,0.000000,0.000000,0.000000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10000004,167,0,4,"[154, 155, 156, 157]","[0, 0, 0, 0]",-0.313720,-0.276383,-0.611652,-0.527202,-0.784434,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.121774,0.331995,-0.776345,-0.309601,-0.259957,-0.373219,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34995,10517398,216,-1,36,"[132, 133, 134, 139, 144, 145, 147, 148, 149, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",2.789112,2.792714,-0.611652,-0.527202,2.881357,"[0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0]","[1, 0, 1, 0, 0, 1, 1, 0]",1.025232,1.179605,1.217962,0.692684,0.589955,0.217845,"[0, 0.1903, 0, 0, 0, 0.0673, 0, 0, 0, 0, 0, 0,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 1.0, 1.0, 0.5,...",0.380136,-0.046569,0.0,...,0.0,0.0000,0.0000,0.7500,1.0000,1.000,0.5,0.0000,0.0,0.5000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34996,10110309,172,-1,6,"[101, 111, 136, 142, 148, 154]","[0, 0, 0, 0, 0, 0]",-0.119793,-0.084564,-0.611652,-0.527202,1.506685,"[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.285633,-0.798152,-0.776345,-0.381145,-0.383296,-0.369813,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34997,10528272,220,-1,22,"[140, 152, 153, 154, 155, 161, 164, 165, 166, ...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1.431623,1.258165,2.846582,-0.219689,1.689975,"[0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.613349,-0.798152,-0.776345,0.385772,0.938841,1.981268,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,0.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34998,10395221,195,-1,5,"[125, 144, 145, 146, 169]","[0, 0, 0, 0, 0]",-0.216757,-0.180474,-0.611652,-0.527202,1.094284,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0]",-0.613349,-0.798152,-0.776345,-0.381670,-0.385211,-0.373219,"[0, 0, 0, 0, 0, 0.0505, 0, 0, 0, 0, 0, 0, 0, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",0.000000,0.000000,0.0,...,0.0,0.0000,0.0000,0.0000,1.0000,0.000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
len(data)

635001

In [11]:
data = data.drop(chuanyue_fea+object_fea,axis=1)

In [13]:
# def kfold_mean(df_train, df_test, target, target_mean_list):
#     folds = StratifiedKFold(n_splits=5)

#     mean_of_target = df_train[target].mean()

#     for fold_, (trn_idx, val_idx) in tqdm(enumerate(folds.split(df_train, y=df_train['label']))):
#         tr_x = df_train.iloc[trn_idx, :]
#         vl_x = df_train.iloc[val_idx, :]

#         for col in target_mean_list:
#             df_train.loc[vl_x.index, f'{col}_target_enc'] = vl_x[col].map(tr_x.groupby(col)[target].mean())

#     for col in target_mean_list:
#         df_train[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)

#         df_test[f'{col}_target_enc'] = df_test[col].map(df_train.groupby(col)[f'{col}_target_enc'].mean())

#         df_test[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)
#     return pd.concat([df_train, df_test], ignore_index=True)

# feature_list =  ['launch_date_len']
# data = kfold_mean(data.iloc[:600001], data[600001:],'label',feature_list)
# print(data)

In [14]:
# data['launch_date'] = data['launch_date'].apply(lambda x: eval(x))
# sentences = data['launch_date'].values.tolist()
# for i in range(len(sentences)):
#     sentences[i] = [str(x) for x in sentences[i]]   #将每个tagid转化成str格式
# print('预处理完毕')
# emb_size = 32
# model = Word2Vec(sentences, size=emb_size, sg=1, hs=1 ,seed=1, iter=3)#sentences, size=emb_size, window=3, min_count=5, sg=1, hs=1 ,seed=1, iter=3
# print("训练完毕")
# model.save('./data/w2v_model/launch_date.model')

# emb_matrix = []
# for seq in sentences:
#     vec = []
#     for w in seq:
#         if w in model.wv.vocab:
#             vec.append(model.wv[w])
#     if len(vec) > 0:
#         emb_matrix.append(np.mean(vec, axis=0))
#     else:
#         emb_matrix.append([0] * emb_size)
# emb_matrix = np.array(emb_matrix)
# for i in range(emb_size):
#     data['launch_date_emb_{}'.format(i)] = emb_matrix[:, i]
    


In [15]:
# for i,j in zip(data,data.dtypes):
#     if j == 'object':
#         print(i)

In [16]:
#特征筛选样本
# dropfea = ['duration_prefer_0', 'duration_prefer_14', 'duration_prefer_15', 'interact_prefer_5']

In [12]:
train = data.iloc[:600001]
test = data.iloc[600001:]

In [13]:
features = [i for i in train.columns if i not in ['label', 'end_date',"launch_date","interact_type","date_list",'launch_seq','playtime_seq','duration_prefer','interact_prefer']]  #将用户pid也加进去看看效果

y = train['label']
x = train[features]

new_test = test[features]
KF = StratifiedKFold(n_splits=5, random_state=2021, shuffle=True)
oof_lgb = np.zeros(len(train))
predictions_lgb = np.zeros((len(new_test)))

# 特征重要性
feat_imp_df = pd.DataFrame({'feat': features, 'imp': 0})

model = lgb.LGBMRegressor( num_leaves=32,
                           max_depth=6,
                           learning_rate=0.08,
                           n_estimators=10000,
                           subsample=0.8,
                           feature_fraction=0.8,
                           reg_alpha=0.5,
                           reg_lambda=0.5,
                           random_state=2021,
                           objective='regression',
                           metric='mse',
                           device='gpu')
# 五折交叉验证
for fold_, (trn_idx, val_idx) in enumerate(KF.split(x,y)):
    print("##########第{}折############".format(fold_+1))

    Xtrain = x.iloc[trn_idx]
    Ytrain = y.iloc[trn_idx]

    X_val = x.iloc[val_idx]
    Y_val = y.iloc[val_idx]

    model = model.fit(Xtrain,
                      Ytrain,
                      eval_metric="mse",
                      eval_set=[(X_val, Y_val)],
                      verbose=100,
                      early_stopping_rounds=100
                    )
#     # 模型存储
#     joblib.dump(gbm, 'loan_model.pkl')
#     # 模型加载
#     gbm = joblib.load('loan_model.pkl')
    
    feat_imp_df['imp'] += model.feature_importances_ / 5
    oof_lgb[val_idx] = model.predict(X_val,num_iteration=model.best_iteration_)
    predictions_lgb += model.predict(new_test,num_iteration=model.best_iteration_)

##########第1折############
[100]	valid_0's l2: 1.60455
[200]	valid_0's l2: 1.59635
[300]	valid_0's l2: 1.59286
[400]	valid_0's l2: 1.5912
[500]	valid_0's l2: 1.59039
[600]	valid_0's l2: 1.59028
##########第2折############
[100]	valid_0's l2: 1.60729
[200]	valid_0's l2: 1.59863
[300]	valid_0's l2: 1.59548
[400]	valid_0's l2: 1.59376
[500]	valid_0's l2: 1.59254
[600]	valid_0's l2: 1.59236
[700]	valid_0's l2: 1.59198
[800]	valid_0's l2: 1.59254
##########第3折############
[100]	valid_0's l2: 1.60934
[200]	valid_0's l2: 1.6008
[300]	valid_0's l2: 1.59725
[400]	valid_0's l2: 1.59559
[500]	valid_0's l2: 1.59476
[600]	valid_0's l2: 1.59472
##########第4折############
[100]	valid_0's l2: 1.61955
[200]	valid_0's l2: 1.61196
[300]	valid_0's l2: 1.60935
[400]	valid_0's l2: 1.60864
[500]	valid_0's l2: 1.60813
[600]	valid_0's l2: 1.60828
##########第5折############
[100]	valid_0's l2: 1.61106
[200]	valid_0's l2: 1.60247
[300]	valid_0's l2: 1.5992
[400]	valid_0's l2: 1.59821
[500]	valid_0's l2: 1.598
[600]	v

In [14]:
def aiyiqi_metric(y_true,y_pred):
    y_true = list(y_true)
    y_pred = list(y_pred)
    score = 0
    for i in range(len(y_true)):
        score += abs(y_true[i]-y_pred[i])/7
    return 100*(1-score/len(y_true))

In [15]:
score = aiyiqi_metric(y,oof_lgb)
print("aiqyiqi score: {}".format(score))

aiqyiqi score: 87.07917368242995


In [16]:
submit = test[['user_id']]
submit['pred'] = predictions_lgb/5
submit.columns = ['user_id', 'pred']
submit.to_csv("./data/submit/lgb_submit_B_{}.csv".format(score), index=False, header=False, float_format="%.2f")