In [5]:
import pandas as pd
import numpy as np
import os
import time
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
import tensorflow as tf
from deepctr.models import DeepFM,WDL
from tensorflow.python.keras.optimizers import Adam,Adagrad
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
from tensorflow.python.keras.models import save_model,load_model
from deepctr.layers import custom_objects

from evaluation_v2 import uAUC,compute_weighted_score

In [12]:
# 存储数据的根目录
ROOT_PATH = "/testcbd017_gujinfang/GJFCode/WeChat_2021/Code/data"
TEST_FILE=ROOT_PATH+'/wechat_algo_data1/test_b.csv'
DATASET_PATH=ROOT_PATH+'/wechat_algo_data1'
SUB_PATH=ROOT_PATH+'/submit'

In [3]:
# GPU相关设置
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# 设置GPU按需增长
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

Please check the latest version manually on https://pypi.org/project/deepctr/#history


In [22]:
SEED=2021
ACTION_LIST = ["read_comment", "like", "click_avatar",  "forward"]
# ACTION_LIST = ["forward"]
FEA_COLUMN_LIST = ["read_comment", "like", "click_avatar",  "forward", "comment", "follow", "favorite"]
# 负样本下采样比例(负样本:正样本)
ACTION_SAMPLE_RATE = {"read_comment": 15, "like": 15, "click_avatar": 10, "forward": 10, "comment": 10, "follow": 10, "favorite": 10}
# 各个行为构造训练数据的天数
ACTION_DAY_NUM = {"read_comment": 14, "like": 14, "click_avatar": 14, "forward": 14, "comment": 14, "follow": 14, "favorite": 14}

In [8]:
epochs=4
batch_size=512
embedding_dim=16
model_name='deepfm'
repeat=4 # 重复训练模型repeat次 预测结果取平均
LAST_DAY=14 # =15 意味着要提交 =14 意味着线下验证
START_DAY=14
SUBMIT_FLAG=False
# 提交
if(SUBMIT_FLAG):
    LAST_DAY=15 # =15 意味着要提交 =14 意味着线下验证
    START_DAY=14
else:
    LAST_DAY=14 # =15 意味着要提交 =14 意味着线下验证
    START_DAY=14


In [9]:
def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)       
    else:
        return

In [7]:
# 读取某个action的sample_conat数据；最后一天为val，其他为train 
def get_df_data(action,day=14):
    df=pd.read_csv('{}/generater_data/{}_{}_concat_sample.csv'.format(ROOT_PATH,action,day))
#     return df,df[df['date_']<day],df[df['date_']==day]
    return pd.DataFrame(df)

In [10]:
# 采样History数据
def generate_sample(df,action,day=14):
    """
    对负样本进行下采样，生成各个阶段所需样本
    """
    df = df.drop_duplicates(subset=['userid', 'feedid', action], keep='last')
    
    # 负样本下采样
    action_df = df[(df["date_"] <= day) & (df["date_"] >= day - ACTION_DAY_NUM[action] + 1)]
    df_neg = action_df[action_df[action] == 0]
                 
    all_pos_num=len(action_df[action_df[action] == 1])
    all_neg_num=len(action_df)-all_pos_num
    
        
    sample_neg_num=min(len(df_neg),all_pos_num*ACTION_SAMPLE_RATE[action])
    if(sample_neg_num<len(df_neg)):
        df_neg=df_neg.sample(n=sample_neg_num, random_state=SEED, replace=False)
    print('-----------{}-------------'.format(action))
    print('pos num:{};neg num:{}'.format(all_pos_num,sample_neg_num))
  #每个aciton进行负采样
#   #按照停留时间进行采样
#     df_neg=df_neg.sort_values(by='play',ascending=True)
#     df_neg = df_neg[:sample_neg_num]
    
    df_all = pd.concat([df_neg,action_df[action_df[action] == 1]])  
    col = ["userid", "feedid", "date_", "device"] + ACTION_LIST
    
    return df_all[col]

# 把train拼接上 u i特征
def train_concat(sample,action):
    # 用户基本特征
    df_users=pd.read_csv(DATASET_PATH+'/user_info.csv')
    df_users = df_users.set_index('userid')
    # 用户统计特征
    df_users_static=pd.read_csv(DATASET_PATH+'/user_feature_sum_avg.csv')
    df_users_static=df_users_static.drop_duplicates(subset=['userid','date_'], keep='last')
    df_users_static=df_users_static.set_index(['userid','date_']) # 必须重新设置idx 不然join的时候报错
    # 视频特征
    df_feed=pd.read_csv(DATASET_PATH+'/feed_feature.csv')
    df_feed = df_feed.set_index('feedid')
    
    features = ["userid", "feedid", "device", "authorid", "bgm_song_id", "bgm_singer_id",\
                'watch_count_group','video_time_group','feed_cluter',\
                "videoplayseconds","watch_count","play_times",'date_','des_words','ocr_words','asr_words',\
                'manual_tag','machine_tag','manual_keywords','machine_keywords','feed_emb_id','one_tag']
    features=features+['user_'+b+'_sum_group' for b in FEA_COLUMN_LIST]+['user_'+b+'_mean_group' for b in FEA_COLUMN_LIST]
        
    sample = sample.join(df_feed, on="feedid", how="left", rsuffix="_feed")
    sample = sample.join(df_users, on=["userid"], how="left", rsuffix="_user_id")
    sample = sample.join(df_users_static, on=["userid", "date_"], how="left", rsuffix="_user_static")
        
    # 把各种统计信息更新到features中
    user_feature_col = [b+"_sum" for b in FEA_COLUMN_LIST]+[b+"_mean" for b in FEA_COLUMN_LIST]
    sample[user_feature_col] = sample[user_feature_col].fillna(0.0)
        
    features += user_feature_col
    features+=[action]
    
    # id=0 填充未知分类数据和离散数据
    sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']] += 1  
    sample[["authorid", "bgm_song_id", "bgm_singer_id", "videoplayseconds",'watch_count_group','video_time_group']] = \
        sample[["authorid", "bgm_song_id", "bgm_singer_id", "videoplayseconds",\
                'watch_count_group','video_time_group']].fillna(0)
        
    # 给数值型数据增加非线性
    dense_cols=['videoplayseconds','watch_count']+user_feature_col

    # 把分类数据id转化成int格式
    sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']] = \
    sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']].astype(int)
        
    return sample[features]


# 把test数据 拼接上 u i特征
def test_concat(df_test):
    # 用户基本特征
    df_users=pd.read_csv(DATASET_PATH+'/user_info.csv')
    df_users = df_users.set_index('userid')
    # 用户统计特征
    df_users_static=pd.read_csv(DATASET_PATH+'/user_feature_sum_avg.csv')
    df_users_static=df_users_static.drop_duplicates(subset=['userid','date_'], keep='last')
    # test的时候直接使用14天的统计数据
    df_users_static=df_users_static[df_users_static['date_']==14]
    df_users_static=df_users_static.set_index('userid')
    
    # 视频特征
    df_feed=pd.read_csv(DATASET_PATH+'/feed_feature.csv')
    df_feed = df_feed.set_index('feedid')
    
    features = ["userid", "feedid", "device", "authorid", "bgm_song_id", "bgm_singer_id",\
                'watch_count_group','video_time_group','feed_cluter',\
                "videoplayseconds","watch_count","play_times",'des_words','ocr_words','asr_words',\
                'manual_tag','machine_tag','manual_keywords','machine_keywords','feed_emb_id','one_tag']
    
    features=features+['user_'+b+'_sum_group' for b in FEA_COLUMN_LIST]+['user_'+b+'_mean_group' for b in FEA_COLUMN_LIST]

    sample=df_test
    sample = sample.join(df_feed, on="feedid", how="left", rsuffix="_feed")
    sample = sample.join(df_users, on="userid", how="left", rsuffix="_user_id")
    sample = sample.join(df_users_static, on="userid", how="left", rsuffix="_user_static")

    # 把各种统计信息更新到features中
    user_feature_col = [b+"_sum" for b in FEA_COLUMN_LIST]+[b+"_mean" for b in FEA_COLUMN_LIST]
    # test中可能有冷启动 所以必须填充空值
    sample[user_feature_col] = sample[user_feature_col].fillna(0.0)

    features += user_feature_col

    # id=0 填充未知分类数据和离散数据
    sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']] += 1  
    sample[["authorid", "bgm_song_id", "bgm_singer_id", "videoplayseconds",'watch_count_group','video_time_group']] = \
        sample[["authorid", "bgm_song_id", "bgm_singer_id", "videoplayseconds",\
                'watch_count_group','video_time_group']].fillna(0)

    dense_cols=['videoplayseconds','watch_count']+user_feature_col

    # 把分类数据id转化成int格式
    sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']] = \
        sample[["authorid", "bgm_song_id", "bgm_singer_id",'watch_count_group','video_time_group']].astype(int)

    return sample[features]

#### 不同的action其test数据是一样的

In [13]:
df_history_list=pd.read_csv(ROOT_PATH+'/wechat_algo_data1/user_history_list.csv')
test=pd.read_csv(TEST_FILE)
test=test_concat(test)
# test.head(5)

In [14]:
# 制作训练集和验证集 模型输入
def make_train_val(df,day):
    train=df[(df['date_']<day) & (df['date_']>=day-14)]
    day=min(day,14)
    val=df[df['date_']==day]
    return train,val


In [16]:
df_actions=pd.read_csv(DATASET_PATH+'/user_action.csv')

#### 为每个action构造单独的Model

In [None]:
# sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id',\
#                    'watch_count_group','video_time_group','feed_cluter','device']

# dense_features = ['videoplayseconds',"watch_count","play_times"]

sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id',\
                  'video_time_group','feed_cluter','one_tag']

dense_features = ['videoplayseconds']
# [b+"_sum" for b in ACTION_LIST]+[b+"_mean" for b in ACTION_LIST]
# dense_features += [b+'_log' for b in dense_features]+[b+'_square' for b in dense_features]+[b+'_exp' for b in dense_features]

sub_predict=test[['userid', 'feedid']]
# start_day=12 # 相当于取 15-start_day次的val取平均来作为线下指标
best_auc=dict()
for action in ACTION_LIST:
    best_auc[action]=[0.0]*(LAST_DAY-START_DAY+1) # day=15时 是全量数据进行train
    
predict_best=dict(zip(ACTION_LIST,[]*len(ACTION_LIST)))# 只在day=15时保存最好auc时的预测结果

for action in ACTION_LIST:
    epochs=4
    print('******************{}********************'.format(action))
    # 1 读取 train val 数据集
    df=generate_sample(df_actions,action,day=14)
    df=train_concat(df,action).sample(frac=1.0)
    for c in dense_features:
        if 'mean' in c:
            df[c]=df[c]
        else:
            df[c]=np.log(df[c]+1.0)
            
        
    mms = MinMaxScaler(feature_range=(0, 1))
    
    all_dense_concat=df[dense_features].append([test[dense_features]])
    all_dense_concat=mms.fit_transform(all_dense_concat[dense_features])
    
    df[dense_features] = all_dense_concat[0:len(df),0:len(dense_features)]
    test[dense_features] = all_dense_concat[len(df):len(all_dense_concat),0:len(dense_features)]

    # 2 生成特征列
    fixlen_sparse_columns=[SparseFeat(feat, vocabulary_size=df[feat].max() + 1,embedding_dim=embedding_dim)
                           for feat in sparse_features]
    fixlen_dense_columns= [DenseFeat(feat, 1,) for feat in dense_features]

    # 挑选dnn列和linear列
    dnn_feature_columns = fixlen_sparse_columns+fixlen_dense_columns
    linear_feature_columns=fixlen_dense_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    
    # 3-0 test都一样
    test_model_input = {name:test[name].values for name in feature_names}
    
## 为每个action只构造一个函数 以滑动窗口类似于参数预训练的方式 表现并不如每个滑动窗口重新训练模型参数

#     # 3-2 构造模型 训练
#     model = DeepFM(linear_feature_columns,dnn_feature_columns,fm_group=sparse_features,
#                        l2_reg_embedding=1e-5,l2_reg_linear=1e-5,l2_reg_dnn=1e-10,
#                        dnn_hidden_units=[128,128,64],seed=2021,task='binary')


#     model.compile(Adam(0.001), "binary_crossentropy",
#                       metrics=['binary_crossentropy',])
    
    
    # 3-0 从start day开始训练 先根据start day制作train 和 val
    for day in range(START_DAY,LAST_DAY+1):
        train,val=make_train_val(df,day)
        train=train.sample(frac=1.0)
        
        # 3-1 生成模型的输入数据
        train_model_input = {name:train[name].values for name in feature_names}
        val_model_input = {name: val[name] for name in feature_names}
        userid_list = val['userid'].astype(str).tolist() # val中所有uid列表 计算auc需要使用
        train_labels = train[action].values
        val_labels = val[action].values
    
        # 3-2 构造模型 训练
        model = DeepFM(linear_feature_columns,dnn_feature_columns,fm_group=sparse_features,
                       l2_reg_embedding=1e-5,l2_reg_linear=1e-5,l2_reg_dnn=1e-10,
                       dnn_hidden_units=[128,128,64],seed=2021,task='binary')


        model.compile(Adam(1e-4), "binary_crossentropy",
                      metrics=['binary_crossentropy',])

        # day==15时 最多训练2轮
        if(day==15):
            if(action in ["read_comment", "like","click_avatar"]):
                #epochs=[2,3]
                epochs=2
            else:
                #epochs=[1,2]
                epochs=1
            
        
        for epoch in range(epochs):

            history = model.fit(train_model_input, train_labels,shuffle=True,
                                      batch_size=batch_size, epochs=1, verbose=1,)

            val_pred_ans = model.predict(val_model_input, batch_size=batch_size * 4)
            auc=uAUC(val_labels, val_pred_ans, userid_list)

            print('start(val) day:{},epoch:{},auc:{}'.format(day,epoch+1,auc))
            # 更新当前day当前模型的最好auc 
            if(auc>=best_auc[action][day-START_DAY]):
                best_auc[action][day-START_DAY]=auc

                # 注:day=15时要保存模型 进行预测
                if(day==15):
                    model_root_path=ROOT_PATH+'/MyModel/deepfm'
                    mkdir(model_root_path)
                    save_model(model, '{}/deepfm_{}.h5'.format(model_root_path,action))
                    predict_best[action]=model.predict(test_model_input , batch_size=batch_size * 4)[:,0]
            # 当前模型不能降低auc 停止epochs
            else:
                if(day<15):
                    break

        
weight_dict = {"read_comment": 4, "like": 3, "click_avatar": 2, "favorite": 1, "forward": 1,
                   "comment": 1, "follow": 1}
# 所有模型train完 输出auc
print(best_auc)

# 更新best_auc取平均 #注：不算day=15的auc
for action in best_auc.keys():
    tmp=best_auc[action]
    if(LAST_DAY==15):
        best_auc[action]=(sum(tmp)-tmp[-1])/(15-START_DAY)
    else:
        best_auc[action]=(sum(tmp))/(15-START_DAY)
        

print(best_auc)

# 保存sub_dict
weight_auc=compute_weighted_score(best_auc,weight_dict)
print(weight_auc)

******************read_comment********************
-----------read_comment-------------
pos num:256184;neg num:3842760
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 3752812 samples

In [14]:
for action,predict in predict_best.items():
    sub_predict[action]=predict
sub_predict.to_csv('{}/sub_{}_{}.csv'.format(SUB_PATH,weight_auc,model_name),index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [42]:
sub_predict_final=sub_predict
## 依次读取模型 将train val合并 再训练1个epoch
for action in ACTION_LIST:
    # 0 加载模型
    model_root_path='{}/MyModel/{}'.format(ROOT_PATH,model_name)
    model = load_model('{}/{}_{}.h5'.format(model_root_path,model_name,action),custom_objects)
    print('******************{}********************'.format(action))
    # 1 读取 train val 数据集
    df=get_df_data(action,day=14)
    val=df[df['date_']==14]

    # 2 生成特征列
    fixlen_sparse_columns=[SparseFeat(feat, vocabulary_size=df[feat].max() + 1,embedding_dim=embedding_dim)
                           for feat in sparse_features]
    fixlen_dense_columns= [DenseFeat(feat, 1,) for feat in dense_features]

    # 挑选dnn列和linear列
    dnn_feature_columns = fixlen_sparse_columns+fixlen_dense_columns
    linear_feature_columns=fixlen_dense_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)


    # 3 生成模型的输入数据
    df_all_model_input={name:df[name].values for name in feature_names}
    test_model_input = {name:test[name].values for name in feature_names}

    val_model_input = {name: val[name] for name in feature_names}
    userid_list = val['userid'].astype(str).tolist() # val中所有uid列表 计算auc需要使用
    test_model_input = {name: test[name] for name in feature_names}
    df_all_labels = df[action].values
    val_labels = val[action].values
    
    # 4 使用train val数据再训练模型
    model.fit(df_all_model_input, df_all_labels,shuffle=True,
                                  batch_size=batch_size, epochs=1, verbose=1)
    
    # 5 训练完之后进行预测
    sub_predict_final[action]=model.predict(test_model_input , batch_size=batch_size * 4)[:,0]

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


KeyboardInterrupt: 

In [None]:
# 保存最终的sub file
sub_predict_final.to_csv('{}/sub_final_{}_{}.csv'.format(SUB_PATH,weight_auc,model_name),index=False)

In [94]:
# # 读取模型
# model = load_model('{}/deepfm_{}.h5'.format(model_root_path,action),custom_objects)
# val_pred_ans = model.predict(val_model_input, batch_size=batch_size * 4)
# auc=uAUC(val_labels, val_pred_ans, userid_list)
# print(auc)

In [114]:
# sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id',\
#                    'watch_count_group','video_time_group']

# dense_features = ['videoplayseconds',"watch_count","play_times"]+\
# [b+"_sum" for b in FEA_COLUMN_LIST]+[b+"_mean" for b in FEA_COLUMN_LIST]

# # dense_features = ['videoplayseconds']

# sparse_features_lens_dict={
#     'userid':30000, 
#     'feedid':120000, 
#     'authorid':20000, 
#     'bgm_song_id':30000,
#     'bgm_singer_id':20000,
#     'watch_count_group':25,
#     'video_time_group':25,
# }
# action='like'

# # 1 读取 train val 数据集
# df=get_df_data(action,day=14)
# # print(train.columns)

# # 2 生成特征列
# # fixlen_feature_columns=[SparseFeat(feat, vocabulary_size=lens,embedding_dim=16)
# #                        for feat,lens in sparse_features_lens_dict.items()]+[DenseFeat(feat, 1,) for feat in dense_features]
# fixlen_sparse_columns=[SparseFeat(feat, vocabulary_size=df[feat].max() + 1,embedding_dim=embedding_dim)
#                        for feat in sparse_features]
# fixlen_dense_columns= [DenseFeat(feat, 1,) for feat in dense_features]
# # id的encoding
# # for feat in sparse_features:
# #     lbe = LabelEncoder()
# #     df[feat] = lbe.fit_transform(df[feat])

# dnn_feature_columns = fixlen_sparse_columns+fixlen_dense_columns
# # linear_feature_columns = fixlen_feature_columns
# linear_feature_columns=fixlen_dense_columns

# feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# train,val=df[df['date_']<14],df[df['date_']==14]

# # 3 生成模型的输入数据
# train_model_input = {name:train[name].values for name in feature_names}
# # test_model_input = {name:test[name].values for name in feature_names}

# val_model_input = {name: val[name] for name in feature_names}
# userid_list = val['userid'].astype(str).tolist() # val中所有uid列表 计算auc需要使用
# # test_model_input = {name: test[name] for name in feature_names}
# train_labels = train[action].values
# val_labels = val[action].values

# # 4 构造模型 训练
# model = DeepFM(linear_feature_columns,dnn_feature_columns,fm_group=sparse_features,
#                dnn_hidden_units=[128,128,64],seed=6666,task='binary')

# #model=WDL(linear_feature_columns,dnn_feature_columns,dnn_hidden_units=[128,128],seed=6666,task='binary')

# model.compile('adam', "binary_crossentropy",
#               metrics=['binary_crossentropy'], )
# best_auc=0.0
# for epoch in range(epochs):
#     history = model.fit(train_model_input, train_labels,
#                               batch_size=batch_size, epochs=1, verbose=1)

#     val_pred_ans = model.predict(val_model_input, batch_size=batch_size * 4)
#     auc=uAUC(val_labels, val_pred_ans, userid_list)
#     if(auc>best_auc):
#         sub_predict[action]=model.predict(test_model_input , batch_size=batch_size * 4)[:,0]
#         best_auc=auc
#         model_root_path=ROOT_PATH+'/MyModel/deepfm'
#         mkdir(model_root_path)
#         save_model(model, '{}/deepfm_{}.h5'.format(model_root_path,action))
#     print('epoch:{},auc:{}'.format(epoch+1,auc))

In [None]:
### 之前单独写的

# sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id',\
#                    'watch_count_group','video_time_group']

# dense_features = ['videoplayseconds',"watch_count","play_times"]+\
# [b+"_sum" for b in FEA_COLUMN_LIST]+[b+"_mean" for b in FEA_COLUMN_LIST]

# # dense_features = ['videoplayseconds']

# sparse_features_lens_dict={
#     'userid':30000, 
#     'feedid':120000, 
#     'authorid':20000, 
#     'bgm_song_id':30000,
#     'bgm_singer_id':20000,
#     'watch_count_group':25,
#     'video_time_group':25,
# }
# action='click_avatar'

# # 1 读取 train val 数据集
# df=get_df_data(action,day=14)
# # print(train.columns)

# # 2 生成特征列
# # fixlen_feature_columns=[SparseFeat(feat, vocabulary_size=lens,embedding_dim=16)
# #                        for feat,lens in sparse_features_lens_dict.items()]+[DenseFeat(feat, 1,) for feat in dense_features]
# fixlen_sparse_columns=[SparseFeat(feat, vocabulary_size=df[feat].max() + 1,embedding_dim=embedding_dim)
#                        for feat in sparse_features]
# fixlen_dense_columns= [DenseFeat(feat, 1,) for feat in dense_features]
# # id的encoding
# # for feat in sparse_features:
# #     lbe = LabelEncoder()
# #     df[feat] = lbe.fit_transform(df[feat])

# dnn_feature_columns = fixlen_sparse_columns+fixlen_dense_columns
# # linear_feature_columns = fixlen_feature_columns
# linear_feature_columns=fixlen_dense_columns

# feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# train,val=df[df['date_']<14],df[df['date_']==14]

# # 3 生成模型的输入数据
# train_model_input = {name:train[name].values for name in feature_names}
# # test_model_input = {name:test[name].values for name in feature_names}

# val_model_input = {name: val[name] for name in feature_names}
# userid_list = val['userid'].astype(str).tolist() # val中所有uid列表 计算auc需要使用
# # test_model_input = {name: test[name] for name in feature_names}
# train_labels = train[action].values
# val_labels = val[action].values

# # 4 构造模型 训练
# # model = DeepFM(linear_feature_columns,dnn_feature_columns,fm_group=sparse_features,
# #                dnn_hidden_units=[128,128,64],seed=6666,task='binary')

# model=WDL(linear_feature_columns,dnn_feature_columns,dnn_hidden_units=[128,128],seed=6666,task='binary')

# model.compile('adam', "binary_crossentropy",
#               metrics=['binary_crossentropy'], )
# best_auc=0.0
# for epoch in range(epochs):
#     history = model.fit(train_model_input, train_labels,
#                               batch_size=batch_size, epochs=1, verbose=1)

#     val_pred_ans = model.predict(val_model_input, batch_size=batch_size * 4)
#     auc=uAUC(val_labels, val_pred_ans, userid_list)
#     if(auc>best_auc):
#         best_auc=auc
#         model_root_path=ROOT_PATH+'/MyModel/deepfm'
#         mkdir(model_root_path)
#         save_model(model, '{}/deepfm_{}.h5'.format(model_root_path,action))
#     print('epoch:{},auc:{}'.format(epoch+1,auc))