In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings
import os
import DeepFM as dfm
import preprocess as prep
from sklearn.metrics import roc_curve,confusion_matrix,recall_score,roc_auc_score
import matplotlib.pyplot as plt
import datetime
warnings.filterwarnings('ignore')
path_model='D:\\kyk-ml\\Recommendation_FactorModel+lightgbm\\DeepFM\\'

In [2]:
delta0=datetime.timedelta(days=1)
delta=datetime.timedelta(days=15)
now=(datetime.datetime.now()-delta0).strftime(format='%Y/%m/%d')
past=(datetime.datetime.now()-delta).strftime(format='%Y/%m/%d')
train,test=prep.load_full_log(past,now,'2021/2/18','2021/2/19',is_test=True)
item_pool=prep.load_item_pool(past,now)

In [3]:
train_set,test_set=prep.process_data(train,item_pool,test_data=test,sampling_ratio=5,user_cols=['user_id'],item_cols=['item_id','item_catalog'])

In [4]:
model=dfm.DeepFatorizationMachine(64,1024)
if os.path.exists(path_model+'DeepFM.h5'):
    print('loading model.\n')
    model.predict(test_set.take(1))
    model.load_weights(path_model+'DeepFM.h5')
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),optimizer=tf.keras.optimizers.Adam(0.01),metrics=
    [dfm.roc_auc,tf.keras.metrics.Recall()])
    model.fit(test_set,epochs=10)
    model.evaluate(test_set)
    model.save_weights(path_model+'DeepFM.h5')
else:   
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),optimizer=tf.keras.optimizers.Adam(0.01),metrics=         
    [dfm.roc_auc,tf.keras.metrics.Recall()])
    model.fit(train_set,epochs=15,validation_data=test_set)
    model.evaluate(test_set)
    model.summary()
    model.save_weights(path_model+'DeepFM.h5')

loading model.

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
user_ids=["'00d481e75bed4c4aa2cdc0799711fe68'","'017a5094fe544967ad557489a3c97189'","'714a4f3891024e1daf6753e01a14cbb8'"]
recmd=dfm.feeling_lucky(model,user_ids,topK=10,feature_cols=['user_id','item_id','item_catalog'])

In [10]:
recmd

Unnamed: 0,Unnamed: 1,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
0,00d481e75bed4c4aa2cdc0799711fe68,平安少儿综合意外险-基础版,民生银行-网乐贷,客宜贷,苏宁银行-微商贷,外贸贷,税E贷,平安税金贷（企业主）,新网好企贷,微业贷（仅做展示）,大数金盈贷（广东）-诺诺
1,017a5094fe544967ad557489a3c97189,平安少儿综合意外险-基础版,民生银行-网乐贷,满意贷,甜橙借钱,学习无忧”疾病医疗学平险,客宜贷,小蜜蜂综合意外保险（尊享款）,亚太百万人生意外险,平安快贷-新一贷,云联烟草贷
2,714a4f3891024e1daf6753e01a14cbb8,平安少儿综合意外险-基础版,亚太百万人生意外险,云利宝A款,孝心安老年人意外险,小蜜蜂综合意外保险（尊享款）,臻爱百万医疗保险计划(甲状腺病特别版),招行薪福通,盈票,拉卡拉灵活金-诺诺,好运贷（限物流、运输行业企业）


In [None]:
y_true,y_score,data=dfm.get_prediction(model,test_set)
fpr,tpr,thresholds=roc_curve(y_true,y_score,drop_intermediate=False)
auc=roc_auc_score(y_true,y_score)
plt.plot(fpr,tpr)
plt.legend(['AUC='+str(np.round(auc,3))])
plt.show()

In [None]:
y_pred=np.zeros(len(y_true))
threshold=0.5
y_pred[y_score[:,0]>threshold]=1

In [None]:
recall_score(y_true,y_pred)