In [1]:
import os
import tensorflow as tf
from tempfile import TemporaryDirectory
from recommenders.utils.constants import SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.models.xDeepFM import XDeepFMModel
from recommenders.models.deeprec.io.iterator import FFMTextIterator
# Raise the TensorFlow logger threshold to ERROR so lower-severity log records are not shown.
tf.get_logger().setLevel('ERROR')

In [2]:
# Number of epochs and batch size, grouped per experiment.
EPOCHS_FOR_SYNTHETIC_RUN, BATCH_SIZE_SYNTHETIC = 15, 128
EPOCHS_FOR_CRITEO_RUN, BATCH_SIZE_CRITEO = 30, 4096

# Seed shared by both experiments; taken from the recommenders constants module.
RANDOM_SEED = SEED

In [3]:
# Alternative: work in a throw-away directory instead of a fixed one.
# tmpdir = TemporaryDirectory()
# data_path = tmpdir.name

# Resource directory, resolved against the current working directory.
data_path = os.path.join(os.getcwd(), 'xdeepfmresources')

# Model config, the three synthetic data parts (train / valid / test) and the
# prediction output file all sit directly inside data_path.
yaml_file, train_file, valid_file, test_file, output_file = (
    os.path.join(data_path, resource_name)
    for resource_name in (
        'xDeepFM.yaml',
        'synthetic_part_0',
        'synthetic_part_1',
        'synthetic_part_2',
        'output.txt',
    )
)

In [4]:
# Keyword settings for the synthetic-data experiment. They are handed to
# prepare_hparams together with the YAML file (presumably overriding the
# values read from it -- confirm against prepare_hparams).
synthetic_overrides = {
    'FEATURE_COUNT': 1000,
    'FIELD_COUNT': 10,
    'cross_l2': 0.0001,
    'embed_l2': 0.0001,
    'learning_rate': 0.001,
    'epochs': EPOCHS_FOR_SYNTHETIC_RUN,
    'batch_size': BATCH_SIZE_SYNTHETIC,
}
hparams = prepare_hparams(yaml_file, **synthetic_overrides)
# Uncomment to inspect the resulting hyper-parameters:
# print(hparams)

In [5]:
# Iterator class (not an instance) that is passed to the model constructor.
input_creator = FFMTextIterator

In [6]:
# Build the xDeepFM model for the synthetic experiment from the prepared
# hyper-parameters, the iterator class and the shared random seed.
model = XDeepFMModel(hparams, input_creator, seed=RANDOM_SEED)

Add CIN part.


  "shape. This may consume a large amount of memory." % value)


In [7]:
# Model performance before training: evaluate the not-yet-fitted model on the
# test file to get a baseline.
print(model.run_eval(test_file))

{'auc': 0.5043, 'logloss': 0.7515}


In [8]:
# Train on train_file, with valid_file supplied as the validation data.
# The call is the cell's last expression, so the notebook also displays its return value.
model.fit(train_file, valid_file)

at epoch 1
train info: logloss loss:0.755682612612485
eval info: auc:0.504, logloss:0.7042
at epoch 1 , train time: 4.5 eval time: 0.7
at epoch 2
train info: logloss loss:0.7263523229618662
eval info: auc:0.5066, logloss:0.6973
at epoch 2 , train time: 4.2 eval time: 0.7
at epoch 3
train info: logloss loss:0.7177084289055919
eval info: auc:0.5099, logloss:0.6953
at epoch 3 , train time: 4.1 eval time: 0.7
at epoch 4
train info: logloss loss:0.7118660174694258
eval info: auc:0.5147, logloss:0.6946
at epoch 4 , train time: 4.1 eval time: 0.7
at epoch 5
train info: logloss loss:0.7055103305688838
eval info: auc:0.523, logloss:0.6941
at epoch 5 , train time: 4.1 eval time: 0.7
at epoch 6
train info: logloss loss:0.6954095564347362
eval info: auc:0.5416, logloss:0.6929
at epoch 6 , train time: 4.1 eval time: 0.7
at epoch 7
train info: logloss loss:0.6723950161147363
eval info: auc:0.5916, logloss:0.6831
at epoch 7 , train time: 4.1 eval time: 0.7
at epoch 8
train info: logloss loss:0.611980

<recommenders.models.deeprec.models.xDeepFM.XDeepFMModel at 0x27c05a77788>

In [9]:
# Model performance after training
# Evaluate the fitted model on the test file; the metrics are kept in res_syn and printed.
res_syn = model.run_eval(test_file)
print(res_syn)

{'auc': 0.9716, 'logloss': 0.2278}


In [10]:
# Run the model on the test set and save the results
# predict is given test_file as input and output_file as the destination
# (exact output format not shown here -- confirm against the library).
model.predict(test_file, output_file)

<recommenders.models.deeprec.models.xDeepFM.XDeepFMModel at 0x27c05a77788>

In [11]:
# Criteo: the cells below repeat the prepare / build / evaluate / fit workflow on the tiny Criteo files.

In [12]:
# Keyword settings for the Criteo experiment. They are handed to
# prepare_hparams together with the same YAML file used for the synthetic run.
criteo_overrides = {
    'FEATURE_COUNT': 2300000,
    'FIELD_COUNT': 39,
    # L2 coefficients and optimiser step size.
    'cross_l2': 0.01,
    'embed_l2': 0.01,
    'layer_l2': 0.01,
    'learning_rate': 0.002,
    # Training schedule.
    'batch_size': BATCH_SIZE_CRITEO,
    'epochs': EPOCHS_FOR_CRITEO_RUN,
    # Network shape and initialisation.
    'cross_layer_sizes': [20, 10],
    'init_value': 0.1,
    'layer_sizes': [20, 20],
    # Enable all three model components.
    'use_Linear_part': True,
    'use_CIN_part': True,
    'use_DNN_part': True,
}
hparams = prepare_hparams(yaml_file, **criteo_overrides)

In [13]:
# Re-point the train / valid / test paths at the tiny Criteo files in data_path.
# The 'cretio' spelling is the on-disk file name and is kept as-is.
train_file, valid_file, test_file = (
    os.path.join(data_path, 'cretio_tiny_' + split_name)
    for split_name in ('train', 'valid', 'test')
)

In [14]:
# Build a fresh xDeepFM model for the Criteo experiment; this rebinds `model`,
# replacing the one trained on the synthetic data.
model = XDeepFMModel(hparams, FFMTextIterator, seed=RANDOM_SEED)

Add linear part.
Add CIN part.
Add DNN part.


  "shape. This may consume a large amount of memory." % value)


In [15]:
# Model performance before training
# Evaluate the not-yet-fitted model on the current test_file to get a baseline.
print(model.run_eval(test_file))

{'auc': 0.4728, 'logloss': 0.7113}


In [16]:
# Train on train_file, with valid_file supplied as the validation data.
# The call is the cell's last expression, so the notebook also displays its return value.
model.fit(train_file, valid_file)

at epoch 1
train info: logloss loss:744.360164642334
eval info: auc:0.6637, logloss:0.5342
at epoch 1 , train time: 29.8 eval time: 3.9
at epoch 2
train info: logloss loss:385.6692314147949
eval info: auc:0.7137, logloss:0.5109
at epoch 2 , train time: 26.8 eval time: 3.7
at epoch 3
train info: logloss loss:191.5082721710205
eval info: auc:0.7283, logloss:0.5037
at epoch 3 , train time: 26.8 eval time: 3.7
at epoch 4
train info: logloss loss:92.20771861076355
eval info: auc:0.7359, logloss:0.4991
at epoch 4 , train time: 26.9 eval time: 3.8
at epoch 5
train info: logloss loss:43.15944170951843
eval info: auc:0.74, logloss:0.4963
at epoch 5 , train time: 27.0 eval time: 3.8
at epoch 6
train info: logloss loss:19.656913101673126
eval info: auc:0.7426, logloss:0.4946
at epoch 6 , train time: 26.4 eval time: 3.5
at epoch 7
train info: logloss loss:8.77035242319107
eval info: auc:0.7441, logloss:0.4934
at epoch 7 , train time: 26.4 eval time: 3.6
at epoch 8
train info: logloss loss:3.922732

<recommenders.models.deeprec.models.xDeepFM.XDeepFMModel at 0x27c07351148>

In [17]:
# Model performance after training
# Evaluate the fitted model on the test file; the metrics are kept in res_real and printed.
res_real = model.run_eval(test_file)
print(res_real)

{'auc': 0.7356, 'logloss': 0.5017}


In [18]:
# Only needed when the TemporaryDirectory variant of data_path is enabled:
# tmpdir.cleanup()