In [13]:
import sys
import os
import logging
import papermill as pm
import scrapbook as sb
from tempfile import TemporaryDirectory
import numpy as np
import tensorflow.compat.v1 as tf
# Restrict TensorFlow's Python logger to ERROR-level messages only.
# NOTE(review): this does not appear to silence the C++ backend "I ..." lines that still show up
# in the cell outputs; presumably TF_CPP_MIN_LOG_LEVEL is what controls those — confirm.
tf.get_logger().setLevel('ERROR') # only show error messages

# Put the repository's local `recommenders` directory on the import path so that
# modules inside it can be imported directly. The membership check keeps the cell
# idempotent when it is re-run in the same kernel.
_local_pkg_dir = os.path.join('..', '..', 'recommenders')
if _local_pkg_dir not in sys.path:
    sys.path.append(_local_pkg_dir)

from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.models.deeprec.deeprec_utils import (
    prepare_hparams
)
from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing
from recommenders.datasets.download_utils import maybe_download


# Locally import the model
from models.deeprec.models.sequential.din import DIN_RECModel as SeqModel


#from recommenders.models.deeprec.models.sequential.nextitnet import NextItNetModel

from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator
#from recommenders.models.deeprec.io.nextitnet_iterator import NextItNetIterator

# Record the interpreter and TensorFlow versions alongside the results for reproducibility.
print("System version: {}".format(sys.version))
print("Tensorflow version: {}".format(tf.__version__))

import pickle as pkl
from recommenders.models.deeprec.deeprec_utils import load_dict

System version: 3.9.7 (default, Sep 16 2021, 13:09:58) 
[GCC 7.5.0]
Tensorflow version: 2.9.0


In [14]:
# Path (relative to this notebook's working directory) of the DIN YAML config that is
# passed to prepare_hparams as the base configuration.
yaml_file = '../../recommenders/models/deeprec/config/din.yaml'  

In [15]:
# Run configuration shared by the cells below.
EPOCHS = 10  # forwarded to prepare_hparams as `epochs`
BATCH_SIZE = 400  # forwarded to prepare_hparams as `batch_size`
RANDOM_SEED = SEED  # Set None for non-deterministic result

# Directory that holds the dataset files, vocab pickles, and the model/summary output folders.
data_path = os.path.join("..", "..", "tests", "resources", "deeprec", "slirec")

In [16]:

# --- Dataset file locations (all under data_path) ---------------------------------
train_file, valid_file, test_file = (
    os.path.join(data_path, split_name)
    for split_name in ('train_data', 'valid_data', 'test_data')
)
user_vocab, item_vocab, cate_vocab = (
    os.path.join(data_path, vocab_name)
    for vocab_name in ('user_vocab.pkl', 'item_vocab.pkl', 'category_vocab.pkl')
)
output_file = os.path.join(data_path, 'output.txt')

# Raw Amazon "Movies and TV" review and metadata dumps.
reviews_name, meta_name = 'reviews_Movies_and_TV_5.json', 'meta_Movies_and_TV.json'
reviews_file = os.path.join(data_path, reviews_name)
meta_file = os.path.join(data_path, meta_name)

# --- Sampling configuration --------------------------------------------------------
# Number of negative instances paired with each positive instance, per split.
train_num_ngs = 4
valid_num_ngs = 4
test_num_ngs = 9
# Use 1 for the full item set; a small value such as 0.01 samples a small item set
# for a fast example run.
sample_rate = 1

# Positional arguments of data_preprocessing, in the order it is called with.
input_files = [reviews_file, meta_file, train_file, valid_file, test_file, user_vocab, item_vocab, cate_vocab]

# --- Download + preprocess only when the training file is not already present ------
if not os.path.exists(train_file):
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    # For the NextItNet model, additionally pass is_history_expanding=False, because
    # that model does not need the user history to be unfolded.
    data_preprocessing(
        reviews_file,
        meta_file,
        train_file,
        valid_file,
        test_file,
        user_vocab,
        item_vocab,
        cate_vocab,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs,
    )

In [19]:
# Values passed to prepare_hparams as keyword arguments alongside the YAML config file.
hparam_overrides = dict(
    embed_l2=0.,
    layer_l2=0.,
    learning_rate=0.001,  # set to 0.01 if batch normalization is disabled
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    show_step=20,
    MODEL_DIR=os.path.join(data_path, "model/din_sum_pooling/"),
    SUMMARIES_DIR=os.path.join(data_path, "summary/din_sum_pooling/"),
    user_vocab=user_vocab,
    item_vocab=item_vocab,
    cate_vocab=cate_vocab,
    need_sample=True,
    # Number of negative instances for each positive instance, used for loss computation.
    train_num_ngs=train_num_ngs,
    attention_mode="sum_pooling",
)

hparams = prepare_hparams(yaml_file, **hparam_overrides)

In [20]:
# Display the resolved hyperparameter values for inspection before training.
hparams.values()

{'use_entity': True,
 'use_context': True,
 'cross_activation': 'identity',
 'user_dropout': True,
 'dropout': [0.3, 0.3],
 'attention_dropout': 0.0,
 'load_saved_model': False,
 'fast_CIN_d': 0,
 'use_Linear_part': False,
 'use_FM_part': False,
 'use_CIN_part': False,
 'use_DNN_part': False,
 'init_method': 'tnormal',
 'init_value': 0.01,
 'embed_l2': 0.0,
 'embed_l1': 0.0,
 'layer_l2': 0.0,
 'layer_l1': 0.0,
 'cross_l2': 0.0,
 'cross_l1': 0.0,
 'reg_kg': 0.0,
 'learning_rate': 0.001,
 'lr_rs': 1,
 'lr_kg': 0.5,
 'kg_training_interval': 5,
 'max_grad_norm': 2,
 'is_clip_norm': 0,
 'dtype': 32,
 'optimizer': 'adam',
 'epochs': 10,
 'batch_size': 400,
 'enable_BN': True,
 'show_step': 20,
 'save_model': True,
 'save_epoch': 1,
 'write_tfevents': True,
 'train_num_ngs': 4,
 'need_sample': True,
 'embedding_dropout': 0.0,
 'EARLY_STOP': 10,
 'min_seq_length': 1,
 'slots': 5,
 'cell': 'SUM',
 'user_vocab': '../../tests/resources/deeprec/slirec/user_vocab.pkl',
 'item_vocab': '../../tests/r

In [21]:
# Data iterator class handed to the model constructor below; the class itself is passed, not an instance.
input_creator = SequentialIterator
#### uncomment this for the NextItNet model, because it needs a special data iterator for training
#input_creator = NextItNetIterator

In [22]:
# Build the DIN model from the hyperparameters, the iterator class, and the seed.
model = SeqModel(hparams, input_creator, seed=RANDOM_SEED)

# Train under a timer. valid_num_ngs is the number of negative lines after each
# positive line in valid_file; the model is evaluated on valid_file every epoch.
with Timer() as train_time:
    model = model.fit(
        train_file,
        valid_file,
        valid_num_ngs=valid_num_ngs,
    )

elapsed_minutes = train_time.interval / 60.0
print('Time cost for training is {0:.2f} mins'.format(elapsed_minutes))

  curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization(
2022-06-04 08:55:17.465481: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 08:55:17.522943: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 08:55:17.523128: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 08:55:17.973985: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other 

step 20 , total_loss: 1.6080, data_loss: 1.6080
step 40 , total_loss: 1.6070, data_loss: 1.6070
step 60 , total_loss: 1.5773, data_loss: 1.5773
step 80 , total_loss: 1.5347, data_loss: 1.5347
step 100 , total_loss: 1.5522, data_loss: 1.5522
step 120 , total_loss: 1.5548, data_loss: 1.5548
step 140 , total_loss: 1.5520, data_loss: 1.5520
step 160 , total_loss: 1.5496, data_loss: 1.5496
step 180 , total_loss: 1.5579, data_loss: 1.5579
step 200 , total_loss: 1.5484, data_loss: 1.5484
step 220 , total_loss: 1.5317, data_loss: 1.5317
step 240 , total_loss: 1.5367, data_loss: 1.5367
step 260 , total_loss: 1.5607, data_loss: 1.5607
step 280 , total_loss: 1.5311, data_loss: 1.5311
step 300 , total_loss: 1.5217, data_loss: 1.5217
step 320 , total_loss: 1.5196, data_loss: 1.5196
step 340 , total_loss: 1.5241, data_loss: 1.5241
step 360 , total_loss: 1.5295, data_loss: 1.5295
step 380 , total_loss: 1.5077, data_loss: 1.5077
step 400 , total_loss: 1.4783, data_loss: 1.4783
step 420 , total_loss: 1

eval valid at epoch 1: auc:0.7566,logloss:0.7411,mean_mrr:0.6879,ndcg@2:0.6409,ndcg@4:0.745,ndcg@6:0.766,group_auc:0.7569
step 20 , total_loss: 1.2215, data_loss: 1.2215
step 40 , total_loss: 1.1664, data_loss: 1.1664
step 60 , total_loss: 1.1949, data_loss: 1.1949
step 80 , total_loss: 1.1979, data_loss: 1.1979
step 100 , total_loss: 1.2577, data_loss: 1.2577
step 120 , total_loss: 1.1915, data_loss: 1.1915
step 140 , total_loss: 1.2080, data_loss: 1.2080
step 160 , total_loss: 1.2801, data_loss: 1.2801
step 180 , total_loss: 1.2241, data_loss: 1.2241
step 200 , total_loss: 1.2416, data_loss: 1.2416
step 220 , total_loss: 1.2071, data_loss: 1.2071
step 240 , total_loss: 1.1806, data_loss: 1.1806
step 260 , total_loss: 1.1966, data_loss: 1.1966
step 280 , total_loss: 1.1860, data_loss: 1.1860
step 300 , total_loss: 1.1688, data_loss: 1.1688
step 320 , total_loss: 1.2680, data_loss: 1.2680
step 340 , total_loss: 1.1738, data_loss: 1.1738
step 360 , total_loss: 1.3408, data_loss: 1.3408


step 3280 , total_loss: 1.1596, data_loss: 1.1596
step 3300 , total_loss: 1.1375, data_loss: 1.1375
eval valid at epoch 2: auc:0.7879,logloss:0.7763,mean_mrr:0.7236,ndcg@2:0.6864,ndcg@4:0.7763,ndcg@6:0.7929,group_auc:0.7896
step 20 , total_loss: 1.1176, data_loss: 1.1176
step 40 , total_loss: 1.1296, data_loss: 1.1296
step 60 , total_loss: 1.0346, data_loss: 1.0346
step 80 , total_loss: 1.1152, data_loss: 1.1152
step 100 , total_loss: 1.0457, data_loss: 1.0457
step 120 , total_loss: 1.0871, data_loss: 1.0871
step 140 , total_loss: 1.1076, data_loss: 1.1076
step 160 , total_loss: 1.1007, data_loss: 1.1007
step 180 , total_loss: 1.1363, data_loss: 1.1363
step 200 , total_loss: 1.0172, data_loss: 1.0172
step 220 , total_loss: 1.0851, data_loss: 1.0851
step 240 , total_loss: 1.1939, data_loss: 1.1939
step 260 , total_loss: 0.9940, data_loss: 0.9940
step 280 , total_loss: 1.0794, data_loss: 1.0794
step 300 , total_loss: 1.0618, data_loss: 1.0618
step 320 , total_loss: 1.0846, data_loss: 1.0

step 3240 , total_loss: 1.1154, data_loss: 1.1154
step 3260 , total_loss: 1.1155, data_loss: 1.1155
step 3280 , total_loss: 1.0868, data_loss: 1.0868
step 3300 , total_loss: 1.0711, data_loss: 1.0711
eval valid at epoch 3: auc:0.7929,logloss:0.9656,mean_mrr:0.7331,ndcg@2:0.6977,ndcg@4:0.7847,ndcg@6:0.8001,group_auc:0.7976
step 20 , total_loss: 0.9759, data_loss: 0.9759
step 40 , total_loss: 0.9874, data_loss: 0.9874
step 60 , total_loss: 1.0063, data_loss: 1.0063
step 80 , total_loss: 1.0108, data_loss: 1.0108
step 100 , total_loss: 1.0479, data_loss: 1.0479
step 120 , total_loss: 1.0273, data_loss: 1.0273
step 140 , total_loss: 0.9958, data_loss: 0.9958
step 160 , total_loss: 1.0533, data_loss: 1.0533
step 180 , total_loss: 1.0152, data_loss: 1.0152
step 200 , total_loss: 1.0000, data_loss: 1.0000
step 220 , total_loss: 1.0125, data_loss: 1.0125
step 240 , total_loss: 0.9638, data_loss: 0.9638
step 260 , total_loss: 1.0101, data_loss: 1.0101
step 280 , total_loss: 1.1145, data_loss: 1

step 3200 , total_loss: 1.0630, data_loss: 1.0630
step 3220 , total_loss: 1.0015, data_loss: 1.0015
step 3240 , total_loss: 0.9543, data_loss: 0.9543
step 3260 , total_loss: 1.0646, data_loss: 1.0646
step 3280 , total_loss: 1.0133, data_loss: 1.0133
step 3300 , total_loss: 0.9747, data_loss: 0.9747
eval valid at epoch 4: auc:0.8,logloss:1.0104,mean_mrr:0.7412,ndcg@2:0.7074,ndcg@4:0.7911,ndcg@6:0.8062,group_auc:0.8038
step 20 , total_loss: 0.9590, data_loss: 0.9590
step 40 , total_loss: 1.0819, data_loss: 1.0819
step 60 , total_loss: 0.9829, data_loss: 0.9829
step 80 , total_loss: 0.9412, data_loss: 0.9412
step 100 , total_loss: 1.0827, data_loss: 1.0827
step 120 , total_loss: 0.9747, data_loss: 0.9747
step 140 , total_loss: 0.9785, data_loss: 0.9785
step 160 , total_loss: 0.9957, data_loss: 0.9957
step 180 , total_loss: 0.9046, data_loss: 0.9046
step 200 , total_loss: 0.9855, data_loss: 0.9855
step 220 , total_loss: 0.9411, data_loss: 0.9411
step 240 , total_loss: 1.0039, data_loss: 1.

step 3160 , total_loss: 1.0339, data_loss: 1.0339
step 3180 , total_loss: 1.0871, data_loss: 1.0871
step 3200 , total_loss: 0.9449, data_loss: 0.9449
step 3220 , total_loss: 1.0238, data_loss: 1.0238
step 3240 , total_loss: 0.9594, data_loss: 0.9594
step 3260 , total_loss: 0.9412, data_loss: 0.9412
step 3280 , total_loss: 1.0440, data_loss: 1.0440
step 3300 , total_loss: 1.0652, data_loss: 1.0652
eval valid at epoch 5: auc:0.8044,logloss:1.1837,mean_mrr:0.7457,ndcg@2:0.7134,ndcg@4:0.7958,ndcg@6:0.8096,group_auc:0.809
step 20 , total_loss: 0.9664, data_loss: 0.9664
step 40 , total_loss: 0.9381, data_loss: 0.9381
step 60 , total_loss: 0.9610, data_loss: 0.9610
step 80 , total_loss: 0.9803, data_loss: 0.9803
step 100 , total_loss: 0.9352, data_loss: 0.9352
step 120 , total_loss: 1.0030, data_loss: 1.0030
step 140 , total_loss: 1.0022, data_loss: 1.0022
step 160 , total_loss: 0.9401, data_loss: 0.9401
step 180 , total_loss: 0.9434, data_loss: 0.9434
step 200 , total_loss: 1.0426, data_loss

step 3120 , total_loss: 0.9828, data_loss: 0.9828
step 3140 , total_loss: 0.9495, data_loss: 0.9495
step 3160 , total_loss: 0.9434, data_loss: 0.9434
step 3180 , total_loss: 1.0014, data_loss: 1.0014
step 3200 , total_loss: 0.9909, data_loss: 0.9909
step 3220 , total_loss: 1.0077, data_loss: 1.0077
step 3240 , total_loss: 0.9226, data_loss: 0.9226
step 3260 , total_loss: 0.9461, data_loss: 0.9461
step 3280 , total_loss: 0.9963, data_loss: 0.9963
step 3300 , total_loss: 0.8964, data_loss: 0.8964
eval valid at epoch 6: auc:0.81,logloss:1.3324,mean_mrr:0.7539,ndcg@2:0.7232,ndcg@4:0.8026,ndcg@6:0.8158,group_auc:0.8159
step 20 , total_loss: 0.8800, data_loss: 0.8800
step 40 , total_loss: 0.8629, data_loss: 0.8629
step 60 , total_loss: 0.9532, data_loss: 0.9532
step 80 , total_loss: 0.9174, data_loss: 0.9174
step 100 , total_loss: 0.9426, data_loss: 0.9426
step 120 , total_loss: 0.9869, data_loss: 0.9869
step 140 , total_loss: 0.8789, data_loss: 0.8789
step 160 , total_loss: 0.8546, data_los

step 3080 , total_loss: 0.9671, data_loss: 0.9671
step 3100 , total_loss: 1.0599, data_loss: 1.0599
step 3120 , total_loss: 1.0481, data_loss: 1.0481
step 3140 , total_loss: 0.9371, data_loss: 0.9371
step 3160 , total_loss: 1.0204, data_loss: 1.0204
step 3180 , total_loss: 0.9472, data_loss: 0.9472
step 3200 , total_loss: 0.9728, data_loss: 0.9728
step 3220 , total_loss: 0.9194, data_loss: 0.9194
step 3240 , total_loss: 0.9749, data_loss: 0.9749
step 3260 , total_loss: 0.9861, data_loss: 0.9861
step 3280 , total_loss: 0.9691, data_loss: 0.9691
step 3300 , total_loss: 0.9799, data_loss: 0.9799
eval valid at epoch 7: auc:0.8089,logloss:1.4253,mean_mrr:0.7545,ndcg@2:0.7234,ndcg@4:0.8027,ndcg@6:0.8162,group_auc:0.8155
step 20 , total_loss: 0.8501, data_loss: 0.8501
step 40 , total_loss: 0.8616, data_loss: 0.8616
step 60 , total_loss: 0.8679, data_loss: 0.8679
step 80 , total_loss: 0.8696, data_loss: 0.8696
step 100 , total_loss: 0.8833, data_loss: 0.8833
step 120 , total_loss: 0.9562, data

step 3040 , total_loss: 0.9578, data_loss: 0.9578
step 3060 , total_loss: 0.9595, data_loss: 0.9595
step 3080 , total_loss: 0.9307, data_loss: 0.9307
step 3100 , total_loss: 0.8914, data_loss: 0.8914
step 3120 , total_loss: 0.9273, data_loss: 0.9273
step 3140 , total_loss: 0.9879, data_loss: 0.9879
step 3160 , total_loss: 0.9552, data_loss: 0.9552
step 3180 , total_loss: 1.0188, data_loss: 1.0188
step 3200 , total_loss: 0.9291, data_loss: 0.9291
step 3220 , total_loss: 0.9185, data_loss: 0.9185
step 3240 , total_loss: 0.9704, data_loss: 0.9704
step 3260 , total_loss: 0.8537, data_loss: 0.8537
step 3280 , total_loss: 0.9777, data_loss: 0.9777
step 3300 , total_loss: 0.9842, data_loss: 0.9842
eval valid at epoch 8: auc:0.811,logloss:1.5735,mean_mrr:0.7555,ndcg@2:0.725,ndcg@4:0.8032,ndcg@6:0.8169,group_auc:0.8164
step 20 , total_loss: 0.9404, data_loss: 0.9404
step 40 , total_loss: 0.8516, data_loss: 0.8516
step 60 , total_loss: 0.8964, data_loss: 0.8964
step 80 , total_loss: 0.8826, data

step 3000 , total_loss: 0.9006, data_loss: 0.9006
step 3020 , total_loss: 0.9393, data_loss: 0.9393
step 3040 , total_loss: 0.9517, data_loss: 0.9517
step 3060 , total_loss: 0.9323, data_loss: 0.9323
step 3080 , total_loss: 0.8451, data_loss: 0.8451
step 3100 , total_loss: 0.9581, data_loss: 0.9581
step 3120 , total_loss: 0.8958, data_loss: 0.8958
step 3140 , total_loss: 0.9085, data_loss: 0.9085
step 3160 , total_loss: 0.9454, data_loss: 0.9454
step 3180 , total_loss: 0.9023, data_loss: 0.9023
step 3200 , total_loss: 0.9170, data_loss: 0.9170
step 3220 , total_loss: 0.8802, data_loss: 0.8802
step 3240 , total_loss: 0.9812, data_loss: 0.9812
step 3260 , total_loss: 0.9194, data_loss: 0.9194
step 3280 , total_loss: 0.9143, data_loss: 0.9143
step 3300 , total_loss: 0.9519, data_loss: 0.9519
eval valid at epoch 9: auc:0.8157,logloss:1.6467,mean_mrr:0.7598,ndcg@2:0.7306,ndcg@4:0.8073,ndcg@6:0.8202,group_auc:0.8206
step 20 , total_loss: 0.7696, data_loss: 0.7696
step 40 , total_loss: 0.8752

step 2960 , total_loss: 0.8998, data_loss: 0.8998
step 2980 , total_loss: 0.9157, data_loss: 0.9157
step 3000 , total_loss: 0.8758, data_loss: 0.8758
step 3020 , total_loss: 0.9353, data_loss: 0.9353
step 3040 , total_loss: 0.9480, data_loss: 0.9480
step 3060 , total_loss: 0.9077, data_loss: 0.9077
step 3080 , total_loss: 0.8816, data_loss: 0.8816
step 3100 , total_loss: 0.9274, data_loss: 0.9274
step 3120 , total_loss: 0.8664, data_loss: 0.8664
step 3140 , total_loss: 0.8699, data_loss: 0.8699
step 3160 , total_loss: 0.9956, data_loss: 0.9956
step 3180 , total_loss: 0.8816, data_loss: 0.8816
step 3200 , total_loss: 0.9471, data_loss: 0.9471
step 3220 , total_loss: 0.8502, data_loss: 0.8502
step 3240 , total_loss: 0.9317, data_loss: 0.9317
step 3260 , total_loss: 0.9315, data_loss: 0.9315
step 3280 , total_loss: 0.8923, data_loss: 0.8923
step 3300 , total_loss: 0.9480, data_loss: 0.9480
eval valid at epoch 10: auc:0.8104,logloss:1.8416,mean_mrr:0.7564,ndcg@2:0.7253,ndcg@4:0.8039,ndcg@6

In [25]:
# Evaluate the model object returned by fit() on the test file; test_num_ngs is the
# number of negative instances per positive instance used for testing.
# NOTE(review): presumably this reflects the weights at the end of training rather than
# the saved "best_model" checkpoint, which is evaluated separately — confirm.
res_syn = model.run_eval(test_file, num_ngs=test_num_ngs)
print(res_syn)

{'auc': 0.7947, 'logloss': 2.0921, 'mean_mrr': 0.6151, 'ndcg@2': 0.5539, 'ndcg@4': 0.6377, 'ndcg@6': 0.6744, 'group_auc': 0.8}


In [23]:
# Restore the "best_model" checkpoint stored under hparams.MODEL_DIR into a freshly
# constructed model that uses the same hyperparameters, iterator class, and seed.
path_best_trained = os.path.join(hparams.MODEL_DIR, "best_model")
model_best_trained = SeqModel(hparams, input_creator, seed=RANDOM_SEED)

print('loading saved model in {0}'.format(path_best_trained))
model_best_trained.load_model(path_best_trained)

  curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization(


loading saved model in ../../tests/resources/deeprec/slirec/model/din_sum_pooling/best_model


2022-06-04 10:05:09.329131: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 10:05:09.329315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 10:05:09.329434: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 10:05:09.329581: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-04 10:05:09.329697: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from S

In [24]:
# Evaluate the restored checkpoint on the same test file and negative-sample count used
# for the evaluation of `model`; the metrics dict is shown as the cell output.
model_best_trained.run_eval(test_file, num_ngs=test_num_ngs)

{'auc': 0.8058,
 'logloss': 1.8566,
 'mean_mrr': 0.6262,
 'ndcg@2': 0.5672,
 'ndcg@4': 0.6511,
 'ndcg@6': 0.6862,
 'group_auc': 0.8103}