In [1]:
import sys
import os
import logging
import papermill as pm
import scrapbook as sb
from tempfile import TemporaryDirectory
import numpy as np
import tensorflow.compat.v1 as tf
# Restrict TensorFlow's logger to ERROR-level messages.
tf.get_logger().setLevel('ERROR')

# Put the repository's `recommenders` directory on the import path (only once),
# presumably so the local `models.deeprec...` import below resolves — confirm.
recommenders_dir = os.path.join('..', '..', 'recommenders')
if recommenders_dir not in sys.path:
    sys.path.append(recommenders_dir)

from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.models.deeprec.deeprec_utils import (
    prepare_hparams
)
from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing
from recommenders.datasets.download_utils import maybe_download


# Locally import the model
from models.deeprec.models.sequential.din import DIN_RECModel as SeqModel


#from recommenders.models.deeprec.models.sequential.nextitnet import NextItNetModel

from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator
#from recommenders.models.deeprec.io.nextitnet_iterator import NextItNetIterator

# Report the interpreter and TensorFlow versions, one per line.
print(
    "System version: {}".format(sys.version),
    "Tensorflow version: {}".format(tf.__version__),
    sep="\n",
)

import pickle as pkl
from recommenders.models.deeprec.deeprec_utils import load_dict

System version: 3.9.7 (default, Sep 16 2021, 13:09:58) 
[GCC 7.5.0]
Tensorflow version: 2.9.0


In [2]:
# Path to the DIN YAML configuration; passed to prepare_hparams() when building hparams.
yaml_file = '../../recommenders/models/deeprec/config/din.yaml'  

In [3]:
# Training configuration forwarded to prepare_hparams() (epochs / batch_size).
EPOCHS = 10
BATCH_SIZE = 400
# Seed handed to the model constructor.
RANDOM_SEED = SEED  # Set None for non-deterministic result

# Base directory under which every data, vocabulary, model and summary path is built.
data_path = os.path.join("..", "..", "tests", "resources", "deeprec", "slirec")

In [4]:

# Preprocessed train/valid/test splits and vocabularies, all located under data_path.
train_file = os.path.join(data_path, 'train_data')
valid_file = os.path.join(data_path, 'valid_data')
test_file = os.path.join(data_path, 'test_data')
user_vocab = os.path.join(data_path, 'user_vocab.pkl')
item_vocab = os.path.join(data_path, 'item_vocab.pkl')
cate_vocab = os.path.join(data_path, 'category_vocab.pkl')
output_file = os.path.join(data_path, 'output.txt')

# Raw Amazon "Movies and TV" review and metadata files the splits are built from.
reviews_name = 'reviews_Movies_and_TV_5.json'
meta_name = 'meta_Movies_and_TV.json'
reviews_file = os.path.join(data_path, reviews_name)
meta_file = os.path.join(data_path, meta_name)

train_num_ngs = 4  # number of negative instances with a positive instance for training
valid_num_ngs = 4  # number of negative instances with a positive instance for validation
test_num_ngs = 9  # number of negative instances with a positive instance for testing
# Use sample_rate = 0.01 to sample a small item set for a fast example run.
sample_rate = 1

# Order matters: this list is unpacked positionally into data_preprocessing().
input_files = [reviews_file, meta_file, train_file, valid_file, test_file, user_vocab, item_vocab, cate_vocab]

# Rebuild the dataset when ANY preprocessed artefact used by later cells is missing
# (input_files[2:] = splits + vocabularies). Checking train_file alone would skip
# regeneration after an interrupted run that left the other files unwritten.
if not all(map(os.path.exists, input_files[2:])):
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs,
    )
    # For the NextItNet model, which does not need the user history unfolded,
    # additionally pass is_history_expanding=False to data_preprocessing().

In [5]:
# Build the hyperparameter object from the YAML config plus notebook-level overrides.
hparams = prepare_hparams(
    yaml_file,
    # Regularisation and optimisation settings.
    embed_l2=0.,
    layer_l2=0.,
    learning_rate=0.001,  # set to 0.01 if batch normalization is disabled
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    show_step=20,
    # Output locations for saved models and summaries.
    MODEL_DIR=os.path.join(data_path, "model/din_op/"),
    SUMMARIES_DIR=os.path.join(data_path, "summary/din_op/"),
    # Vocabulary files.
    user_vocab=user_vocab,
    item_vocab=item_vocab,
    cate_vocab=cate_vocab,
    # Negative sampling: train_num_ngs provides the number of negative instances
    # for each positive instance for loss computation.
    need_sample=True,
    train_num_ngs=train_num_ngs,
    attention_mode="outer_product",
)

In [6]:
# Display the resolved hyperparameter values (YAML settings plus the overrides above).
hparams.values()

{'use_entity': True,
 'use_context': True,
 'cross_activation': 'identity',
 'user_dropout': True,
 'dropout': [0.3, 0.3],
 'attention_dropout': 0.0,
 'load_saved_model': False,
 'fast_CIN_d': 0,
 'use_Linear_part': False,
 'use_FM_part': False,
 'use_CIN_part': False,
 'use_DNN_part': False,
 'init_method': 'tnormal',
 'init_value': 0.01,
 'embed_l2': 0.0,
 'embed_l1': 0.0,
 'layer_l2': 0.0,
 'layer_l1': 0.0,
 'cross_l2': 0.0,
 'cross_l1': 0.0,
 'reg_kg': 0.0,
 'learning_rate': 0.001,
 'lr_rs': 1,
 'lr_kg': 0.5,
 'kg_training_interval': 5,
 'max_grad_norm': 2,
 'is_clip_norm': 0,
 'dtype': 32,
 'optimizer': 'adam',
 'epochs': 10,
 'batch_size': 400,
 'enable_BN': True,
 'show_step': 20,
 'save_model': True,
 'save_epoch': 1,
 'write_tfevents': True,
 'train_num_ngs': 4,
 'need_sample': True,
 'embedding_dropout': 0.0,
 'EARLY_STOP': 10,
 'min_seq_length': 1,
 'slots': 5,
 'cell': 'SUM',
 'user_vocab': '../../tests/resources/deeprec/slirec/user_vocab.pkl',
 'item_vocab': '../../tests/r

In [7]:
# Data iterator class handed to the model constructor together with hparams.
input_creator = SequentialIterator
#### uncomment this for the NextItNet model, because it needs a special data iterator for training
#input_creator = NextItNetIterator

In [8]:
# Instantiate the model from the hyperparameters and iterator class chosen above.
model = SeqModel(hparams, input_creator, seed=RANDOM_SEED)

# valid_num_ngs is the number of negative lines after each positive line in
# valid_file; performance on valid_file is evaluated every epoch.
with Timer() as train_time:
    model = model.fit(
        train_file,
        valid_file,
        valid_num_ngs=valid_num_ngs,
    )

# Timer reports an interval that is divided by 60 here to print minutes.
train_minutes = train_time.interval / 60.0
print('Time cost for training is {0:.2f} mins'.format(train_minutes))

  curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization(
2022-06-05 12:13:02.206734: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 12:13:02.212000: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 12:13:02.212223: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 12:13:03.137716: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other 

step 20 , total_loss: 1.5636, data_loss: 1.5636
step 40 , total_loss: 1.5724, data_loss: 1.5724
step 60 , total_loss: 1.5581, data_loss: 1.5581
step 80 , total_loss: 1.5031, data_loss: 1.5031
step 100 , total_loss: 1.5225, data_loss: 1.5225
step 120 , total_loss: 1.5605, data_loss: 1.5605
step 140 , total_loss: 1.5286, data_loss: 1.5286
step 160 , total_loss: 1.5158, data_loss: 1.5158
step 180 , total_loss: 1.5446, data_loss: 1.5446
step 200 , total_loss: 1.5366, data_loss: 1.5366
step 220 , total_loss: 1.5122, data_loss: 1.5122
step 240 , total_loss: 1.5104, data_loss: 1.5104
step 260 , total_loss: 1.5143, data_loss: 1.5143
step 280 , total_loss: 1.4912, data_loss: 1.4912
step 300 , total_loss: 1.4262, data_loss: 1.4262
step 320 , total_loss: 1.4686, data_loss: 1.4686
step 340 , total_loss: 1.4898, data_loss: 1.4898
step 360 , total_loss: 1.4233, data_loss: 1.4233
step 380 , total_loss: 1.4516, data_loss: 1.4516
step 400 , total_loss: 1.4411, data_loss: 1.4411
step 420 , total_loss: 1

eval valid at epoch 1: auc:0.799,logloss:0.708,mean_mrr:0.7345,ndcg@2:0.6963,ndcg@4:0.7837,ndcg@6:0.801,group_auc:0.7948
step 20 , total_loss: 1.0572, data_loss: 1.0572
step 40 , total_loss: 1.0475, data_loss: 1.0475
step 60 , total_loss: 1.0667, data_loss: 1.0667
step 80 , total_loss: 1.0161, data_loss: 1.0161
step 100 , total_loss: 1.0342, data_loss: 1.0342
step 120 , total_loss: 1.0101, data_loss: 1.0101
step 140 , total_loss: 1.0430, data_loss: 1.0430
step 160 , total_loss: 1.0717, data_loss: 1.0717
step 180 , total_loss: 1.0128, data_loss: 1.0128
step 200 , total_loss: 1.0701, data_loss: 1.0701
step 220 , total_loss: 1.0144, data_loss: 1.0144
step 240 , total_loss: 1.0659, data_loss: 1.0659
step 260 , total_loss: 0.9965, data_loss: 0.9965
step 280 , total_loss: 1.0010, data_loss: 1.0010
step 300 , total_loss: 0.9819, data_loss: 0.9819
step 320 , total_loss: 1.0348, data_loss: 1.0348
step 340 , total_loss: 1.0105, data_loss: 1.0105
step 360 , total_loss: 1.0943, data_loss: 1.0943
s

step 3280 , total_loss: 0.9243, data_loss: 0.9243
step 3300 , total_loss: 1.0097, data_loss: 1.0097
eval valid at epoch 2: auc:0.8207,logloss:0.7483,mean_mrr:0.7616,ndcg@2:0.7298,ndcg@4:0.807,ndcg@6:0.8214,group_auc:0.8185
step 20 , total_loss: 0.9879, data_loss: 0.9879
step 40 , total_loss: 0.9269, data_loss: 0.9269
step 60 , total_loss: 0.9587, data_loss: 0.9587
step 80 , total_loss: 0.9065, data_loss: 0.9065
step 100 , total_loss: 0.9041, data_loss: 0.9041
step 120 , total_loss: 0.8675, data_loss: 0.8675
step 140 , total_loss: 0.9343, data_loss: 0.9343
step 160 , total_loss: 0.9113, data_loss: 0.9113
step 180 , total_loss: 0.9460, data_loss: 0.9460
step 200 , total_loss: 0.9052, data_loss: 0.9052
step 220 , total_loss: 0.9408, data_loss: 0.9408
step 240 , total_loss: 0.9501, data_loss: 0.9501
step 260 , total_loss: 0.8492, data_loss: 0.8492
step 280 , total_loss: 0.8868, data_loss: 0.8868
step 300 , total_loss: 0.9194, data_loss: 0.9194
step 320 , total_loss: 0.8355, data_loss: 0.83

step 3240 , total_loss: 0.9337, data_loss: 0.9337
step 3260 , total_loss: 0.9383, data_loss: 0.9383
step 3280 , total_loss: 1.0179, data_loss: 1.0179
step 3300 , total_loss: 0.9379, data_loss: 0.9379
eval valid at epoch 3: auc:0.8307,logloss:0.8383,mean_mrr:0.7732,ndcg@2:0.7445,ndcg@4:0.8173,ndcg@6:0.8301,group_auc:0.829
step 20 , total_loss: 0.8084, data_loss: 0.8084
step 40 , total_loss: 0.8672, data_loss: 0.8672
step 60 , total_loss: 0.8894, data_loss: 0.8894
step 80 , total_loss: 0.8788, data_loss: 0.8788
step 100 , total_loss: 0.8729, data_loss: 0.8729
step 120 , total_loss: 0.8556, data_loss: 0.8556
step 140 , total_loss: 0.8022, data_loss: 0.8022
step 160 , total_loss: 0.8857, data_loss: 0.8857
step 180 , total_loss: 0.8365, data_loss: 0.8365
step 200 , total_loss: 0.7918, data_loss: 0.7918
step 220 , total_loss: 0.7948, data_loss: 0.7948
step 240 , total_loss: 0.7439, data_loss: 0.7439
step 260 , total_loss: 0.8582, data_loss: 0.8582
step 280 , total_loss: 0.8830, data_loss: 0.

step 3200 , total_loss: 0.9042, data_loss: 0.9042
step 3220 , total_loss: 0.8444, data_loss: 0.8444
step 3240 , total_loss: 0.7959, data_loss: 0.7959
step 3260 , total_loss: 0.8967, data_loss: 0.8967
step 3280 , total_loss: 0.8532, data_loss: 0.8532
step 3300 , total_loss: 0.8374, data_loss: 0.8374
eval valid at epoch 4: auc:0.8365,logloss:0.9544,mean_mrr:0.781,ndcg@2:0.754,ndcg@4:0.8239,ndcg@6:0.836,group_auc:0.8357
step 20 , total_loss: 0.7827, data_loss: 0.7827
step 40 , total_loss: 0.8881, data_loss: 0.8881
step 60 , total_loss: 0.8327, data_loss: 0.8327
step 80 , total_loss: 0.7891, data_loss: 0.7891
step 100 , total_loss: 0.9026, data_loss: 0.9026
step 120 , total_loss: 0.7891, data_loss: 0.7891
step 140 , total_loss: 0.7610, data_loss: 0.7610
step 160 , total_loss: 0.8584, data_loss: 0.8584
step 180 , total_loss: 0.7657, data_loss: 0.7657
step 200 , total_loss: 0.8394, data_loss: 0.8394
step 220 , total_loss: 0.8333, data_loss: 0.8333
step 240 , total_loss: 0.8084, data_loss: 0.

step 3160 , total_loss: 0.8248, data_loss: 0.8248
step 3180 , total_loss: 0.9180, data_loss: 0.9180
step 3200 , total_loss: 0.8468, data_loss: 0.8468
step 3220 , total_loss: 0.8541, data_loss: 0.8541
step 3240 , total_loss: 0.8176, data_loss: 0.8176
step 3260 , total_loss: 0.7283, data_loss: 0.7283
step 3280 , total_loss: 0.9002, data_loss: 0.9002
step 3300 , total_loss: 0.8975, data_loss: 0.8975
eval valid at epoch 5: auc:0.8404,logloss:1.0122,mean_mrr:0.7849,ndcg@2:0.7582,ndcg@4:0.8272,ndcg@6:0.839,group_auc:0.8389
step 20 , total_loss: 0.8164, data_loss: 0.8164
step 40 , total_loss: 0.7103, data_loss: 0.7103
step 60 , total_loss: 0.8200, data_loss: 0.8200
step 80 , total_loss: 0.7990, data_loss: 0.7990
step 100 , total_loss: 0.8179, data_loss: 0.8179
step 120 , total_loss: 0.7839, data_loss: 0.7839
step 140 , total_loss: 0.8140, data_loss: 0.8140
step 160 , total_loss: 0.7524, data_loss: 0.7524
step 180 , total_loss: 0.8643, data_loss: 0.8643
step 200 , total_loss: 0.7769, data_loss

step 3120 , total_loss: 0.8093, data_loss: 0.8093
step 3140 , total_loss: 0.7917, data_loss: 0.7917
step 3160 , total_loss: 0.7843, data_loss: 0.7843
step 3180 , total_loss: 0.9064, data_loss: 0.9064
step 3200 , total_loss: 0.8207, data_loss: 0.8207
step 3220 , total_loss: 0.8258, data_loss: 0.8258
step 3240 , total_loss: 0.6979, data_loss: 0.6979
step 3260 , total_loss: 0.7859, data_loss: 0.7859
step 3280 , total_loss: 0.8908, data_loss: 0.8908
step 3300 , total_loss: 0.7399, data_loss: 0.7399
eval valid at epoch 6: auc:0.8408,logloss:1.1115,mean_mrr:0.7862,ndcg@2:0.7602,ndcg@4:0.8282,ndcg@6:0.8399,group_auc:0.8398
step 20 , total_loss: 0.7533, data_loss: 0.7533
step 40 , total_loss: 0.6925, data_loss: 0.6925
step 60 , total_loss: 0.7580, data_loss: 0.7580
step 80 , total_loss: 0.7481, data_loss: 0.7481
step 100 , total_loss: 0.8131, data_loss: 0.8131
step 120 , total_loss: 0.8613, data_loss: 0.8613
step 140 , total_loss: 0.6903, data_loss: 0.6903
step 160 , total_loss: 0.7156, data_l

step 3080 , total_loss: 0.7875, data_loss: 0.7875
step 3100 , total_loss: 0.8060, data_loss: 0.8060
step 3120 , total_loss: 0.7813, data_loss: 0.7813
step 3140 , total_loss: 0.7712, data_loss: 0.7712
step 3160 , total_loss: 0.8539, data_loss: 0.8539
step 3180 , total_loss: 0.7701, data_loss: 0.7701
step 3200 , total_loss: 0.7948, data_loss: 0.7948
step 3220 , total_loss: 0.7263, data_loss: 0.7263
step 3240 , total_loss: 0.7728, data_loss: 0.7728
step 3260 , total_loss: 0.7915, data_loss: 0.7915
step 3280 , total_loss: 0.8011, data_loss: 0.8011
step 3300 , total_loss: 0.7517, data_loss: 0.7517
eval valid at epoch 7: auc:0.8426,logloss:1.1948,mean_mrr:0.7879,ndcg@2:0.7621,ndcg@4:0.8299,ndcg@6:0.8412,group_auc:0.8414
step 20 , total_loss: 0.7236, data_loss: 0.7236
step 40 , total_loss: 0.7164, data_loss: 0.7164
step 60 , total_loss: 0.6860, data_loss: 0.6860
step 80 , total_loss: 0.6937, data_loss: 0.6937
step 100 , total_loss: 0.7263, data_loss: 0.7263
step 120 , total_loss: 0.7088, data

step 3040 , total_loss: 0.8062, data_loss: 0.8062
step 3060 , total_loss: 0.7633, data_loss: 0.7633
step 3080 , total_loss: 0.8233, data_loss: 0.8233
step 3100 , total_loss: 0.7654, data_loss: 0.7654
step 3120 , total_loss: 0.7174, data_loss: 0.7174
step 3140 , total_loss: 0.7693, data_loss: 0.7693
step 3160 , total_loss: 0.7891, data_loss: 0.7891
step 3180 , total_loss: 0.8283, data_loss: 0.8283
step 3200 , total_loss: 0.7937, data_loss: 0.7937
step 3220 , total_loss: 0.7701, data_loss: 0.7701
step 3240 , total_loss: 0.7877, data_loss: 0.7877
step 3260 , total_loss: 0.7510, data_loss: 0.7510
step 3280 , total_loss: 0.8133, data_loss: 0.8133
step 3300 , total_loss: 0.8501, data_loss: 0.8501
eval valid at epoch 8: auc:0.8418,logloss:1.2995,mean_mrr:0.7886,ndcg@2:0.7627,ndcg@4:0.83,ndcg@6:0.8417,group_auc:0.8413
step 20 , total_loss: 0.7587, data_loss: 0.7587
step 40 , total_loss: 0.6555, data_loss: 0.6555
step 60 , total_loss: 0.7051, data_loss: 0.7051
step 80 , total_loss: 0.7870, data

step 3000 , total_loss: 0.7020, data_loss: 0.7020
step 3020 , total_loss: 0.7673, data_loss: 0.7673
step 3040 , total_loss: 0.8051, data_loss: 0.8051
step 3060 , total_loss: 0.7736, data_loss: 0.7736
step 3080 , total_loss: 0.6747, data_loss: 0.6747
step 3100 , total_loss: 0.8295, data_loss: 0.8295
step 3120 , total_loss: 0.7831, data_loss: 0.7831
step 3140 , total_loss: 0.6773, data_loss: 0.6773
step 3160 , total_loss: 0.7131, data_loss: 0.7131
step 3180 , total_loss: 0.7478, data_loss: 0.7478
step 3200 , total_loss: 0.7321, data_loss: 0.7321
step 3220 , total_loss: 0.8114, data_loss: 0.8114
step 3240 , total_loss: 0.7735, data_loss: 0.7735
step 3260 , total_loss: 0.7445, data_loss: 0.7445
step 3280 , total_loss: 0.7527, data_loss: 0.7527
step 3300 , total_loss: 0.8443, data_loss: 0.8443
eval valid at epoch 9: auc:0.842,logloss:1.3998,mean_mrr:0.7898,ndcg@2:0.764,ndcg@4:0.8309,ndcg@6:0.8426,group_auc:0.8421
step 20 , total_loss: 0.6450, data_loss: 0.6450
step 40 , total_loss: 0.7445, 

step 2960 , total_loss: 0.6766, data_loss: 0.6766
step 2980 , total_loss: 0.7158, data_loss: 0.7158
step 3000 , total_loss: 0.7296, data_loss: 0.7296
step 3020 , total_loss: 0.7687, data_loss: 0.7687
step 3040 , total_loss: 0.8041, data_loss: 0.8041
step 3060 , total_loss: 0.7245, data_loss: 0.7245
step 3080 , total_loss: 0.7861, data_loss: 0.7861
step 3100 , total_loss: 0.8063, data_loss: 0.8063
step 3120 , total_loss: 0.7044, data_loss: 0.7044
step 3140 , total_loss: 0.7131, data_loss: 0.7131
step 3160 , total_loss: 0.8323, data_loss: 0.8323
step 3180 , total_loss: 0.7047, data_loss: 0.7047
step 3200 , total_loss: 0.6762, data_loss: 0.6762
step 3220 , total_loss: 0.6924, data_loss: 0.6924
step 3240 , total_loss: 0.7174, data_loss: 0.7174
step 3260 , total_loss: 0.7716, data_loss: 0.7716
step 3280 , total_loss: 0.7263, data_loss: 0.7263
step 3300 , total_loss: 0.6975, data_loss: 0.6975
eval valid at epoch 10: auc:0.841,logloss:1.4935,mean_mrr:0.7885,ndcg@2:0.7628,ndcg@4:0.8297,ndcg@6:

In [9]:
# Evaluate the model returned by fit() on the test file; num_ngs is the number of
# negative instances accompanying each positive instance in that file.
res_syn = model.run_eval(test_file, num_ngs=test_num_ngs)
print(res_syn)

{'auc': 0.8333, 'logloss': 1.6876, 'mean_mrr': 0.6691, 'ndcg@2': 0.6184, 'ndcg@4': 0.6914, 'ndcg@6': 0.7216, 'group_auc': 0.8318}


In [10]:
# Build a fresh model with the same hyperparameters, iterator and seed, then load
# saved weights into it from <MODEL_DIR>/best_model.
# NOTE(review): presumably this checkpoint is written by fit() during training — confirm.
model_best_trained = SeqModel(hparams, input_creator, seed=RANDOM_SEED)
path_best_trained = os.path.join(hparams.MODEL_DIR, "best_model")
print('loading saved model in {0}'.format(path_best_trained))
model_best_trained.load_model(path_best_trained)

  curr_hidden_nn_layer = tf.compat.v1.layers.batch_normalization(


loading saved model in ../../tests/resources/deeprec/slirec/model/din_op/best_model


2022-06-05 13:21:11.750419: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 13:21:11.750616: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 13:21:11.750757: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 13:21:11.750917: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-05 13:21:11.751057: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from S

In [11]:
# Evaluate the reloaded model on the same test file and negative-sample count,
# so the metrics can be compared with those of the model returned by fit().
model_best_trained.run_eval(test_file, num_ngs=test_num_ngs)

{'auc': 0.834,
 'logloss': 1.5783,
 'mean_mrr': 0.6692,
 'ndcg@2': 0.6185,
 'ndcg@4': 0.6916,
 'ndcg@6': 0.7221,
 'group_auc': 0.8321}