# Session-based Recs with Transformers4Rec: RNN - Gated Recurrent Networks

This notebook follows the step-by-step Transformers4Rec tutorial:
https://nvidia-merlin.github.io/Transformers4Rec/main/examples/tutorial/index.html

## Imports

In [1]:
import os
import glob
import pandas as pd
import numpy as np

from transformers4rec import tf as tr
import tensorflow as tf
from transformers4rec.tf.ranking_metric import NDCGAt, RecallAt

## Instantiate Schema object from schema file

In [2]:
from merlin_standard_lib import Schema

# Directory holding the input data (and the schema file); the INPUT_DATA_DIR
# environment variable overrides the default.
INPUT_DATA_DIR = os.environ.get("INPUT_DATA_DIR", "../data/")
# Output path; the OUTPUT_DIR environment variable overrides the default.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "../data/sessions_by_day")
# Names of the schema columns the model is trained on.
chosen_features = ["product_id-list_seq"]

# Read the schema from its file and keep only the chosen features; the
# resulting object is what gets passed to the TabularSequenceFeatures class.
SCHEMA_PATH = os.path.join(INPUT_DATA_DIR, "schema.pb")
schema = Schema().from_proto_text(SCHEMA_PATH).select_by_name(chosen_features)

## Define Input Block

Use masked language modeling (MLM) as the training method.

In [3]:
# Input hyper-parameters: the maximum sequence length handed to the input
# module, and the output width shared with the transformer configuration.
sequence_length = 20
d_model = 192

# Input module built from the schema: processes the tabular input features
# and prepares the masked inputs ("mlm" masking).
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    max_sequence_length=sequence_length,
    d_output=d_model,
    masking="mlm",
)

2021-12-09 09:49:41.511867: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Build Transformer Block

In [5]:
# Build the XLNet configuration (HF XLNet config with Transformers4Rec
# defaults) for the chosen width, head count, layer count and sequence length.
transformer_config = tr.XLNetConfig.build(
    d_model=d_model, n_head=4, n_layer=2, total_seq_length=sequence_length
)

# Model body: inputs (with masking) -> MLP projection -> transformer block.
# The projection width is tied to d_model instead of a literal 192 so that it
# keeps matching the width the transformer is configured with if d_model is
# ever changed.
body = tr.SequentialBlock(
    [
        inputs,
        tr.MLPBlock([d_model]),
        tr.TransformerBlock(transformer_config, masking=inputs.masking),
    ]
)

# Head for the next-item prediction task, tracking NDCG and Recall at
# cut-offs 10 and 20.
head = tr.Head(
    body,
    tr.NextItemPredictionTask(
        weight_tying=True,
        metrics=[
            NDCGAt(top_ks=[10, 20], labels_onehot=True),
            RecallAt(top_ks=[10, 20], labels_onehot=True),
        ],
    ),
)

# End-to-end model wrapping the head.
model = tr.Model(head)

## Build Datasets

In [7]:
def iterate_over_df(
    df: pd.DataFrame,
    column: str = 'product_id-list_seq',
):
    """Return a zero-argument generator function over one column of ``df``.

    The returned callable is meant to be handed to
    ``tf.data.Dataset.from_generator``, which expects a callable that
    produces a fresh iterator each time it is invoked.

    Args:
        df: Dataframe holding one row per session.
        column: Name of the column whose values are yielded. Defaults to
            the session item sequence column used throughout this notebook.

    Returns:
        A function that, when called, yields the values of ``df[column]``
        in row order.
    """
    def caller():
        # Walk the column itself rather than df.iterrows(): iterrows builds a
        # Series for every row, which is slow and may upcast values when the
        # frame has mixed dtypes.
        yield from df[column]
    return caller

In [8]:
def ds_from_df(
    df: pd.DataFrame
):
    """Wrap the session sequences of ``df`` in a ``tf.data.Dataset``.

    Each dataset element is a 1-D ``tf.int32`` tensor of variable length,
    produced by the generator that ``iterate_over_df`` builds for ``df``.

    Args:
        df: Dataframe accepted by ``iterate_over_df``.

    Returns:
        A ``tf.data.Dataset`` yielding one sequence tensor per dataframe row.
    """
    # output_signature replaces the deprecated output_types/output_shapes
    # pair; the element spec (int32, shape [None]) is unchanged.
    return tf.data.Dataset.from_generator(
        iterate_over_df(df),
        output_signature=tf.TensorSpec(shape=(None,), dtype=tf.int32),
    )

In [9]:
def pad_dataset(
        df,
        batch_size: int,
        max_length: int = 20,
        shuffle_buffer: int = 5,
):
    """Shuffle, pad and batch a dataset of variable-length sequences.

    Every sequence is right-padded with zeros to ``max_length`` so that all
    elements of a batch share the same shape.

    Args:
        df: Dataset whose elements are 1-D sequences (despite the name, this
            is the dataset object, not a dataframe).
        batch_size: Number of sequences per batch; a final incomplete batch
            is dropped (``drop_remainder=True``).
        max_length: Length every sequence is padded to. Defaults to 20, the
            value previously hard-coded here.
        shuffle_buffer: Buffer size passed to ``shuffle``. Defaults to 5, the
            value previously hard-coded here.

    Returns:
        The shuffled, padded, batched and prefetched dataset.
    """
    shuffled = df.shuffle(shuffle_buffer)
    # NOTE(review): padding to a fixed length presumably fails for sequences
    # longer than max_length -- confirm inputs are truncated upstream.
    batched = shuffled.padded_batch(
        batch_size,
        padded_shapes=[max_length],
        padding_values=0,
        drop_remainder=True,
    )
    return batched.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [10]:
def data_to_dict(
    df_list: list,
    chosen_features: list
):
    """Map feature names to the contents of the first entry of ``df_list``.

    Only ``df_list[0]`` is read. With exactly one chosen feature that entry
    is stored whole under the feature name; otherwise its components are
    paired positionally with ``chosen_features``.

    Args:
        df_list: List whose first entry holds the feature data.
        chosen_features: Feature names used as dictionary keys.

    Returns:
        Dictionary from feature name to the corresponding data.
    """
    first_entry = df_list[0]
    if len(chosen_features) == 1:
        return {chosen_features[0]: first_entry}
    # One key per component of the first entry, matched by position.
    return {
        chosen_features[position]: component
        for position, component in enumerate(first_entry)
    }

In [11]:
def get_dataset(
    df,
    batch_size,
    df_len
):
    """Build the batched (features, targets) dataset fed to the model.

    Chains ``ds_from_df``, ``pad_dataset`` and ``data_to_dict`` and then
    re-slices the resulting tensors into batches of ``batch_size``.

    Args:
        df: Dataframe with the session sequences.
        batch_size: Number of sessions per batch of the returned dataset.
        df_len: Number of sessions to materialise from ``df``.

    Returns:
        A tuple ``(ds, steps)``: the batched dataset of
        ``(feature dict, targets)`` pairs, and the number of full batches
        contained in ``df_len`` sessions.
    """
    sequences = ds_from_df(df)
    # Using df_len as the batch size collapses the data into one padded batch
    # so it can be turned into plain tensors below. Shuffling therefore
    # happens once, here, not on every pass over the returned dataset.
    single_batch = pad_dataset(sequences, df_len)
    features = data_to_dict(list(single_batch), chosen_features)
    # The targets are the item sequences themselves.
    targets = features["product_id-list_seq"]
    # Batch with the requested batch_size (previously a literal 50) so the
    # dataset agrees with the step count computed from batch_size below.
    ds = tf.data.Dataset.from_tensor_slices((features, targets)).batch(batch_size)
    steps = int(df_len // batch_size)

    return ds, steps