# Session-based Recs with Transformers4Rec: RNN - Gated Recurrent Networks

This notebook follows the step-by-step tutorial:
https://nvidia-merlin.github.io/Transformers4Rec/main/examples/tutorial/index.html

## Imports

In [1]:
import os
import glob
import pandas as pd
import numpy as np

from transformers4rec import tf as tr
import tensorflow as tf
from transformers4rec.tf.ranking_metric import NDCGAt, RecallAt

## Instantiate the Schema object from the schema file

In [2]:
# Input data directory (schema.pb is loaded from here); the INPUT_DATA_DIR
# environment variable overrides the default.
INPUT_DATA_DIR = os.getenv("INPUT_DATA_DIR", "../data/")

In [3]:
# Directory holding the processed parquet files, one sub-folder per time
# window; the OUTPUT_DIR environment variable overrides the default.
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "../data/sessions_by_day")

In [4]:
# features chosen to train on; used to filter the schema and as the key(s) of
# the feature dictionary built in data_to_dict
chosen_features = ['product_id-list_seq']

In [5]:
from merlin_standard_lib import Schema

# Load the full schema from schema.pb, then keep only the chosen features so
# the TabularSequenceFeatures class is built from just those columns.
SCHEMA_PATH = os.path.join(INPUT_DATA_DIR, 'schema.pb')
schema = Schema().from_proto_text(SCHEMA_PATH).select_by_name(chosen_features)

In [6]:
# inspect the first 30 lines of schema.pb (IPython shell escape running `head`)
!head -30 $SCHEMA_PATH

feature {
  name: "price_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "price_log_norm-list_seq"
    min: -17.176351827798428
    max: 1.7566816406751988
  }
  annotation {
  }
}
feature {
  name: "product_recency_days_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "product_recency_days_log_norm-list_seq"
    min: -6.913329620541532
    max: 0.44860732556877836
  }
  annotation {
  }
}


### Defining the input block: `TabularSequenceFeatures`

In [7]:
# Build the input block from the filtered schema.
# 20 is also the value_count max of the list features printed from schema.pb
# above and the length pad_dataset pads every sequence to.
sequence_length = 20
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    max_sequence_length = sequence_length,
    masking = 'causal'
)

2021-12-06 16:25:57.067041: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Connecting the blocks with `SequentialBlock`
When using TensorFlow in place of PyTorch, replace `Block` with a one-layer `SequentialBlock`, because `Block` has no constructor in the TF API but does in the torch API.

In [8]:
# Earlier attempt (kept for reference): an MLP block followed by a Keras GRU.
# d_model = 128
# body = tr.SequentialBlock(
#     [inputs,
#     tr.MLPBlock([d_model]),
#     tf.keras.layers.GRU(units=d_model)]
# )
# Active body: the input block followed by tr.MLPBlock([64]).
# NOTE(review): no recurrent layer is present here despite the notebook title.
body = tr.SequentialBlock([inputs, tr.MLPBlock([64])])

### Item Prediction head and tying embeddings
The `hf_format=True` argument was removed because it is not a keyword argument recognised by the TensorFlow API.

In [9]:
# Earlier attempts (kept for reference, disabled): next-item prediction heads
# with ranking metrics, then with AUC.
# head = tr.Head(
#     body,
#     tr.NextItemPredictionTask(weight_tying=True,
#                               metrics=[NDCGAt(top_ks=[10, 20], labels_onehot=True),
#                                        RecallAt(top_ks=[10, 20], labels_onehot=True)]),
# )
# model = tr.Model(head)
# head = tr.Head(
#     body,
#     tr.NextItemPredictionTask(weight_tying=True,
#                               metrics=[tf.keras.metrics.AUC])
# )
# model = tr.Model(head)

# NOTE(review): this module-level `targets` is not referenced by any later cell
# (get_dataset builds its own random targets); 111916 is a hard-coded length,
# presumably the row count of one training file -- TODO confirm.
targets = {"target": tf.cast(tf.random.uniform((111916,), maxval=2, dtype=tf.int32), tf.float32)}
# Active model: a binary-classification task on the "target" label built on `body`.
model = tr.BinaryClassificationTask("target").to_model(body, inputs)

***Disregard the dataloader-from-schema function used in the tutorial: it is used by the `transformers4rec.torch` trainer class, which does not exist for TF.***

### Model Fine-tuning and Incremental evaluation

In [10]:
def iterate_over_df(
    df: pd.DataFrame,
    column: str = 'product_id-list_seq',
):
    """Return a zero-argument generator function over one column of ``df``.

    The returned callable has the shape ``tf.data.Dataset.from_generator``
    expects: every call starts a fresh pass over the column, yielding one
    value per row in row order.

    Args:
        df: Frame containing ``column``.
        column: Name of the column to iterate. Defaults to the item-id
            sequence column used throughout this notebook.

    Returns:
        A callable that, when invoked, returns a generator over the values
        of ``df[column]``.

    Raises:
        KeyError: When the generator is first advanced and ``column`` is not
            present in ``df``.
    """
    def caller():
        # Iterate the column directly: DataFrame.iterrows() builds a Series
        # for every row, which is slow and can upcast values when the frame
        # holds columns of mixed dtypes.
        yield from df[column]
    return caller

In [11]:
def ds_from_df(
    df: pd.DataFrame
):
    """Wrap ``df`` in a ``tf.data.Dataset`` of variable-length int32 vectors.

    Elements are produced lazily by the generator function obtained from
    ``iterate_over_df(df)``; each element is declared as a rank-1 int32
    tensor of unknown length.
    """
    sequence_shape = tf.TensorShape([None])
    return tf.data.Dataset.from_generator(
        iterate_over_df(df),
        output_types=tf.int32,
        output_shapes=sequence_shape,
    )

In [12]:
def pad_dataset(
        df,
        batch_size: int,
):
    """Shuffle, pad and batch a dataset of variable-length sequences.

    Args:
        df: Dataset object exposing ``shuffle``, ``padded_batch`` and
            ``prefetch`` (here the result of ``ds_from_df``).
        batch_size: Number of sequences per padded batch.

    Returns:
        The dataset after shuffling with ``shuffle(5)``, padding every
        sequence with zeros to length 20, dropping a trailing incomplete
        batch, and enabling prefetching.
    """
    shuffled = df.shuffle(5)
    batched = shuffled.padded_batch(
        batch_size,
        padded_shapes=[20],
        padding_values=0,
        drop_remainder=True,
    )
    return batched.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [13]:
def data_to_dict(df_list,chosen_features):
    """Map feature names onto the contents of the first batch in ``df_list``.

    With exactly one chosen feature, the whole first batch is stored under
    that name. Otherwise the first batch is treated as an indexable
    collection whose i-th entry belongs to ``chosen_features[i]``.

    Only ``df_list[0]`` is read; any further batches are ignored.
    """
    if len(chosen_features) == 1:
        return {chosen_features[0]: df_list[0]}

    first_batch = df_list[0]
    return {
        chosen_features[position]: first_batch[position]
        for position in range(len(first_batch))
    }

In [27]:
def get_dataset(df,batch_size,df_len):
    """Build a batched ``(features, targets)`` dataset from a DataFrame.

    Args:
        df: Frame holding the sequence column consumed by ``ds_from_df``.
        batch_size: Number of examples per batch of the returned dataset.
        df_len: Number of rows in ``df``.

    Returns:
        A ``(dataset, steps)`` tuple. ``dataset`` yields dictionaries of
        padded features paired with a ``{"target": ...}`` dictionary, and
        ``steps`` is the number of full batches in one pass (at least 1).

    Note:
        The targets are random 0/1 placeholders drawn with
        ``tf.random.uniform``, not labels derived from the data.
    """
    sequences = ds_from_df(df)
    # Using df_len as the batch size materialises the frame as one padded
    # batch, which data_to_dict then turns into a feature dictionary.
    padded = pad_dataset(sequences, df_len)
    features = data_to_dict(list(padded), chosen_features)
    targets = {"target": tf.cast(tf.random.uniform((df_len,), maxval=2, dtype=tf.int32), tf.float32)}
    # Batch with the caller's batch_size (previously a hard-coded 50) so the
    # dataset agrees with the step count computed below.
    ds = tf.data.Dataset.from_tensor_slices((features, targets)).batch(batch_size)
    # Integer floor division; never report 0 steps when fewer than
    # batch_size rows exist, since one partial batch is still produced.
    steps = max(1, df_len // batch_size)

    return ds, steps

In [28]:
# Compile with the Adam optimiser and a binary cross-entropy loss for the
# binary "target" label.
model.compile(
    optimizer="adam",
    loss = "binary_crossentropy"
)

In [29]:
# batch sizes handed to get_dataset for the training and evaluation data
train_batch_size = 256
eval_batch_size = 32

In [30]:
%%time
start_time_window_index = 1
final_time_window_index = 4
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_paths = os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet")
    eval_paths = os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet")
    # Initialize dataloaders
    train_df = pd.read_parquet(train_paths)
    train_df = train_df[['product_id-list_seq']]
    train_len = len(train_df)
    eval_df = pd.read_parquet(eval_paths)
    eval_df = eval_df[['product_id-list_seq']]
    eval_len = len(eval_df)
    train_dataset, train_steps = get_dataset(train_df, train_batch_size,train_len)
    eval_dataset, eval_steps = get_dataset(eval_df, eval_batch_size,eval_len)
    # Train on day related to time_index 
    print('*'*20)
    print("Launch training for day %s are:" %time_index)
    print('*'*20 + '\n')
    
    n_epochs = 3
    history = model.fit(
        train_dataset.repeat(n_epochs),
        steps_per_epoch=train_steps,
        epochs=n_epochs,
        initial_epoch=0,
        verbose=1,
    )
    print('*'*20)
    print("Launch evaluation for day %s are:" %time_index)
    print('*'*20 + '\n')
    results = model.evaluate(eval_dataset, batch_size=eval_batch_size)

********************
Launch training for day 1 are:
********************

Epoch 1/3
Epoch 2/3
Epoch 3/3
********************
Launch evaluation for day 1 are:
********************

********************
Launch training for day 2 are:
********************

Epoch 1/3
Epoch 2/3
Epoch 3/3
********************
Launch evaluation for day 2 are:
********************

********************
Launch training for day 3 are:
********************

Epoch 1/3
Epoch 2/3
Epoch 3/3
********************
Launch evaluation for day 3 are:
********************

CPU times: user 7min 39s, sys: 1min 20s, total: 9min
Wall time: 3min 2s
