# Session-based Recs with Transformers4Rec: RNN - Gated Recurrent Networks

This notebook follows the step-by-step Transformers4Rec tutorial:
https://nvidia-merlin.github.io/Transformers4Rec/main/examples/tutorial/index.html

## Imports

In [1]:
import os
import glob
import pandas as pd
import numpy as np

from transformers4rec import tf as tr
import tensorflow as tf
from transformers4rec.tf.ranking_metric import NDCGAt, RecallAt

## Instantiate the Schema object from the schema file

In [2]:
# Directory holding the input artefacts; the schema file `schema.pb` is read
# from here. Can be overridden with the INPUT_DATA_DIR environment variable.
INPUT_DATA_DIR = os.environ.get("INPUT_DATA_DIR", '../data/')

In [3]:
# Directory containing one sub-folder per day; the training loop reads
# "<day>/train.parquet" and "<day>/valid.parquet" from it. Despite the name,
# the visible cells only read from this directory.
# Can be overridden with the OUTPUT_DIR environment variable.
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "../data/sessions_by_day")

In [4]:
# Names of the schema features to train on. Used to filter the schema and as
# the keys of the feature dictionary fed to the model.
chosen_features = ['product_id-list_seq']

In [5]:
from merlin_standard_lib import Schema
# Load the schema from the protobuf text file and keep only the chosen
# features; the resulting object is passed to TabularSequenceFeatures.
SCHEMA_PATH = os.path.join(INPUT_DATA_DIR, 'schema.pb')
schema = Schema().from_proto_text(SCHEMA_PATH)
schema = schema.select_by_name(chosen_features)

In [6]:
# inspect the first lines of schema.pb
!head -30 $SCHEMA_PATH

feature {
  name: "price_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "price_log_norm-list_seq"
    min: -17.176351827798428
    max: 1.7566816406751988
  }
  annotation {
  }
}
feature {
  name: "product_recency_days_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "product_recency_days_log_norm-list_seq"
    min: -6.913329620541532
    max: 0.44860732556877836
  }
  annotation {
  }
}


### Defining the input block: `TabularSequenceFeatures`

In [7]:
# Maximum session length handed to the input block; the features printed from
# schema.pb above declare value_count.max = 20.
sequence_length = 20
# Build the input block from the filtered schema. With masking='causal' the
# block is created with a CausalLanguageModeling masking module (see its repr).
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    max_sequence_length = sequence_length,
    masking = 'causal'
)

2021-12-07 12:10:13.826547: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Connecting the blocks with `SequentialBlock`
When using TensorFlow in place of PyTorch, replace `Block` with a one-layer `SequentialBlock`, because `Block` has no constructor in the TF API but does in the Torch API.

In [8]:
# Width of the MLP projection and number of GRU units.
d_model = 128
# Chain the input block, a single-layer MLP of width d_model and a GRU.
# return_sequences=True makes the GRU emit an output for every position of the
# session rather than only the last one.
body = tr.SequentialBlock(
    [inputs,
    tr.MLPBlock([d_model]),
    tf.keras.layers.GRU(units=d_model,return_sequences=True)]
)

In [9]:
body

SequentialBlock(
  (layers): List(
    (0): TabularSequenceFeatures(
      (to_merge): Dict(
        (categorical_layer): SequenceEmbeddingFeatures(
          (feature_config): Dict(
            (product_id-list_seq): TableConfig(vocabulary_size=118335, dim=64, initializer=None, optimizer=None, combiner='mean', name='product_id-list_seq')
          )
          (_pre): SequentialTabularTransformations(
            (layers): List(
              (0): FilterFeatures(
                (to_include): List(
                  (0): 'product_id-list_seq'
                )
              )
              (1): AsSparseFeatures()
            )
          )
        )
      )
      (_aggregation): ConcatFeatures()
      (_masking): CausalLanguageModeling()
    )
    (1): MLPBlock(
      (layers): List(
        (0): Dense(128, activation=relu, use_bias=True)
      )
    )
    (2): GRU(
      (cell): GRUCell()
    )
  )
)

### Item Prediction head and tying embeddings
The `hf_format=True` argument was removed because it is not a keyword argument recognised by the TensorFlow implementation.

In [10]:
# Earlier variant using the top-k ranking metrics imported at the top of the
# notebook (NDCG@k / Recall@k); kept for reference.
# head = tr.Head(
#     body,
#     tr.NextItemPredictionTask(weight_tying=True,
#                               metrics=[NDCGAt(top_ks=[10, 20], labels_onehot=True),
#                                        RecallAt(top_ks=[10, 20], labels_onehot=True)]),
# )
# model = tr.Model(head)

# Attach the next-item prediction task to the body and wrap it in a model.
# NOTE(review): the traceback recorded at the end of this notebook fails inside
# a Keras metric's update_state with "Shapes (None, 118335) and (None,) are
# incompatible" - presumably this AUC metric receiving per-item predictions
# against one label per row. Confirm whether the ranking metrics above should
# be used instead.
# NOTE(review): tf.keras.metrics.AUC is passed as a class, not an instance -
# confirm the task instantiates it.
head = tr.Head(
    body,
    tr.NextItemPredictionTask(weight_tying=True,
                              metrics=[tf.keras.metrics.AUC])
)
model = tr.Model(head)

### Model Compile

In [11]:
# Compile with the Adam optimizer and a binary cross-entropy loss.
# NOTE(review): the task predicts over the item vocabulary while get_dataset
# builds random 0/1 targets - confirm that binary cross-entropy is the intended
# loss for this setup.
model.compile(
    optimizer="adam",
    loss = "binary_crossentropy"
)

***Disregard the dataloader-from-schema function used in the tutorial: it is used by the `transformers4rec.torch` trainer class, which doesn't exist for TF.***

### Build Dataset Functions

In [12]:
def iterate_over_df(
    df: pd.DataFrame
):
    """Return a generator factory over the ``product_id-list_seq`` column.

    The returned zero-argument callable is meant to be handed to
    ``tf.data.Dataset.from_generator``; every call starts a fresh pass over
    the column and yields one row value at a time.

    Args:
        df: Frame containing a ``product_id-list_seq`` column.

    Returns:
        A callable that, when called, returns a generator of the column's
        values in row order.

    Raises:
        KeyError: On first iteration, if the column is missing.
    """
    def caller():
        # Iterate the column itself rather than ``df.iterrows()``: iterrows
        # builds a Series per row, which is slow and can upcast values when
        # the frame holds columns of different dtypes.
        yield from df['product_id-list_seq']
    return caller

In [13]:
def ds_from_df(
    df: pd.DataFrame
):
    """Wrap the session sequences of *df* in a ``tf.data.Dataset``.

    Each dataset element is one variable-length 1-D ``int32`` tensor taken
    from the ``product_id-list_seq`` column (via ``iterate_over_df``).

    Args:
        df: Frame containing a ``product_id-list_seq`` column.

    Returns:
        A ``tf.data.Dataset`` yielding one sequence per row of *df*.
    """
    # ``output_signature`` replaces the deprecated ``output_types`` /
    # ``output_shapes`` pair; shape (None,) keeps the length unconstrained.
    sequence_spec = tf.TensorSpec(shape=(None,), dtype=tf.int32)
    return tf.data.Dataset.from_generator(
        iterate_over_df(df),
        output_signature=sequence_spec,
    )

In [14]:
def pad_dataset(
        df,
        batch_size: int,
        sequence_length: int = 20,
        shuffle_buffer_size: int = 5,
):
    """Shuffle, zero-pad and batch a dataset of variable-length sequences.

    Args:
        df: ``tf.data.Dataset`` whose elements are 1-D sequences.
        batch_size: Number of sequences per batch; an incomplete final batch
            is dropped (``drop_remainder=True``).
        sequence_length: Length every sequence is padded to. Defaults to 20,
            the value previously hard-coded here.
        shuffle_buffer_size: Size of the shuffle buffer. Defaults to 5, the
            value previously hard-coded here.

    Returns:
        The shuffled, padded, batched and prefetched dataset.
    """
    df = df.shuffle(shuffle_buffer_size)
    # Sequences are right-padded with 0 up to ``sequence_length``.
    df = df.padded_batch(
        batch_size,
        padded_shapes=[sequence_length],
        padding_values=0,
        drop_remainder=True,
    )
    return df.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [15]:
def data_to_dict(
    df_list: list,
    chosen_features: list
):
    """Build the feature-name -> data dictionary used as model input.

    Only the first element of *df_list* is used. With exactly one chosen
    feature that element is stored whole under the feature's name; otherwise
    its i-th component is stored under the i-th feature name.

    Args:
        df_list: List whose first element holds the feature data.
        chosen_features: Feature names, in the order of the components.

    Returns:
        A dict mapping feature names to their data.
    """
    single_feature = len(chosen_features) == 1
    first_batch = df_list[0]
    if single_feature:
        return {chosen_features[0]: first_batch}
    return {
        chosen_features[position]: component
        for position, component in enumerate(first_batch)
    }

In [16]:
def get_dataset(
    df,
    batch_size,
    df_len
):
    """Build the batched (features, targets) dataset fed to the model.

    Args:
        df: DataFrame holding the session sequences.
        batch_size: Batch size of the returned dataset; also used to derive
            the number of steps.
        df_len: Number of rows in *df*.

    Returns:
        A tuple ``(ds, steps)``: the batched ``tf.data.Dataset`` of
        ``(features, targets)`` pairs and the number of full batches it holds.
    """
    sequences = ds_from_df(df)
    # Pad into a single batch of ``df_len`` rows so the whole frame comes back
    # as one tensor that can be sliced again below.
    padded = pad_dataset(sequences, df_len)
    features = data_to_dict(list(padded), chosen_features)
    # NOTE(review): targets are random 0/1 values, not derived from the data -
    # presumably placeholders; confirm against the prediction task.
    targets = {"target": tf.cast(tf.random.uniform((df_len,), maxval=2, dtype=tf.int32), tf.float32)}
    # Batch with the requested size (previously a hard-coded 50) so that the
    # batches agree with ``steps``.
    ds = tf.data.Dataset.from_tensor_slices((features, targets)).batch(batch_size)
    steps = int(df_len // batch_size)

    return ds, steps

### Model Fine-tuning and Incremental evaluation

In [17]:
# Batch sizes passed to get_dataset for the training and evaluation datasets.
train_batch_size = 256
eval_batch_size = 32

In [18]:
%%time
# Incremental training: for each day in the window, train on that day's
# sessions and evaluate on the following day's validation sessions.
start_time_window_index = 1
final_time_window_index = 4
# Loop-invariant, so it is set once before the loop.
n_epochs = 3
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_paths = os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet")
    eval_paths = os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet")

    # Load the parquet files, keeping only the item-id sequence column
    train_df = pd.read_parquet(train_paths)
    train_df = train_df[['product_id-list_seq']]
    eval_df = pd.read_parquet(eval_paths)
    eval_df = eval_df[['product_id-list_seq']]

    # Find length of dataframes for argument into `get_dataset`
    train_len = len(train_df)
    eval_len = len(eval_df)

    # Build the datasets
    train_dataset, train_steps = get_dataset(train_df, train_batch_size, train_len)
    eval_dataset, eval_steps = get_dataset(eval_df, eval_batch_size, eval_len)

    # Train on the day related to time_index
    print('*'*20)
    print(f"Launch training for day {time_index} are:")
    print('*'*20 + '\n')

    # The dataset is repeated so that `steps_per_epoch * epochs` batches are
    # available across all epochs.
    history = model.fit(
        train_dataset.repeat(n_epochs),
        steps_per_epoch=train_steps,
        epochs=n_epochs,
        initial_epoch=0,
        verbose=1,
    )

    # Evaluate on the next day; the banner reports the evaluated day
    # (time_index_eval), not the training day.
    print('*'*20)
    print(f"Launch evaluation for day {time_index_eval} are:")
    print('*'*20 + '\n')
    # `batch_size` is not passed: the dataset is already batched, and Keras
    # documents that it must not be specified for dataset inputs.
    results = model.evaluate(eval_dataset)

********************
Launch training for day 1 are:
********************

Epoch 1/3


Projecting inputs of NextItemPredictionTask to'64' As weight tying requires the input dimension '128' to be equal to the item-id embedding dimension '64'


ValueError: in user code:

    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 456, in train_step
        loss = self.compute_loss(inputs, targets, training=True)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 524, in compute_loss
        [
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 525, in <listcomp>
        head.compute_loss(
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 396, in compute_loss
        loss = task.compute_loss(
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/prediction_task.py", line 250, in compute_loss
        update_ops = self.calculate_metrics(predictions, targets, forward=False, loss=loss)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/prediction_task.py", line 279, in calculate_metrics
        metric.update_state(y_true=targets, y_pred=predictions, sample_weight=sample_weight)
    File "/usr/local/lib/python3.9/site-packages/keras/utils/metrics_utils.py", line 73, in decorated
        update_op = update_state_fn(*args, **kwargs)
    File "/usr/local/lib/python3.9/site-packages/keras/metrics.py", line 177, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/usr/local/lib/python3.9/site-packages/keras/metrics.py", line 2343, in update_state  **
        return metrics_utils.update_confusion_matrix_variables(
    File "/usr/local/lib/python3.9/site-packages/keras/utils/metrics_utils.py", line 625, in update_confusion_matrix_variables
        y_pred.shape.assert_is_compatible_with(y_true.shape)

    ValueError: Shapes (None, 118335) and (None,) are incompatible
