# Session-based Recs with Transformers4Rec: RNN - Gated Recurrent Networks

Followed a step by step tutorial:
https://nvidia-merlin.github.io/Transformers4Rec/main/examples/tutorial/index.html

## Imports

In [1]:
import os
import glob
import pandas as pd
import numpy as np

from transformers4rec import tf as tr
import tensorflow as tf
from transformers4rec.tf.ranking_metric import NDCGAt, RecallAt

## Instantiates Schema object from schema file

In [2]:
INPUT_DATA_DIR = os.environ.get("INPUT_DATA_DIR", '../data/')

In [3]:
from merlin_standard_lib import Schema
# define schema object to pass it to the TabularSeqeunceFeatures class
SCHEMA_PATH = os.path.join(INPUT_DATA_DIR, 'schema.pb')
schema = Schema().from_proto_text(SCHEMA_PATH)
schema = schema.select_by_name(['product_id-list_seq'])

In [4]:
# inspect the first lines of schema.pb
!head -30 $SCHEMA_PATH

feature {
  name: "price_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "price_log_norm-list_seq"
    min: -17.176351827798428
    max: 1.7566816406751988
  }
  annotation {
  }
}
feature {
  name: "product_recency_days_log_norm-list_seq"
  value_count {
    min: 2
    max: 20
  }
  type: FLOAT
  float_domain {
    name: "product_recency_days_log_norm-list_seq"
    min: -6.913329620541532
    max: 0.44860732556877836
  }
  annotation {
  }
}


### Defining the input block: `TabularSequenceFeatures`

In [5]:
sequence_length = 20
inputs = tr.TabularSequenceFeatures.from_schema(
    schema,
    max_sequence_length = sequence_length,
    masking = 'causal',
)

2021-11-30 16:57:26.390310: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Connecting the blocks with `SequentialBlock`
when using tensorflow inplace of pytorch means replace block with a one layer sequential block as block has no constructor in tf but does in torch

In [6]:
d_model = 128
body = tr.SequentialBlock(
    [inputs,
    tr.MLPBlock([d_model]),
    tf.keras.layers.GRU(units=d_model)]
)

### Item Prediction head and tying embeddings
hf_format = True argument removed because it is not a keyword argument recognised by tensorflow

In [247]:
# head = tr.Head(
#     body,
#     tr.NextItemPredictionTask(weight_tying=True,
#                               metrics=[NDCGAt(top_ks=[10, 20], labels_onehot=True),
#                                        RecallAt(top_ks=[10, 20], labels_onehot=True)]),
# )
# model = tr.Model(head)
head = tr.Head(
    body,
    tr.NextItemPredictionTask(weight_tying=True,
                              metrics=[tf.keras.metrics.AUC])
)
model = tr.Model(head)

***disregard the dataloader function from schema used in tutorial as this is used in the transformers4rec.torch trainer class which doesn't exist for tf***

### Daily Fine-Tuning: Training over a time window


In [248]:
# define the output of the processed parquet files
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "../data/sessions_by_day")

### Model Fine-tuning and Incremental evaluation

In [249]:
from random import seed
from random import randint
def iterate_over_df(
    df: pd.DataFrame
):  
    seed(1)
    y_value = randint(0, 10)
    def caller():
        for _,j in df.iterrows():
            yield(j['user_session'],
            j['product_id-list_seq']), y_value
    return caller

In [250]:
def ds_from_df(
    df: pd.DataFrame
):
    output_shape_x = (
        tf.TensorShape([]),
        tf.TensorShape([None, ])
    )
    df = tf.data.Dataset.from_generator(
        iterate_over_df(df),
        output_types=((tf.int32, tf.int32), tf.int32),
        output_shapes = (output_shape_x, tf.TensorShape([]))
    )
    return df

In [251]:
def batch_dataset(
        df,
        batch_size: int,
        # gender: str,
        # z='all'
):
    df = df.shuffle(5)
    # datset = dataset.batch(batch_size, drop_remainder=True)
    df = df.padded_batch(batch_size, padded_shapes = (([],[20,]),[]), padding_values = ((0, 0),0),drop_remainder=True)
    df = df.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return df


In [252]:
def get_dataset(        
    df: pd.DataFrame,
    # candidates,
    # product_id_to_type: pd.Series,
    batch_size: int,
    # gender: str,
    # show_history: bool,
    # z='all'
):
    df = ds_from_df(df)
    df = batch_dataset(df,batch_size)
    df = df.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return df
    

In [253]:
def get_train_or_test(
        df: pd.DataFrame,
        batch_size: int
        # candidates,
        # product_id_to_type: pd.Series,
        # gender: str,
        # show_history: bool,
        # z='all'
):
    # calculate steps relative to batch size
    steps = int(np.floor(df.shape[0] / batch_size))
    # remove incomplete batches
    df = df.iloc[:steps * batch_size]
    df = get_dataset(
        df,
        # candidates=candidates,
        # product_id_to_type=product_id_to_type,
        batch_size=batch_size
        # gender=gender,
        # show_history=show_history,
        # z=z
    )
    return df, steps


In [254]:
model.compile(
    optimizer="adam",
    loss = tf.keras.losses.BinaryCrossentropy()
)

In [255]:
train_batch_size = 256
eval_batch_size = 32

In [256]:
%%time
# start_time_window_index = 1
# final_time_window_index = 4
# for time_index in range(start_time_window_index, final_time_window_index):
#     # Set data
time_index = 1
time_index_train = time_index
time_index_eval = time_index + 1
train_paths = os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet")
eval_paths = os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet")

# Initialize dataloaders
train_df = pd.read_parquet(train_paths)
train_df = train_df[['user_session', 'product_id-list_seq']]
eval_df = pd.read_parquet(eval_paths)
eval_df = eval_df[['user_session', 'product_id-list_seq']]
train_dataset, train_steps = get_train_or_test(train_df, train_batch_size)
eval_dataset, eval_steps = get_train_or_test(eval_df, eval_batch_size)

# # Train on day related to time_index
# print('*'*20)
# print("Launch training for day %s are:" %time_index)
# print('*'*20 + '\n')

# n_epochs = 3
# history = model.fit(
#     train_dataset.repeat(n_epochs),
#     steps_per_epoch=train_steps,
#     epochs=n_epochs,
#     initial_epoch=0,  # , callbacks=[pc],
#     verbose=1
# )

CPU times: user 495 ms, sys: 66.2 ms, total: 562 ms
Wall time: 452 ms


In [257]:
# Train on day related to time_index
print('*'*20)
print("Launch training for day %s are:" %time_index)
print('*'*20 + '\n')

n_epochs = 3
history = model.fit(
    train_dataset.repeat(n_epochs),
    steps_per_epoch=train_steps,
    epochs=n_epochs,
    initial_epoch=0,  # , callbacks=[pc],
    verbose=1
)

********************
Launch training for day 1 are:
********************

Epoch 1/3


AttributeError: in user code:

    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 456, in train_step
        loss = self.compute_loss(inputs, targets, training=True)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 524, in compute_loss
        [
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 525, in <listcomp>
        head.compute_loss(
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/model/base.py", line 392, in compute_loss
        body_outputs = self.body(body_outputs)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/config/schema.py", line 50, in __call__
        return super().__call__(*args, **kwargs)
    File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/block/base.py", line 127, in build
        input_shape = layer.compute_output_shape(input_shape)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/features/sequence.py", line 281, in compute_output_shape
        output_shapes = super().compute_output_shape(input_shapes)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/tabular/base.py", line 398, in compute_output_shape
        output_shapes = self._check_post_output_size(self.compute_call_output_shape(input_shapes))
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/features/sequence.py", line 276, in compute_call_output_shape
        output_shapes.update(layer.compute_output_shape(input_shape))
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/tabular/base.py", line 396, in compute_output_shape
        input_shapes = self.pre.compute_output_shape(input_shapes)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/block/base.py", line 103, in compute_output_shape
        output_shape = layer.compute_output_shape(output_shape)
    File "/usr/local/lib/python3.9/site-packages/transformers4rec/tf/tabular/base.py", line 564, in compute_output_shape
        return {k: v for k, v in input_shape.items() if k in self.to_include}

    AttributeError: 'tuple' object has no attribute 'items'


In [229]:
train = list(train_dataset)

In [230]:
train[0][1]

<tf.Tensor: shape=(256,), dtype=int32, numpy=
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)>