In [1]:
# Copyright 2022 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =====

## 3. Customize and Extend Merlin Models

Merlin Models provides common and state-of-the-art RecSys architectures in a high-level API, as well as all the required low-level building blocks (e.g., input blocks, MLP layers, prediction tasks, loss functions, etc.) for you to create your own architecture.

In this lab, we define the DLRM model architecture from scratch and customize it with Merlin Models.

**Learning Objectives of this lab**

- Understand the building blocks of Merlin Models
- Define DLRM model architecture from scratch and customize it with Merlin Models

**Import Required Libraries**

In [2]:
import os

import glob
import cudf 
import pandas as pd
import numpy as np
import nvtabular as nvt
from nvtabular.ops import *
import gc

from merlin.schema.tags import Tags
import merlin.models.tf as mm
from merlin.io.dataset import Dataset

import tensorflow as tf

2022-08-25 16:19:42.224269: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-25 16:19:42.225779: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-25 16:19:42.226754: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:991] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-25 16:19:42.248948: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate 

In [3]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [6]:
# Root folder of the e-commerce dataset. It can be overridden through the
# DATA_PATH environment variable; when that variable is unset, the original
# default location is used, so existing setups keep working unchanged.
data_path = os.environ.get("DATA_PATH", '/workspace/data/ecom/')
# 'processed_nvt' sub-folder of the data root.
output_path = os.path.join(data_path, 'processed_nvt')

Read processed parquet files as Dataset objects.

In [10]:
# Wrap the processed train / valid parquet files as Dataset objects,
# each created with part_size="500MB".
train_glob = os.path.join(output_path, "train", "*.parquet")
valid_glob = os.path.join(output_path, "valid", "*.parquet")
train = Dataset(train_glob, part_size="500MB")
valid = Dataset(valid_glob, part_size="500MB")

# Define the schema object: the training schema without the columns listed here.
excluded_columns = ['event_time_ts', 'user_id_raw', 'product_id_raw']
schema = train.schema.without(excluded_columns)

In [11]:
# Name of the first column that carries the TARGET tag in the schema.
target_schema = schema.select_by_tag(Tags.TARGET)
target_column = target_schema.column_names[0]
target_column

'target'

In [12]:
# Display the schema (column names, tags, dtypes and properties).
schema

Unnamed: 0,name,tags,dtype,is_list,is_ragged,properties.num_buckets,properties.freq_threshold,properties.max_size,properties.start_index,properties.cat_path,properties.embedding_sizes.cardinality,properties.embedding_sizes.dimension,properties.domain.min,properties.domain.max
0,user_id,"(Tags.CATEGORICAL, Tags.USER, Tags.USER_ID)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.user_id.parquet,351050.0,512.0,0.0,351049.0
1,ts_weekday,"(Tags.CATEGORICAL, Tags.USER)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.ts_weekday.parquet,8.0,16.0,0.0,7.0
2,ts_hour,"(Tags.CATEGORICAL, Tags.USER)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.ts_hour.parquet,25.0,16.0,0.0,24.0
3,product_id,"(Tags.ITEM, Tags.ITEM_ID, Tags.CATEGORICAL)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.product_id.parquet,51425.0,512.0,0.0,51424.0
4,cat_0,"(Tags.ITEM, Tags.CATEGORICAL)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.cat_0.parquet,14.0,16.0,0.0,13.0
5,cat_1,"(Tags.ITEM, Tags.CATEGORICAL)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.cat_1.parquet,61.0,16.0,0.0,60.0
6,cat_2,"(Tags.ITEM, Tags.CATEGORICAL)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.cat_2.parquet,90.0,20.0,0.0,89.0
7,brand,"(Tags.ITEM, Tags.CATEGORICAL)",int32,False,False,,0.0,0.0,0.0,.//categories/unique.brand.parquet,2654.0,132.0,0.0,2653.0
8,price,"(Tags.ITEM, Tags.CONTINUOUS)",float32,False,False,,,,,,,,,
9,relative_price,"(Tags.ITEM, Tags.CONTINUOUS)",float32,False,False,,,,,,,,,


In [13]:
# Draw one unshuffled batch of 16 rows from the training set, without the
# targets; later cells call blocks on it to inspect their outputs.
batch = mm.sample_batch(
    train,
    batch_size=16,
    shuffle=False,
    include_targets=False,
)

### 1. Add HashedCross features to DLRM Model

Let's explain the functions and blocks that we use to build our DLRM model from scratch. The `Block` is the core abstraction in Merlin Models and is the class from which all blocks inherit. The class extends the tf.keras.layers.Layer base class and implements a number of properties that simplify the creation of custom blocks and models. These properties include the Schema object for determining the embedding dimensions, input shapes, and output shapes. Additionally, the Block has a ModelContext instance to store and retrieve public variables and share them with other blocks in the same model as additional meta-data.

**Features Blocks** <br>

`Embeddings:` Creates a ParallelBlock with an EmbeddingTable for each categorical feature in the schema. <br>
`ContinuousFeatures:` Input block for continuous features.

**Connects Methods** <br>
The base class Block implements different connect methods that control how to link a given block to other blocks:

- `connect:` Connect the block to other blocks sequentially. The output is a tensor returned by the last block.
- `connect_branch:` Link the block to other blocks in parallel. The output is a dictionary containing the output tensor of each block.
- `connect_with_shortcut:` Connect the block to other blocks sequentially and apply a skip connection with the block's output.
- `connect_with_residual:` Connect the block to other blocks sequentially and apply a residual sum with the block's output.

In [14]:
# Input block for the schema columns tagged as CONTINUOUS.
continuous_block = mm.ContinuousFeatures.from_schema(
    schema,
    tags=Tags.CONTINUOUS,
)

In [None]:
# Define the bottom block, where the continuous features are passed through an MLP.

In [None]:
# Bottom block: the continuous input block followed sequentially by an MLP
# with layer sizes [128, 64].
bottom_mlp = mm.MLPBlock([128, 64])
bottom_block = continuous_block.connect(bottom_mlp)

In [15]:
from merlin.models.utils.schema_utils import infer_embedding_dim

# Embedding tables for the categorical columns of the schema, requested with
# an embedding dimension of 64.
# NOTE(review): both an explicit `dim` and `infer_dim_fn` are supplied here;
# presumably the explicit `dim` wins and `infer_dim_fn` is only a fallback -
# confirm against the Merlin Models version in use.
categorical_schema = schema.select_by_tag(Tags.CATEGORICAL)
embeddings_block = mm.Embeddings(
    categorical_schema,
    dim=64,
    infer_dim_fn=infer_embedding_dim,
)

In [17]:
# Place the embeddings and the bottom block side by side as two named
# branches; no aggregation is requested at this point.
dlrm_branches = {"embeddings": embeddings_block, "bottom_block": bottom_block}
dlrm_input_block = mm.ParallelBlock(dlrm_branches)

In [18]:
from merlin.models.tf.blocks.dlrm import DotProductInteractionBlock

# Connect the dot-product interaction after the parallel inputs with a
# shortcut: the shortcut is filtered down to the "bottom_block" branch and
# the results are aggregated with "concat".
dlrm_interaction = dlrm_input_block.connect_with_shortcut(
    DotProductInteractionBlock(),
    shortcut_filter=mm.Filter("bottom_block"),
    aggregation="concat",
)

In [19]:
# Sub-schema holding only the two columns to be crossed.
cross_schema = schema.select_by_name(names=["cat_0", "cat_1"])
# Hashed cross of those columns into 10 bins, emitted in "one_hot" mode.
cross = mm.HashedCross(
    cross_schema,
    num_bins=10,
    output_mode="one_hot",
)

In [20]:
# Apply the hashed cross to the sample batch to inspect its output.
cross(batch)

{'cross_cat_0_cat_1': <tf.Tensor: shape=(16, 10), dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)>}

In [21]:
# Feature crossing with the HashedCross class creates a new feature from the
# columns in cross_schema (1000 hash bins, "one_hot" output). A single-unit
# MLP without an activation on its last layer is connected after it, which
# takes the weighted sum of the one-hot values.
hashed_cross = mm.HashedCross(cross_schema, num_bins=1000, output_mode="one_hot")
cross_projection = mm.MLPBlock([1], no_activation_last_layer=True)
cross_body = hashed_cross.connect(cross_projection, block_name='cross_model')

In [22]:
# Run the DLRM interaction and the hashed-cross body in parallel and
# concatenate their outputs.
crossbody_branches = {
    "dlrm_interaction": dlrm_interaction,
    "cross_body": cross_body,
}
dlrm_with_crossbody = mm.ParallelBlock(crossbody_branches, aggregation="concat")

In [23]:
# Top MLP (layer sizes [64, 128, 256]) connected after the concatenated branches.
top_mlp = mm.MLPBlock([64, 128, 256])
dlrm_with_cross = dlrm_with_crossbody.connect(top_mlp)

In [24]:
from merlin.models.tf.core.transformations import LogitsTemperatureScaler

# Binary classification task built from the schema; a temperature scaler
# (temperature=2) is passed as the task's `pre` step.
logits_scaler = LogitsTemperatureScaler(temperature=2)
binary_task = mm.BinaryClassificationTask(schema, pre=logits_scaler)

**LogitsTemperatureScaler:** It scales the output tensor of predicted logits to lower the model's confidence.

In [25]:
# Assemble the model from the customised DLRM body and the binary task.
model = mm.Model(
    dlrm_with_cross,
    binary_task,
)

In [26]:
%%time 
model.compile(optimizer='adam', run_eagerly=False, metrics=[tf.keras.metrics.AUC()])
model.fit(train, validation_data=valid, batch_size=4096, epochs=2)

Epoch 1/2
Epoch 2/2
CPU times: user 53.2 s, sys: 8.23 s, total: 1min 1s
Wall time: 30.6 s


<keras.callbacks.History at 0x7f00aece8f70>

### 2. Replace `DotProductInteractionBlock` with `CrossBlock`

In [27]:
# Rebuild the continuous input block and the bottom block (MLP with layer
# sizes [128, 64]) so this section starts from fresh blocks.
continuous_block = mm.ContinuousFeatures.from_schema(
    schema,
    tags=Tags.CONTINUOUS,
)
bottom_mlp = mm.MLPBlock([128, 64])
bottom_block = continuous_block.connect(bottom_mlp)

In [28]:
# Truncated-normal initializer (mean 0.0, stddev 0.05).
# NOTE(review): `emb_init` is created here but is not passed to
# `mm.Embeddings` below, so it has no effect in this cell - presumably it was
# meant to be supplied as the embeddings initializer; confirm and wire it in
# or remove it.
emb_init = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05)

# Embedding tables for the categorical columns, requested with dimension 64.
categorical_schema = schema.select_by_tag(Tags.CATEGORICAL)
embeddings_block = mm.Embeddings(categorical_schema, dim=64)

In [29]:
# Display the structure of the embeddings block.
embeddings_block

ParallelBlock(
  (parallel_layers): Dict(
    (user_id): EmbeddingTable(
      (table): Embedding()
    )
    (ts_weekday): EmbeddingTable(
      (table): Embedding()
    )
    (ts_hour): EmbeddingTable(
      (table): Embedding()
    )
    (product_id): EmbeddingTable(
      (table): Embedding()
    )
    (cat_0): EmbeddingTable(
      (table): Embedding()
    )
    (cat_1): EmbeddingTable(
      (table): Embedding()
    )
    (cat_2): EmbeddingTable(
      (table): Embedding()
    )
    (brand): EmbeddingTable(
      (table): Embedding()
    )
  )
)

In [30]:
# Run the embeddings block on the sample batch, then show the returned keys
# together with the shape of the "user_id" embedding.
embeddings = embeddings_block(batch)
user_id_embedding = embeddings["user_id"]
embeddings.keys(), user_id_embedding.shape

(dict_keys(['user_id', 'ts_weekday', 'ts_hour', 'product_id', 'cat_0', 'cat_1', 'cat_2', 'brand']),
 TensorShape([16, 64]))

In [31]:
# Same check for the "cat_0" embedding: show the keys and that table's output shape.
embeddings.keys(), embeddings["cat_0"].shape

(dict_keys(['user_id', 'ts_weekday', 'ts_hour', 'product_id', 'cat_0', 'cat_1', 'cat_2', 'brand']),
 TensorShape([16, 64]))

In [32]:
# Embeddings and bottom block in parallel again, this time concatenating the
# branch outputs into a single tensor.
input_branches = {"embeddings": embeddings_block, "bottom_block": bottom_block}
dlrm_input_block = mm.ParallelBlock(input_branches, aggregation="concat")

In [35]:
# Apply the concatenating input block to the sample batch to inspect its output.
dlrm_input_block(batch)

<tf.Tensor: shape=(16, 576), dtype=float32, numpy=
array([[ 0.01392121,  0.45913607,  0.2622238 , ...,  0.04994252,
         0.03101527,  0.04198844],
       [ 0.        ,  0.41646555,  0.19901349, ...,  0.04994252,
         0.03101527,  0.04198844],
       [ 0.52526206,  0.20520255,  0.15573373, ...,  0.04994252,
         0.03101527,  0.04198844],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., -0.01256059,
        -0.02746077,  0.03332857],
       [ 0.        ,  0.        ,  0.        , ..., -0.01256059,
        -0.02746077,  0.03332857],
       [ 0.4295384 ,  0.12559405,  0.37185472, ..., -0.0059145 ,
        -0.00363963, -0.04372158]], dtype=float32)>

In [36]:
# Stacked: the cross block is connected directly after the concatenated inputs.
# NOTE(review): the argument 2 is presumably the number of cross layers - confirm.
cross_layers = mm.CrossBlock(2)
dcn_body = dlrm_input_block.connect(cross_layers)

In [37]:
# Apply the cross body to the sample batch to inspect its output.
dcn_body(batch)

<tf.Tensor: shape=(16, 576), dtype=float32, numpy=
array([[ 0.0170048 ,  0.5523888 ,  0.23968421, ...,  0.03922804,
         0.0300741 ,  0.0384398 ],
       [ 0.        ,  0.5237767 ,  0.18938303, ...,  0.03893476,
         0.03084259,  0.03894754],
       [ 0.55330867,  0.20533781,  0.14267543, ...,  0.04886024,
         0.02984576,  0.03495263],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., -0.0125942 ,
        -0.02835404,  0.03097234],
       [ 0.        ,  0.        ,  0.        , ..., -0.01287693,
        -0.02807443,  0.03240041],
       [ 0.51761013,  0.1155797 ,  0.3613554 , ..., -0.00556562,
        -0.003835  , -0.04395641]], dtype=float32)>

In [38]:
# Concatenate the cross-block output with the bottom-block output.
interaction_branches = {"dcn_body": dcn_body, "bottom_block": bottom_block}
dlrm_interaction = mm.ParallelBlock(interaction_branches, aggregation="concat")

In [39]:
# Connect a deep MLP (layer sizes [64, 128, 512]) after the interaction block
# and apply the result to the sample batch to inspect its output.
deep_mlp = mm.MLPBlock([64, 128, 512])
deep_dlrm_interaction = dlrm_interaction.connect(deep_mlp)
deep_dlrm_interaction(batch)

<tf.Tensor: shape=(16, 512), dtype=float32, numpy=
array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00122097],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.01111526, 0.01226175, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.01572819, 0.        , ..., 0.00240139, 0.        ,
        0.00226653],
       [0.        , 0.01243099, 0.00301217, ..., 0.01021571, 0.00198259,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.02407214, 0.        ,
        0.03056245]], dtype=float32)>

In [40]:
from merlin.models.tf.core.transformations import LogitsTemperatureScaler

# Fresh binary classification task for the second model, again with a
# temperature scaler (temperature=2) passed as the task's `pre` step.
logits_scaler = LogitsTemperatureScaler(temperature=2)
binary_task = mm.BinaryClassificationTask(schema, pre=logits_scaler)

In [41]:
# Assemble the second model from the CrossBlock-based body and the binary task.
model = mm.Model(
    deep_dlrm_interaction,
    binary_task,
)

In [42]:
%%time 
model.compile(optimizer='adam', run_eagerly=False, metrics=[tf.keras.metrics.AUC()])
model.fit(train, validation_data=valid, batch_size=4096, epochs=2)

Epoch 1/2
Epoch 2/2
CPU times: user 46.2 s, sys: 8.2 s, total: 54.4 s
Wall time: 27.5 s


<keras.callbacks.History at 0x7f00a3f5f5b0>

### Summary 

In this hands-on lab, we learned:

- how to use a subset of these pre-existing blocks to create the DLRM model
- how to add HashedCross features to DLRM Model
- how to replace DotProductInteractionBlock with CrossBlock

Please execute the cell below to shut down the kernel before moving on to the next notebook, `04-Building-multi-stage-RecSys-with-Merlin-Systems`.

In [None]:
import IPython

# Shut down the running kernel before moving on to the next notebook.
# NOTE(review): the True argument is presumably the restart flag - confirm.
app = IPython.Application.instance()
kernel = app.kernel
kernel.do_shutdown(True)