In [1]:
# Copyright 2021 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =====

## 3. Customize and Extend Merlin Models

**Learning Objectives of this lab**

- Customize and extend recommender models with Merlin Models

**Import Required Libraries**

In [2]:
import os

import glob
import cudf 
import pandas as pd
import numpy as np
import nvtabular as nvt
from nvtabular.ops import *
import gc

from merlin.schema.tags import Tags
import merlin.models.tf as mm
from merlin.io.dataset import Dataset

import tensorflow as tf

2022-08-16 14:49:07.693200: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-16 14:49:10.076191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8080 MB memory:  -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:0a:00.0, compute capability: 7.0


In [3]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [4]:
# Root directory of the e-commerce dataset. Defaults to the lab's original location,
# but can be overridden through the ECOM_DATA_PATH environment variable so the
# notebook also runs where the data lives somewhere else.
data_path = os.environ.get('ECOM_DATA_PATH', '/workspace/data/ecom/')

# Directory with the processed parquet files (its train/ and valid/ sub-folders are read later).
output_path = os.path.join(data_path, 'processed_nvt')

Read processed parquet files as Dataset objects.

In [5]:
# Wrap the train / valid parquet files in Merlin Dataset objects.
# NOTE(review): part_size="500MB" is passed straight to Dataset — presumably the
# partition size used when reading; confirm against the Dataset documentation.
train = Dataset(
    os.path.join(output_path, "train", "*.parquet"),
    part_size="500MB",
)
valid = Dataset(
    os.path.join(output_path, "valid", "*.parquet"),
    part_size="500MB",
)

# Take the schema from the training dataset, leaving out the 'product_id_count' column.
schema = train.schema.without(['product_id_count'])



In [6]:
#schema

In [7]:
# Name of the first column in the schema that carries the TARGET tag.
target_column = (
    schema
    .select_by_tag(Tags.TARGET)
    .column_names[0]
)
target_column

'target'

In [8]:
# Display the schema (column names, tags, dtypes and properties) used to build the model blocks.
schema

Unnamed: 0,name,tags,dtype,is_list,is_ragged,properties.num_buckets,properties.freq_threshold,properties.max_size,properties.start_index,properties.cat_path,properties.embedding_sizes.cardinality,properties.embedding_sizes.dimension,properties.domain.min,properties.domain.max
0,user_id,"(Tags.CATEGORICAL, Tags.USER_ID, Tags.USER)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.user_id.parquet,1587741.0,512.0,0.0,1587740.0
1,ts_hour,"(Tags.CATEGORICAL, Tags.USER)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.ts_hour.parquet,25.0,16.0,0.0,24.0
2,ts_weekday,"(Tags.CATEGORICAL, Tags.USER)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.ts_weekday.parquet,8.0,16.0,0.0,7.0
3,ts_day,"(Tags.CATEGORICAL, Tags.USER)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.ts_day.parquet,32.0,16.0,0.0,31.0
4,product_id,"(Tags.ITEM, Tags.ITEM_ID, Tags.CATEGORICAL)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.product_id.parquet,123168.0,512.0,0.0,123167.0
5,cat_0,"(Tags.ITEM, Tags.CATEGORICAL)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.cat_0.parquet,14.0,16.0,0.0,13.0
6,cat_1,"(Tags.ITEM, Tags.CATEGORICAL)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.cat_1.parquet,61.0,16.0,0.0,60.0
7,cat_2,"(Tags.ITEM, Tags.CATEGORICAL)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.cat_2.parquet,91.0,20.0,0.0,90.0
8,brand,"(Tags.ITEM, Tags.CATEGORICAL)",int64,False,False,,0.0,0.0,0.0,.//categories/unique.brand.parquet,4140.0,170.0,0.0,4139.0
9,price,"(Tags.ITEM, Tags.CONTINUOUS)",float64,False,False,,,,,,,,,


### 1. Add HashedCross features to DLRM Model

In [40]:
# Pull one un-shuffled batch of 16 rows from the training data, without targets,
# to try individual layers on.
batch = mm.sample_batch(
    train,
    batch_size=16,
    shuffle=False,
    include_targets=False,
)

In [41]:
# Restrict the schema to the two columns that will be crossed.
cross_schema = schema.select_by_name(names=["cat_0", "cat_1"])

# Small demonstration layer: 10 hash bins, one-hot output.
cross = mm.HashedCross(
    cross_schema,
    num_bins=10,
    output_mode="one_hot",
)

In [43]:
# Apply the HashedCross layer to the sample batch to inspect the crossed feature it returns.
cross(batch)

{'cross_cat_0_cat_1': <tf.Tensor: shape=(16, 10), dtype=float32, numpy=
 array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)>}

In [25]:
# Feature crossing with the HashedCross class: the cat_0 x cat_1 combination becomes
# a new one-hot feature spread over 1000 hash bins.
# The single-unit MLP without an activation on its last layer then takes the
# weighted sum of that one-hot vector.
cross_body = mm.HashedCross(
    cross_schema,
    num_bins=1000,
    output_mode="one_hot",
).connect(
    mm.MLPBlock([1], no_activation_last_layer=True),
    block_name='cross_model',
)

In [27]:
# Input block built from the schema columns tagged as continuous.
continuous_block = mm.ContinuousFeatures.from_schema(
    schema,
    tags=Tags.CONTINUOUS,
)

In [28]:
# "Bottom" block: the continuous features followed by an MLP with layer sizes 128 and 64.
bottom_block = continuous_block.connect(
    mm.MLPBlock([128, 64])
)

In [29]:
# Truncated-normal initializer (mean 0.0, stddev 0.05).
# NOTE(review): emb_init is created here but is not passed to mm.Embeddings below,
# so within this cell it has no effect on the embedding tables — confirm whether
# it was meant to be wired in.
emb_init = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05)

# Embedding tables built from the schema, with size inference switched off and a
# default embedding dimension of 64.
embeddings_block = mm.Embeddings(
    schema,
    infer_embedding_sizes=False,
    embedding_dim_default=64,
)

In [30]:
# Run the embeddings and the continuous "bottom" block side by side; the keys name
# each branch ("bottom_block" is the name the shortcut filter selects later).
dlrm_input_block = mm.ParallelBlock(
    dict(embeddings=embeddings_block, bottom_block=bottom_block)
)

In [31]:
from merlin.models.tf.blocks.dlrm import DotProductInteractionBlock

# Feed the parallel inputs through the dot-product interaction block.
# NOTE(review): going by the argument names, the shortcut carries the "bottom_block"
# branch around the interaction and "concat" joins it with the interaction output —
# confirm against the connect_with_shortcut documentation.
dlrm_interaction = dlrm_input_block.connect_with_shortcut(
    DotProductInteractionBlock(),
    shortcut_filter=mm.Filter("bottom_block"),
    aggregation="concat",
)

In [32]:
# Put the DLRM interaction branch and the hashed-cross branch in parallel,
# using "concat" aggregation for their outputs.
dlrm_with_crossbody = mm.ParallelBlock(
    dict(dlrm_interaction=dlrm_interaction, cross_body=cross_body),
    aggregation="concat",
)

In [33]:
# Top MLP (layer sizes 64, 128, 256) applied on the combined branches.
dlrm_with_cross = dlrm_with_crossbody.connect(
    mm.MLPBlock([64, 128, 256])
)

In [34]:
from merlin.models.tf.core.transformations import LogitsTemperatureScaler

# Binary classification head built from the schema; a LogitsTemperatureScaler with
# temperature 2 is supplied as the task's `pre` transformation.
logits_scaler = LogitsTemperatureScaler(temperature=2)
binary_task = mm.BinaryClassificationTask(schema, pre=logits_scaler)

In [35]:
# Assemble the final model: DLRM-with-cross body followed by the binary classification task.
model = mm.Model(
    dlrm_with_cross,
    binary_task,
)

In [36]:
%%time 
model.compile(optimizer='adam', run_eagerly=False, metrics=[tf.keras.metrics.AUC()])
model.fit(train, validation_data=valid, batch_size=4096, epochs=2)

Epoch 1/2
Epoch 2/2
CPU times: user 5min 46s, sys: 58.4 s, total: 6min 44s
Wall time: 2min 44s


<keras.callbacks.History at 0x7f19fa257880>

### Replace `DotProductInteractionBlock` with `CrossBlock`

### Summary 

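In this hands-on lab we learned how to extend a DLRM model built from Merlin Models blocks by adding a `HashedCross` feature-crossing branch in parallel with the DLRM interaction block, and how to train the resulting model on a binary classification task.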