# Store the model

- **Resources**
    - Tf efficient serving [blog](https://www.tensorflow.org/recommenders/examples/efficient_serving)

In [1]:
import os
import tempfile

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

from os.path import join

In [2]:
dataset_parsed_path = "/home/jupyter/mlspec-blackfriday/dataset/parsed/202104130952/"
train_path = join(dataset_parsed_path, "train.csv")
test_path = join(dataset_parsed_path, "eval.csv")

df_train=pd.read_csv(train_path)
df_test=pd.read_csv(test_path)

# Treat al columns as strings
df_train = df_train.astype(str)
df_test = df_test.astype(str)

print(f"df_train shape:{df_train.shape}")
print(f"df_test shape:{df_test.shape}")

df_train shape:(78366, 12)
df_test shape:(156733, 12)


In [3]:
# Tf dataset
tf.random.set_seed(42)
train = tf.data.Dataset.from_tensor_slices(dict(df_train)) # [!] dict is important
train = train.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

test = tf.data.Dataset.from_tensor_slices(dict(df_test))

cached_train = train.shuffle(100_000).batch(2048) # TODO: double shuffle?
cached_test = test.batch(4096).cache()

In [4]:
# Define used features

product_features = ["Product_ID"]

user_features = ["Gender", 
                 "Age", 
                 "Occupation", 
                 "City_Category", 
                 "Stay_In_Current_City_Years",
                 "Marital_Status"
                ]

# Remove columns not used
all_features = user_features + product_features

df_train = df_train[all_features]
df_test = df_test[all_features]

# Extract unique info
df = df_train.append(df_test)

product_unique_values = {
    feature: df[feature].unique() for feature in product_features
}

user_unique_values = {
    feature: df[feature].unique() for feature in user_features
}

## Model 

In [5]:
class UserModel(tf.keras.Model):

    def __init__(self, unique_values:dict): # [Gender, [M, F]]
        super().__init__()
        
        self.user_features = {}
        for feature_name, unique_list in unique_values.items():
            feature_layer = tf.keras.Sequential([
                                tf.keras.layers.experimental.preprocessing.StringLookup(
                                    vocabulary=unique_list, mask_token=None),
                                    tf.keras.layers.Embedding(len(unique_list) + 1, 32),
                            ], feature_name )
            self.user_features[feature_name] = feature_layer
            


    def call(self, inputs):
        layers_stack = []
        for feature_name, feature_layer in self.user_features.items():
            print(f"Creating layer for feature {feature_name}")
            layer_valorized = feature_layer(inputs[feature_name])
            layers_stack.append(layer_valorized)
        return tf.concat(layers_stack, axis=1)

In [6]:
class ProductModel(tf.keras.Model):
    
    def __init__(self, product_unique_ids:np.ndarray):
        super().__init__()
        
        self.product_embedding = tf.keras.Sequential([
          tf.keras.layers.experimental.preprocessing.StringLookup(
              vocabulary=product_unique_ids, mask_token=None),
          tf.keras.layers.Embedding(len(product_unique_ids) + 1, 32)
        ])


    def call(self, products_id):
        return tf.concat([
            self.product_embedding(products_id)
        ], axis=1)

In [7]:
class BlackFridayModel(tfrs.models.Model): # note the main package is tfrs
    """
    Note:
    - no closure required
    """
    def __init__(self, 
                 user_model,
                 product_model,
                 topk_candidates: tf.data.Dataset,
                 user_unique_values: dict,
                 product_unique_ids: np.ndarray,
                ):
        super().__init__()
        self.product_embedder = product_model
        self.user_embedder = user_model
        
        self.user_features = user_unique_values.keys()
        self.query_model = tf.keras.Sequential([
                              user_model(user_unique_values),
                              tf.keras.layers.Dense(32)
                            ])
        self.candidate_model = tf.keras.Sequential([
                              product_model(product_unique_ids),
                              tf.keras.layers.Dense(32)
                            ])
        # See https://www.tensorflow.org/recommenders/api_docs/python/tfrs/tasks/Retrieval
        self.task = tfrs.tasks.Retrieval( # Loss function. Defaults to tf.keras.losses.CategoricalCrossentropy.
            metrics=tfrs.metrics.FactorizedTopK( # TODO are we forced to use batch? - REMOVE for training
                candidates=topk_candidates.batch(128).map(self.candidate_model), # dataset of candidate embeddings from which candidates should be retrieved (embedded)
            ),
        )
    def get_user_tower(self):
        return self.query_model
    
    def get_product_tower(self):
        return self.candidate_model
    
    def compute_loss(self, features, training=False):
        query_data = {feature_name: features[feature_name] for feature_name in self.user_features}
        query_embeddings = self.query_model(query_data)
        product_embeddings = self.candidate_model(features["Product_ID"])
        
        # Retrieval call: https://www.tensorflow.org/recommenders/api_docs/python/tfrs/tasks/Retrieval
        # "The task will try to maximize the affinity of these query, candidate pairs while minimizing 
        # the affinity between the query and candidates belonging to other queries in the batch."
        return self.task(query_embeddings=query_embeddings, 
                         candidate_embeddings=product_embeddings,
                         compute_metrics=True, # disable for better performances
                         candidate_ids = None
                        )

## Train

In [8]:
# Candidates to use for metrics
tf_unique_products = tf.data.Dataset.from_tensor_slices(product_unique_values["Product_ID"])
tf_unique_products.element_spec

model = BlackFridayModel(UserModel, ProductModel,
                        topk_candidates = tf_unique_products,
                        user_unique_values = user_unique_values,
                        product_unique_ids = product_unique_values["Product_ID"]
                        )

In [9]:
# Create a callback that saves the model's weights
from datetime import datetime
run_id = datetime.today().strftime('%Y%m%d%H%M%S')
run_path = f"./models/{run_id}/"

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=run_path,
                                                 save_weights_only=False,
                                                 verbose=1)

In [10]:
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(cached_train, epochs=1, callbacks=cp_callback)

Consider rewriting this model with the Functional API.
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status

Epoch 00001: saving model to ./models/20210422124258/


<tensorflow.python.keras.callbacks.History at 0x7fb891e52550>

## Create the ScaNN model
- **Note**
    - Based on [Deploying the approximate model](https://www.tensorflow.org/recommenders/examples/efficient_serving#building_a_scann-powered_model)

**Simple test**
- Use the naive BruteForce class

In [18]:
!pip freeze | grep scann

scann==1.2.1


In [11]:
index = tfrs.layers.factorized_top_k.BruteForce(model.get_user_tower())

index.index(tf_unique_products.batch(100).map(model.get_product_tower()), tf_unique_products)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7fb8918e2b10>

**ScaNN model:**

In [12]:
# Create the ScaNN with user embedder
scann = tfrs.layers.factorized_top_k.ScaNN(model.get_user_tower(), num_reordering_candidates=1000)

# Load candidate data
scann.index(tf_unique_products.batch(100).map(model.get_product_tower()), tf_unique_products) # [!] how many data are we storing? - All!

<tensorflow_recommenders.layers.factorized_top_k.ScaNN at 0x7fb8912836d0>

In [13]:
# Get recommendations:

input_data = {
    "Gender": tf.constant(["M"]), # <-- [!] require list inside constant
    "Age": tf.constant(["26-35"]),
    "Occupation": tf.constant(["0"]),
    "City_Category": tf.constant(["B"]),
    "Stay_In_Current_City_Years": tf.constant(["4+"]),
    "Marital_Status": tf.constant(["0"]),
}
scann(input_data)


Consider rewriting this model with the Functional API.
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status


(<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[0.7119358 , 0.64604574, 0.620901  , 0.6093435 , 0.5951483 ,
         0.5938488 , 0.58764935, 0.57615477, 0.56247985, 0.5602325 ]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 10), dtype=string, numpy=
 array([[b'P00221142', b'P00299442', b'P00021142', b'P00016842',
         b'P00303142', b'P00168742', b'P00150442', b'P00222442',
         b'P00280842', b'P00004442']], dtype=object)>)

In [17]:
scann._identifiers.shape

TensorShape([3484])

In [14]:
print(f"Number of candidates inside ScaNN:{scann._identifiers.shape}")
assert scann._identifiers.shape[0] == len(product_unique_values["Product_ID"]), f"Not all candidate are stored into the ScaNN system"

Number of candidates inside ScaNN:(3484,)


## Store the models

## Store ScaNN model

In [20]:
scann_path = join(run_path, "Scann")

scann.save(
    scann_path,
    options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])
  )

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the F

Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status




INFO:tensorflow:Assets written to: ./models/20210422124258/Scann/assets


INFO:tensorflow:Assets written to: ./models/20210422124258/Scann/assets


## Store ML model

In [16]:
# Store the user embedding model
user_model = model.get_user_tower()
user_path = join(run_path, "user")
user_model.save(user_path, options=tf.saved_model.SaveOptions(namespace_whitelist=["UserModel"]))

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the F

Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
Creating layer for feature Gender
Creating layer for feature Age
Creating layer for feature Occupation
Creating layer for feature City_Category
Creating layer for feature Stay_In_Current_City_Years
Creating layer for feature Marital_Status
INFO:tensorflow:Assets written to: ./models/20210419093152/user/assets


INFO:tensorflow:Assets written to: ./models/20210419093152/user/assets


In [17]:
# Store the user embedding model
candidate_model = model.get_product_tower()
candidate_path = join(run_path, "candidate")
candidate_model.save(candidate_path, options=tf.saved_model.SaveOptions(namespace_whitelist=["CandidateModel"]))

INFO:tensorflow:Assets written to: ./models/20210419093152/candidate/assets


INFO:tensorflow:Assets written to: ./models/20210419093152/candidate/assets


---
# Load the models

## ScaNN load

In [18]:
scann_loaded = tf.keras.models.load_model(scann_path)













In [19]:
# Get recommendations:

input_data = {
    "Gender": tf.constant(["M"]), # <-- [!] require list inside constant
    "Age": tf.constant(["26-35"]),
    "Occupation": tf.constant(["0"]),
    "City_Category": tf.constant(["B"]),
    "Stay_In_Current_City_Years": tf.constant(["4+"]),
    "Marital_Status": tf.constant(["0"]),
}
scann_loaded(input_data)






(<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[0.7119358 , 0.64604574, 0.620901  , 0.6093435 , 0.5951483 ,
         0.5938488 , 0.58764935, 0.57615477, 0.56247985, 0.5602325 ]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 10), dtype=string, numpy=
 array([[b'P00221142', b'P00299442', b'P00021142', b'P00016842',
         b'P00303142', b'P00168742', b'P00150442', b'P00222442',
         b'P00280842', b'P00004442']], dtype=object)>)

## Embedding models

In [23]:
# Test user embedding
user_model = tf.keras.models.load_model(user_path)

input_data = {
    "Gender": tf.constant(["M"]), # <-- [!] require list inside constant
    "Age": tf.constant(["26-35"]),
    "Occupation": tf.constant(["0"]),
    "City_Category": tf.constant(["B"]),
    "Stay_In_Current_City_Years": tf.constant(["4+"]),
    "Marital_Status": tf.constant(["0"]),
}
user_model(input_data)





























Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.






<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.20700587,  0.0947971 , -0.11562292,  0.27679503, -0.14745897,
         0.08679863,  0.07959864, -0.01188067,  0.16708054,  0.10946328,
         0.03467712,  0.16648084, -0.27423215, -0.0900174 ,  0.03211596,
         0.17735957, -0.10775164, -0.13040575, -0.00611744, -0.11471772,
        -0.4367543 ,  0.2719013 , -0.02833449,  0.16547915,  0.10113018,
        -0.03219921, -0.05203754, -0.11744717, -0.04397772, -0.0646404 ,
        -0.23835519, -0.0671583 ]], dtype=float32)>

In [34]:
# Test product embedding
candidate_model = tf.keras.models.load_model(candidate_path)

input_data = {
    "Product_ID": tf.constant(["ProductNeverShowed"]), # <-- [!] require list inside constant
    }

candidate_model(input_data)













<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.03294051,  0.02109322,  0.00466959,  0.00893892, -0.01463873,
         0.00661785,  0.02543953, -0.04188783,  0.01310662,  0.01502049,
         0.00212025,  0.02856631, -0.00602527, -0.00968197,  0.02498633,
         0.0018321 ,  0.00453932, -0.04844395, -0.01046825,  0.02039592,
        -0.02211715,  0.01750563, -0.03150826,  0.00307698, -0.00459771,
        -0.01313829,  0.01194914, -0.00198265,  0.01471668, -0.06453553,
         0.00610681, -0.01482611]], dtype=float32)>

-----------