<div style="border-radius:10px; border:#DEB887 solid; padding: 15px; background-color: #FFFAF0; font-size:100%; text-align:left">
<h3 align="left"><font color='#DEB887'>Info</font></h3>
    
In this notebook the model for the Recommender System is created.

In [529]:
import mlflow
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs

from typing import Dict, Text

# MLFlow setup

In [530]:
experiment_name = "Joropo_Expreriment"

if not mlflow.get_experiment_by_name(experiment_name):
    mlflow.create_experiment(name=experiment_name) 

experiment = mlflow.get_experiment_by_name(experiment_name)

# Variables

In [531]:
path_data = "../data/processed/Final_data_for_ML.parquet"
path_model = '../models'
epochs = 1000
embedding_dimension = 64
learning_rate = 0.5

# Function definition

In [532]:
def find_lowest_value(array):
  lowest_value = array[0]
  for value in array:
    if value < lowest_value:
      lowest_value = value
  return lowest_value

# Load data

In [533]:
df = pd.read_parquet(path_data)
df = df[:1000]
total_ratings= len(df.index)

# Prepare data

In [534]:
ratings = tf.data.Dataset.from_tensor_slices( {"reviewerID":tf.cast( df.reviewerID.values, tf.string),
                                                "title" : tf.cast( df.title.values, tf.string)} )                                           

In [535]:
ratings = ratings.map(lambda x: {
    "title": x["title"],
    "reviewerID": x["reviewerID"],
})

In [536]:
products = ratings.map(lambda x: x["title"])

In [537]:
products

<_MapDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [538]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take( int(total_ratings*0.8) )
test = shuffled.skip(int(total_ratings*0.8)).take(int(total_ratings*0.2))

In [539]:
product_titles = products.batch(1_000)
user_ids = ratings.batch(1_000_000).map(lambda x: x["reviewerID"])

unique_product_titles = np.unique(np.concatenate(list(product_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# Create the model

In [540]:
product_title_lookup = tf.keras.layers.StringLookup()
product_title_lookup.adapt(ratings.map(lambda x: x["title"]))
product_title_embedding = tf.keras.layers.Embedding(input_dim=product_title_lookup.vocabulary_size(),output_dim=64)
product_model = tf.keras.Sequential([product_title_lookup, product_title_embedding])

In [541]:
user_id_lookup = tf.keras.layers.StringLookup()
user_id_lookup.adapt(ratings.map(lambda x: x["reviewerID"]))
user_id_embedding = tf.keras.layers.Embedding(user_id_lookup.vocabulary_size(), 64)
user_model = tf.keras.Sequential([user_id_lookup, user_id_embedding])

In [542]:
metrics = tfrs.metrics.FactorizedTopK(
  candidates=products.batch(128).map(user_model)
)

In [543]:
task = tfrs.tasks.Retrieval(
  #metrics=metrics
  metrics=[]
)

In [544]:
class MovielensModel(tfrs.Model):

  def __init__(self, user_model, movie_model):
    super().__init__()
    self.product_model: tf.keras.Model = product_model
    self.user_model: tf.keras.Model = user_model
    self.task: tf.keras.layers.Layer = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    user_embeddings = self.user_model(features["reviewerID"])
    positive_movie_embeddings = self.product_model(features["title"])

    return self.task(user_embeddings, positive_movie_embeddings)

In [545]:
model = MovielensModel(user_model, product_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate))

In [546]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [547]:
Loss_call_back = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=3, verbose=1, mode='min')

# Fit the model

In [548]:
model_hist = model.fit(cached_train, 
                        epochs=epochs, 
                        callbacks=[Loss_call_back])

Epoch 1/1000


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 14: early stopping


# Evaluate the model

In [549]:
evaluate_history = model.evaluate(cached_test, 
                return_dict=True)



# Log params and metrics in MLFlow

In [550]:
mlflow.start_run(experiment_id=experiment.experiment_id)

mlflow.log_param("epochs", epochs)
mlflow.log_param("embedding_dimension", embedding_dimension)
mlflow.log_param("learning_rate", learning_rate)

mlflow.log_metric("Loss", find_lowest_value(model_hist.history['loss']))
mlflow.log_metric("y_Loss", evaluate_history["loss"])

mlflow.end_run()

# Manual prediction

In [551]:
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

index.index_from_dataset(tf.data.Dataset.zip((products.batch(100), products.batch(100).map(model.product_model))))

_, titles = index(tf.constant(["42"]))

print(f"Recommendations for user 42: {titles[0, :3]}")

Recommendations for user 42: [b'Fast &amp; Fresh Baby Food Cookbook: 120 Ridiculously Simple and Naturally Wholesome Baby Food Recipes'
 b'Baking for Two: The Small-Batch Baking Cookbook for Sweet and Savory Treats'
 b'Baking for Two: The Small-Batch Baking Cookbook for Sweet and Savory Treats']


# Save model for deployment

In [552]:
tf.saved_model.save(index, path_model)
loaded = tf.saved_model.load(path_model)
scores, titles = loaded(["42"])

print(f"Recommendations: {titles[0][:3]}")









INFO:tensorflow:Assets written to: ../models\assets


INFO:tensorflow:Assets written to: ../models\assets


Recommendations: [b'Fast &amp; Fresh Baby Food Cookbook: 120 Ridiculously Simple and Naturally Wholesome Baby Food Recipes'
 b'Baking for Two: The Small-Batch Baking Cookbook for Sweet and Savory Treats'
 b'Baking for Two: The Small-Batch Baking Cookbook for Sweet and Savory Treats']
