## Recommendation model - Two tower model

### Import TFRS

First, install and import TFRS:

In [9]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [10]:
from typing import Dict, Text
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

### Load tha Data

In [11]:
ratings = tfds.load('movielens/100k-ratings', split="train")
movies = tfds.load('movielens/100k-movies', split="train")

# Select the basic features
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])

Build vocabularies to convert user ids and movie titles into integer indices for embedding layers:

In [12]:
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

### Define a model

We can define a TFRS model by inheriting from `tfrs.Model` and implementing the `compute_loss` method:

In [13]:
class TwoTowerModel(tfrs.Model):
    def __init__(self, user_model: tf.keras.Model, movie_model: tf.keras.Model, task: tfrs.tasks.Retrieval):
        super().__init__()
        self.user_model = user_model
        self.movie_model = movie_model
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
        return self.task(user_embeddings, movie_embeddings)


Define the two models and the retrieval task.

In [14]:
# Define user and movie models
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), 64)
])
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocabulary_size(), 64)
])



### Fit and evaluate it.

Create the model, train it, and generate predictions:



In [32]:
# Define the retrieval task
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)
))

# Create the retrieval model
model = TwoTowerModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Train the model for 3 epochs
model.fit(ratings.batch(4096), epochs=3)

# Use brute-force search to set up retrieval using the trained representations
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    movies.batch(100).map(lambda title: (title, model.movie_model(title)))
)

# Get some recommendations
_, titles = index(np.array(["42"]))

# Convert titles tensor to a list
top_5_titles = titles[0, :5].numpy()

# Print the recommendations in the desired format
print("Top 5 recommendations for user 42:")
for rank, title in enumerate(top_5_titles, 1):
    print(f"{rank}. {title.decode('utf-8')}")


Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 5 recommendations for user 42:
1. Princess Caraboo (1994)
2. Desperado (1995)
3. Mallrats (1995)
4. Christmas Carol, A (1938)
5. Muriel's Wedding (1994)


## Recommendation model - Wide & Deep model

## Import TRFS

In [17]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [18]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

## Load and preprocess the data:

In [19]:
# Ratings data.
ratings = tfds.load('movielens/100k-ratings', split="train")
# Features of all the available movies.
movies = tfds.load('movielens/100k-movies', split="train")

# Select the basic features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])

## Build vocabularies:

In [20]:
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

## Define the Wide & Deep model:

In [21]:
class MovieLensModel(tfrs.Model):
    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval):
        super().__init__()

        self.user_model = user_model
        self.movie_model = movie_model
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])

        return self.task(user_embeddings, movie_embeddings)


## Define user and movie models for the Wide & Deep architecture:

In [22]:
embedding_dimension = 32

user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), embedding_dimension),
    tf.keras.layers.Dense(embedding_dimension)
])

movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocabulary_size(), embedding_dimension),
    tf.keras.layers.Dense(embedding_dimension)
])

# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates=movies.batch(128).map(movie_model)
    )
)

## Compile and train the model:

In [33]:
# Define the retrieval task
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)
))

# Create the retrieval model
model = MovieLensModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Train the model for 3 epochs
model.fit(ratings.batch(4096), epochs=3)

# Use brute-force search to set up retrieval using the trained representations
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(
    movies.batch(100).map(lambda title: (title, model.movie_model(title)))
)

# Get recommendations for user 42
_, titles = index(np.array(["42"]))

# Convert titles tensor to a list
top_5_titles = titles[0, :5].numpy()

# Print the recommendations in the desired format
print("Top 5 recommendations for user 42:")
for rank, title in enumerate(top_5_titles, 1):
    print(f"{rank}. {title.decode('utf-8')}")



Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 5 recommendations for user 42:
1. Reluctant Debutante, The (1958)
2. He Walked by Night (1948)
3. Dolores Claiborne (1994)
4. Terminal Velocity (1994)
5. Tank Girl (1995)


## Recommendation model - LightFM model

## Install LightFM

In [25]:
!pip install -q lightfm

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/316.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m307.2/316.4 kB[0m [31m9.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone


In [26]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from lightfm import LightFM

## Read the Data

In [27]:
# Ratings data.
ratings = tfds.load('movielens/100k-ratings', split="train")
# Features of all the available movies.
movies = tfds.load('movielens/100k-movies', split="train")

# Select the basic features.
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])


## Build vocabularies:

In [28]:
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)


## Define LightFM Model

In [29]:
lightfm_model = LightFM(no_components=64, loss='warp')

## Train the LightFM Model

In [30]:
# Initialize lists to store user_ids, item_ids, and ratings_values
user_ids = []
item_ids = []
ratings_values = []

# Iterate over the dataset and extract relevant information
for interaction in ratings.as_numpy_iterator():
    user_id = user_ids_vocabulary(interaction['user_id'])
    item_id = movie_titles_vocabulary(interaction['movie_title'])

    # Check if 'rating' field is present
    if 'rating' in interaction:
        rating_value = interaction['rating']
    else:
        # Handle the case where 'rating' field is missing or undefined
        rating_value = 0  # Replace with an appropriate default value

    user_ids.append(user_id)
    item_ids.append(item_id)
    ratings_values.append(rating_value)

# Build the interactions matrix
from scipy.sparse import coo_matrix

interactions_matrix = coo_matrix((ratings_values, (user_ids, item_ids)))

# Initialize and fit LightFM model
from lightfm import LightFM

lightfm_model = LightFM(no_components=64, loss='warp')
lightfm_model.fit(interactions_matrix, epochs=10)


<lightfm.lightfm.LightFM at 0x7c7d05f2bfd0>

## Get Recommendations

In [34]:
from tabulate import tabulate

# Example: Get top 5 recommendations for user 42
user_index = user_ids_vocabulary('42').numpy()  # Convert tensor to integer
n_items = len(movie_titles_vocabulary.get_vocabulary())

# Repeat user index for all item indices
user_indices = np.full(n_items, user_index)

# Predict scores for all items for the given user
item_indices = np.arange(n_items)
scores = lightfm_model.predict(user_indices, item_indices)

# Get indices of top 5 recommendations
top_item_indices = np.argsort(-scores)[:5]  # Indices of top 5 recommendations

# Get titles of top 5 recommendations
recommended_titles = [movie_titles_vocabulary.get_vocabulary()[idx] for idx in top_item_indices]

# Prepare data for tabulate
table_data = []
for rank, title in enumerate(recommended_titles, 1):
    table_data.append([rank, title])

# Print the recommendations in a table format
print("Top 5 recommendations for user 42:")
print(tabulate(table_data, headers=['Rank', 'Movie Title'], tablefmt='grid'))



Top 5 recommendations for user 42:
+--------+-------------------------------------------------------------------+
|   Rank | Movie Title                                                       |
|      1 | Return of Martin Guerre, The (Retour de Martin Guerre, Le) (1982) |
+--------+-------------------------------------------------------------------+
|      2 | Terminator 2: Judgment Day (1991)                                 |
+--------+-------------------------------------------------------------------+
|      3 | Gordy (1995)                                                      |
+--------+-------------------------------------------------------------------+
|      4 | Pretty Woman (1990)                                               |
+--------+-------------------------------------------------------------------+
|      5 | Wedding Gift, The (1994)                                          |
+--------+-------------------------------------------------------------------+
