This notebook tries to follow the resources of the following link, although it is unsuccesfull
https://blog.tensorflow.org/2020/09/introducing-tensorflow-recommenders.html

In [None]:
pip install tensorflow_recommenders
pip install tensorflow-datasets
pip install numpy==1.24.4

In [12]:
import tqdm as notebook_tqdm
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [30]:
from typing import Dict, Text
import pprint

In [None]:
# Ratings data.
ratings = tfds.load("movie_lens/100k-ratings", split="train")
# Features of all the available movies.
movies = tfds.load("movie_lens/100k-movies", split="train")

Out of all the features available in the dataset, the most useful are user ids and movie titles. While TFRS can use arbitrarily rich features, let's only use those to keep things simple.

In [14]:
ratings = ratings.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
})
movies = movies.map(lambda x: x["movie_title"])

In [27]:
class TwoTowerMovielensModel(tfrs.Model):
  """MovieLens prediction model."""
 
  def __init__(self):
    # The `__init__` method sets up the model architecture.
    super().__init__()
 
    # How large the representation vectors are for inputs: larger vectors make
    # for a more expressive model but may cause over-fitting.
    embedding_dim = 32
    num_unique_users = 1000
    num_unique_movies = 1700
    eval_batch_size = 128

    # Set up user and movie representations.
    self.user_model = tf.keras.Sequential([
      # We first turn the raw user ids into contiguous integers by looking them
      # up in a vocabulary.
      tf.keras.layers.experimental.preprocessing.StringLookup(
          max_tokens=num_unique_users),
      # We then map the result into embedding vectors.
      tf.keras.layers.Embedding(num_unique_users, embedding_dim)
    ])
    self.movie_model = tf.keras.Sequential([
      tf.keras.layers.experimental.preprocessing.StringLookup(
          max_tokens=num_unique_movies),
      tf.keras.layers.Embedding(num_unique_movies, embedding_dim)
    ])

    # The `Task` objects has two purposes: (1) it computes the loss and (2)
    # keeps track of metrics.
    self.task = tfrs.tasks.Retrieval(
        # In this case, our metrics are top-k metrics: given a user and a known
        # watched movie, how highly would the model rank the true movie out of
        # all possible movies?
        metrics=tfrs.metrics.FactorizedTopK(
            candidates=movies.batch(eval_batch_size).map(self.movie_model)
        )
    )
    
    def compute_loss(self, features, training=False):
        # The `compute_loss` method determines how loss is computed.
    
        # Compute user and item embeddings.
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
    
        # Pass them into the task to get the resulting loss. The lower the loss is, the
        # better the model is at telling apart true watches from watches that did
        # not happen in the training data.
        return self.task(user_embeddings, movie_embeddings)

    

In [None]:
model = TwoTowerMovielensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
 
model.fit(ratings.batch(4096), verbose=False)

In [None]:
index = tfrs.layers.ann.BruteForce(model.user_model)
index.index(movies.batch(100).map(model.movie_model), movies)
 
# Get recommendations.
_, titles = index(tf.constant(["42"]))
print(f"Recommendations for user 42: {titles[0, :3]}")

In [31]:
class MovieLensModel(tfrs.Model):
 
  def __init__(
      self,
      user_model: tf.keras.Model,
      movie_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    super().__init__()
 
    # Set up user and movie representations.
    self.user_model = user_model
    self.movie_model = movie_model
 
    # Set up a retrieval task.
    self.task = task
 
  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    # Define how the loss is computed.
 
    user_embeddings = self.user_model(features["user_id"])
    movie_embeddings = self.movie_model(features["movie_title"])
 
    return self.task(user_embeddings, movie_embeddings)

In [33]:
users_model = tf.keras.Sequential([user_id_vocabulary,
                                   tf.keras.layers.Embedding(user_id_vocabulary.vocab_size(),64)])
movie_model = tf.keras.Sequential([movies_title_vocabulary,                                   tf.keras.layers.Embedding(movies_title_vocabulary.vocab_size(),64)])
 
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)))
# Now let us create, compile, and train a retrieval model.

model = MovieLensModel(users_model,movie_model,task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))
model.fit(rating.batch(4096), epochs=3)


NameError: name 'user_id_vocabulary' is not defined