In [1]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

In [2]:
# Load the MovieLens 100k ratings (train split) and keep only the three
# features used in the rest of the notebook.
ratings = tfds.load("movielens/100k-ratings", split="train").map(
    lambda example: {
        "movie_title": example["movie_title"],
        "user_id": example["user_id"],
        "user_rating": example["user_rating"],
    }
)

In [3]:
# Seed TensorFlow's global RNG.
tf.random.set_seed(42)

# Shuffle once with a fixed seed; reshuffle_each_iteration=False is passed so
# that `train` and `test` below are both derived from one ordering.
shuffled = ratings.shuffle(
    buffer_size=100_000,
    seed=42,
    reshuffle_each_iteration=False,
)

# First 80,000 shuffled examples for training, the following 20,000 for testing.
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [4]:
# Project each batched example dict down to a single string feature.
# The two batch sizes differ, but every batch is concatenated below, so the
# resulting vocabularies do not depend on how the data was chunked.
movie_titles = ratings.batch(100_000).map(lambda batch: batch["movie_title"])
user_ids = ratings.batch(10_000).map(lambda batch: batch["user_id"])

# Materialise the batches as NumPy arrays, join them, and de-duplicate to get
# the vocabularies consumed by the lookup layers.
unique_movie_titles = np.unique(
    np.concatenate([titles.numpy() for titles in movie_titles])
)
unique_user_ids = np.unique(
    np.concatenate([ids.numpy() for ids in user_ids])
)

In [5]:
class RankingModel(tf.keras.Model):
    """Predicts a scalar rating for a (user id, movie title) pair.

    Each input is mapped through its own string-lookup + embedding stack; the
    two embeddings are concatenated and fed to a small dense network whose
    final layer has a single linear unit.

    Relies on the module-level vocabularies `unique_user_ids` and
    `unique_movie_titles`.

    Args:
        embedding_dimension: Width of both the user and the movie embedding
            vectors. Defaults to 32.
    """

    def __init__(self, embedding_dimension: int = 32):
        super().__init__()

        # User id string -> integer index -> embedding vector.
        # The extra row (+ 1) leaves room for the lookup layer's
        # out-of-vocabulary index.
        self.user_embeddings = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_user_ids, mask_token=None
            ),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Movie title string -> integer index -> embedding vector.
        self.movie_embeddings = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_movie_titles, mask_token=None
            ),
            tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embedding_dimension)
        ])

        # Dense tower that turns the concatenated embeddings into one score.
        self.ratings = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])

    @staticmethod
    def _as_tensor(values):
        """Return `values` untouched if it is already a tensor, else convert it."""
        return values if tf.is_tensor(values) else tf.convert_to_tensor(values)

    def call(self, inputs):
        """Score a batch of (user id, movie title) pairs.

        Args:
            inputs: A 2-tuple `(user_id, movie_title)`. Each element may be a
                tensor or anything `tf.convert_to_tensor` accepts (e.g. a
                Python list of strings).

        Returns:
            The output of the dense tower applied to the concatenated user and
            movie embeddings.
        """
        user_id, movie_title = inputs

        # Plain Python lists would otherwise reach the Sequential sub-models
        # as non-tensor inputs, which Keras warns about on every call.
        user_embedding = self.user_embeddings(self._as_tensor(user_id))
        movie_embedding = self.movie_embeddings(self._as_tensor(movie_title))

        return self.ratings(tf.concat([user_embedding, movie_embedding], axis=1))

In [6]:
# Instantiate a fresh ranking model; no training has been run on it.
before_train = RankingModel()

In [7]:
# Score a single (user id, movie title) pair with the untrained model.
before_train((["42"], ["One Flew Over the Cuckoo's Nest (1975)"]))

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.03740937]], dtype=float32)>

In [8]:
# Same call as the previous cell; the recorded output is identical.
before_train((["42"], ["One Flew Over the Cuckoo's Nest (1975)"]))

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.03740937]], dtype=float32)>

In [9]:
# Ranking task pairing a mean-squared-error loss with an RMSE metric.
# NOTE(review): MovielensModel builds its own Ranking task with the same loss
# and metric and does not read this variable — confirm it is still needed.
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [10]:
class MovielensModel(tfrs.models.Model):
    """TFRS model that wires a `RankingModel` to a `Ranking` task.

    The task is configured with a mean-squared-error loss and a
    root-mean-squared-error metric.
    """

    def __init__(self):
        super().__init__()

        # Network producing one predicted rating per (user, movie) pair.
        self.ranking_model: tf.keras.Model = RankingModel()

        # Task object that turns labels + predictions into the training loss.
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Compute the task loss for one batch of examples.

        Args:
            features: Batch dict; the keys "user_id", "movie_title" and
                "user_rating" are read.
            training: Accepted for interface compatibility; not used here.

        Returns:
            The value returned by the ranking task for this batch.
        """
        user_ids = features["user_id"]
        movie_titles = features["movie_title"]
        predicted_ratings = self.ranking_model((user_ids, movie_titles))

        return self.task(
            labels=features["user_rating"],
            predictions=predicted_ratings,
        )

In [11]:
# Build the model and attach an Adam optimizer with learning rate 0.01.
model = MovielensModel()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))

In [12]:
# Training pipeline: shuffle, group into batches of 8192, then cache.
# NOTE(review): cache() sits after shuffle(), so presumably the batch order
# produced on the first pass is what later epochs replay — confirm intended.
cached_train = (
    train
    .shuffle(buffer_size=100_000)
    .batch(batch_size=8192)
    .cache()
)

# Evaluation pipeline: fixed order, batches of 4096, cached.
cached_test = test.batch(batch_size=4096).cache()

In [13]:
# Train for three epochs over the cached training batches.
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7ff3a0485e48>

In [14]:
# Evaluate on the cached test batches; return_dict=True yields the metrics
# as a dict keyed by metric name.
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.0302114486694336,
 'loss': 1.0618079900741577,
 'regularization_loss': 0,
 'total_loss': 1.0618079900741577}

In [15]:
# Take the first batch of the test set for manual inspection.
# next(iter(...)) raises immediately if the dataset is empty, instead of
# silently leaving `dict_batch` undefined as a for/break loop would.
dict_batch = next(iter(cached_test))

In [16]:
# User id and movie title of the first example in the batch.
dict_batch['user_id'][0], dict_batch['movie_title'][0]

(<tf.Tensor: shape=(), dtype=string, numpy=b'346'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'M*A*S*H (1970)'>)

In [17]:
# Run the trained ranking model on every (user, movie) pair in the batch.
dict_batch_ranking = model.ranking_model((dict_batch['user_id'], dict_batch['movie_title']))

In [18]:
# Predicted rating vs. recorded user rating for the first example.
dict_batch_ranking[0], dict_batch['user_rating'][0]

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([4.0402102], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=4.0>)

In [19]:
# User id and movie title of the second example in the batch.
dict_batch['user_id'][1], dict_batch['movie_title'][1]

(<tf.Tensor: shape=(), dtype=string, numpy=b'602'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'Volcano (1997)'>)

In [20]:
# Same call as the earlier prediction cell, on the same batch; this reassigns
# `dict_batch_ranking`.
dict_batch_ranking = model.ranking_model((dict_batch['user_id'], dict_batch['movie_title']))

In [21]:
# Predicted rating vs. recorded user rating for the second example.
dict_batch_ranking[1], dict_batch['user_rating'][1]

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.6237125], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=4.0>)