In [1]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

In [2]:
# Load the "train" split of both MovieLens 100k datasets: the ratings
# (user/movie interactions) and the movie catalogue.
ratings = tfds.load(name="movielens/100k-ratings", split="train")
movies = tfds.load(name="movielens/100k-movies", split="train")

In [3]:
# Pretty-print a single raw rating record to inspect the available features.
for rating_example in ratings.take(1).as_numpy_iterator():
    pprint.pprint(rating_example)

{'bucketized_user_age': 45.0,
 'movie_genres': array([7]),
 'movie_id': b'357',
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'raw_user_age': 46.0,
 'timestamp': 879024327,
 'user_gender': True,
 'user_id': b'138',
 'user_occupation_label': 4,
 'user_occupation_text': b'doctor',
 'user_rating': 4.0,
 'user_zip_code': b'53211'}


In [4]:
# Pretty-print a single raw movie record to inspect the available features.
for movie_example in movies.take(1).as_numpy_iterator():
    pprint.pprint(movie_example)

{'movie_genres': array([4]),
 'movie_id': b'1681',
 'movie_title': b'You So Crazy (1994)'}


In [5]:
# Keep only the two rating features used downstream: the movie title and
# the user id. Note that this rebinds `ratings` and `movies` in place.
ratings = ratings.map(
    lambda rating: {
        "movie_title": rating["movie_title"],
        "user_id": rating["user_id"],
    }
)

# Reduce every movie record to just its title.
movies = movies.map(lambda movie: movie["movie_title"])

In [6]:
# Seeded shuffle with reshuffle_each_iteration=False, so that `train` and
# `test` are carved out of one fixed ordering.
tf.random.set_seed(42)
shuffled = ratings.shuffle(
    buffer_size=100_000,
    seed=42,
    reshuffle_each_iteration=False,
)

# First 80,000 examples for training, the following 20,000 for testing.
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [7]:
# Batch the titles and user ids so they can be gathered into NumPy arrays.
movie_titles = movies.batch(1_000)
user_ids = ratings.batch(50_000).map(lambda rating_batch: rating_batch["user_id"])

# Concatenate all batches and de-duplicate to obtain the lookup vocabularies.
unique_movie_titles = np.unique(
    np.concatenate(list(movie_titles))
)
unique_user_ids = np.unique(
    np.concatenate(list(user_ids))
)

In [8]:
# Preview the first ten entries of the movie-title vocabulary.
unique_movie_titles[:10]

array([b"'Til There Was You (1997)", b'1-900 (1994)',
       b'101 Dalmatians (1996)', b'12 Angry Men (1957)', b'187 (1997)',
       b'2 Days in the Valley (1996)',
       b'20,000 Leagues Under the Sea (1954)',
       b'2001: A Space Odyssey (1968)',
       b'3 Ninjas: High Noon At Mega Mountain (1998)',
       b'39 Steps, The (1935)'], dtype=object)

In [9]:
# Preview the first ten entries of the user-id vocabulary.
unique_user_ids[:10]

array([b'1', b'10', b'100', b'101', b'102', b'103', b'104', b'105',
       b'106', b'107'], dtype=object)

In [10]:
# Output width shared by the user and movie embedding layers.
embedding_dimension = 32

In [11]:
# User model: map the raw user-id string to a vocabulary index, then to an
# embedding. The embedding table has one row more than the vocabulary.
user_model = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_user_ids,
        mask_token=None,
    ),
    tf.keras.layers.Embedding(
        input_dim=len(unique_user_ids) + 1,
        output_dim=embedding_dimension,
    ),
])

# Movie model: same structure, keyed on the movie title.
movie_model = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_movie_titles,
        mask_token=None,
    ),
    tf.keras.layers.Embedding(
        input_dim=len(unique_movie_titles) + 1,
        output_dim=embedding_dimension,
    ),
])

In [12]:
# Candidate set for the FactorizedTopK metrics: the movie titles, embedded
# by `movie_model` in batches of 128.
candidate_embeddings = movies.batch(128).map(movie_model)
metrics = tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)

In [13]:
# Retrieval task wired to the top-K metrics defined above; MovielensModel
# picks this object up by name.
task = tfrs.tasks.Retrieval(metrics=metrics)

In [14]:
class MovielensModel(tfrs.Model):
    """Retrieval model combining a user model, a movie model and a task.

    The task is not a constructor argument: it is read from the
    notebook-level ``task`` variable, which must exist before instantiation.
    """

    def __init__(self, user_model, movie_model):
        """Attach the two embedding models and the retrieval task.

        Args:
            user_model: Model applied to ``features["user_id"]``.
            movie_model: Model applied to ``features["movie_title"]``.
        """
        super().__init__()
        self.movie_model: tf.keras.Model = movie_model
        self.user_model: tf.keras.Model = user_model
        # Resolved from the enclosing (notebook-global) scope.
        self.task: tf.keras.layers.Layer = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the task's result for one batch of features.

        Args:
            features: Mapping that provides the "user_id" and "movie_title"
                entries.
            training: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``self.task`` returns for the user and movie embeddings.
        """
        query_vectors = self.user_model(features["user_id"])
        candidate_vectors = self.movie_model(features["movie_title"])
        return self.task(query_vectors, candidate_vectors)

In [15]:
# Build the model from the two embedding models and compile it with Adagrad.
model = MovielensModel(user_model=user_model, movie_model=movie_model)
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

In [16]:
# Shuffle, batch and cache the training split; batch and cache the test split.
# NOTE(review): cache() comes after shuffle(), so the order produced on the
# first pass is presumably what later epochs replay - confirm this is intended.
cached_train = train.shuffle(buffer_size=80_000).batch(batch_size=1024).cache()
cached_test = test.batch(batch_size=1024).cache()

In [17]:
# Train for three passes over the cached training batches.
model.fit(x=cached_train, epochs=3)

Epoch 1/3
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f5b8c2706d8>

In [18]:
# Evaluate on the held-out batches; return_dict=True yields named metrics.
model.evaluate(x=cached_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.0009500000160187483,
 'factorized_top_k/top_5_categorical_accuracy': 0.007449999917298555,
 'factorized_top_k/top_10_categorical_accuracy': 0.019050000235438347,
 'factorized_top_k/top_50_categorical_accuracy': 0.12060000002384186,
 'factorized_top_k/top_100_categorical_accuracy': 0.23375000059604645,
 'loss': 3203.1728515625,
 'regularization_loss': 0,
 'total_loss': 3203.1728515625}

In [19]:
# Brute-force top-K index that uses the trained user model for queries.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Index the movie embeddings, with the movie titles as their identifiers.
brute_force_candidates = movies.batch(100).map(model.movie_model)
index.index(brute_force_candidates, movies)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7f5b30146240>

In [20]:
# Query the brute-force index for user "42"; only the titles are kept.
brute_force_query = tf.constant(["42"])
_, titles = index(brute_force_query)
print(f"Recommendations for user 42: {titles[0, :10]}")

Recommendations for user 42: [b'Michael (1996)' b"Preacher's Wife, The (1996)"
 b"Kid in King Arthur's Court, A (1995)" b'Rudy (1993)'
 b'Up Close and Personal (1996)' b'Father of the Bride Part II (1995)'
 b'Forget Paris (1995)' b'Homeward Bound: The Incredible Journey (1993)'
 b'Rent-a-Kid (1995)' b'While You Were Sleeping (1995)']


In [21]:
# Round-trip the brute-force index through a SavedModel inside a temporary
# directory, then query the reloaded copy.
with tempfile.TemporaryDirectory() as scratch_dir:
    path = os.path.join(scratch_dir, "model")

    # Export, then load back.
    index.save(path)
    loaded = tf.keras.models.load_model(path)

    # Query the reloaded model with a raw user id.
    scores, titles = loaded(["42"])
    print(f"Recommendations: {titles[0, :3]}")



INFO:tensorflow:Assets written to: /tmp/tmp10d6kao3/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp10d6kao3/model/assets






Recommendations: [b'Michael (1996)' b"Preacher's Wife, The (1996)"
 b"Kid in King Arthur's Court, A (1995)"]


In [29]:
# ScaNN top-K index that uses the trained user model for queries.
scann_index = tfrs.layers.factorized_top_k.ScaNN(model.user_model)

# Index the movie embeddings, with the movie titles as their identifiers.
scann_candidates = movies.batch(100).map(model.movie_model)
scann_index.index(scann_candidates, movies)

<tensorflow_recommenders.layers.factorized_top_k.ScaNN at 0x7f5ac4104be0>

In [30]:
# Query the ScaNN index for user "42"; only the titles are kept.
scann_query = tf.constant(["42"])
_, titles = scann_index(scann_query)
print(f"Recommendations for user 42: {titles[0, :3]}")

Recommendations for user 42: [b"Kid in King Arthur's Court, A (1995)"
 b'Homeward Bound: The Incredible Journey (1993)'
 b'While You Were Sleeping (1995)']


In [33]:
# Export the ScaNN index under ./model/1/ (relative to the working directory).
tmp = './'
path = os.path.join(tmp, "model/1/")

# The "Scann" namespace is whitelisted in the save options for the export.
scann_save_options = tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])
scann_index.save(path, options=scann_save_options)

# Load the export back and query it with a raw user id.
loaded = tf.keras.models.load_model(path)
scores, titles = loaded(["42"])

print(f"Recommendations: {titles[0, :3]}")



INFO:tensorflow:Assets written to: ./model/1/assets


INFO:tensorflow:Assets written to: ./model/1/assets






Recommendations: [b"Kid in King Arthur's Court, A (1995)"
 b'Homeward Bound: The Incredible Journey (1993)'
 b'While You Were Sleeping (1995)']


In [34]:
# Write the ScaNN index to ./scann_recommend/1/, the path that the later
# REST request's model name ("scann_recommend") matches.
export_path = './scann_recommend/1/'
export_options = tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])

tf.keras.models.save_model(
    model=scann_index,
    filepath=export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=export_options,
)
print("Saved model")



INFO:tensorflow:Assets written to: ./scann_recommend/1/assets


INFO:tensorflow:Assets written to: ./scann_recommend/1/assets


Saved model


In [22]:
# Show the first test batch. `dict_batch` stays bound after the loop and is
# read again when the request payload is built.
for dict_batch in cached_test.take(1):
    print(*(dict_batch[feature] for feature in ('movie_title', 'user_id')))

tf.Tensor(
[b'M*A*S*H (1970)' b'Volcano (1997)' b'2001: A Space Odyssey (1968)' ...
 b'Return of the Jedi (1983)' b'Godfather, The (1972)'
 b'Time to Kill, A (1996)'], shape=(1024,), dtype=string) tf.Tensor([b'346' b'602' b'393' ... b'402' b'222' b'825'], shape=(1024,), dtype=string)


In [23]:
# Decode the first three user ids of the batch from bytes to str so that
# they can be JSON-serialised.
first_three_ids = dict_batch['user_id'][:3].numpy()
user_id_np = [raw_id.decode("utf-8") for raw_id in first_three_ids]

In [24]:
import json
# Request body: the signature name plus the decoded user ids as instances.
request_body = {"signature_name": "serving_default", "instances": user_id_np}
data = json.dumps(request_body)

# Print only the first 50 and last 52 characters of the payload.
payload_head = data[:50]
payload_tail = data[len(data) - 52:]
print('Data: {} ... {}'.format(payload_head, payload_tail))

Data: {"signature_name": "serving_default", "instances": ... erving_default", "instances": ["346", "602", "393"]}


In [37]:
import requests
# POST the JSON payload to the model's REST predict endpoint on localhost
# (presumably a TensorFlow Serving instance hosting the exported
# `scann_recommend` model - it must already be running).
headers = {"content-type": "application/json"}
json_response = requests.post(
    'http://localhost:8501/v1/models/scann_recommend:predict',
    data=data,
    headers=headers,
    timeout=30,  # fail instead of hanging the cell if the server is unresponsive
)

# Surface HTTP errors (wrong model name, malformed payload, ...) as an
# explicit HTTPError rather than a KeyError on a missing 'predictions' key.
json_response.raise_for_status()
predictions = json_response.json()['predictions']

In [28]:
# Display the 'predictions' entry parsed from the server's JSON response.
predictions

[{'output_2': ['Hard Target (1993)',
   'Under Siege 2: Dark Territory (1995)',
   'Rising Sun (1993)',
   'Marked for Death (1990)',
   'Mortal Kombat (1995)',
   'Assassins (1995)',
   'Under Siege (1992)',
   'Surviving the Game (1994)',
   'Bad Boys (1995)',
   'Desperado (1995)'],
  'output_1': [2.41352797,
   2.36275244,
   2.36152029,
   2.32232022,
   2.29543829,
   2.25833893,
   2.2193,
   2.1612978,
   2.13318849,
   2.12153292]},
 {'output_2': ["Dante's Peak (1997)",
   'Volcano (1997)',
   'Fire Down Below (1997)',
   'Saint, The (1997)',
   'Liar Liar (1997)',
   'Jungle2Jungle (1997)',
   'Flubber (1997)',
   'Shadow Conspiracy (1997)',
   'George of the Jungle (1997)',
   "Gone Fishin' (1997)"],
  'output_1': [4.11519241,
   3.91854596,
   3.74097061,
   3.73197603,
   3.67217159,
   3.63971233,
   3.60989952,
   3.58282113,
   3.50525093,
   3.49305725]},
 {'output_2': ['Tom and Huck (1995)',
   'First Kid (1996)',
   'Little Big League (1994)',
   'In the Army Now (19