In [1]:
! pip install tensorflow-recommenders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-recommenders
  Downloading tensorflow_recommenders-0.7.0-py3-none-any.whl (88 kB)
[K     |████████████████████████████████| 88 kB 8.3 MB/s 
[?25hCollecting tensorflow>=2.9.0
  Downloading tensorflow-2.10.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (578.0 MB)
[K     |████████████████████████████████| 578.0 MB 15 kB/s 
Collecting tensorflow-estimator<2.11,>=2.10.0
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl (438 kB)
[K     |████████████████████████████████| 438 kB 69.9 MB/s 
[?25hCollecting tensorboard<2.11,>=2.10
  Downloading tensorboard-2.10.0-py3-none-any.whl (5.9 MB)
[K     |████████████████████████████████| 5.9 MB 35.7 MB/s 
Collecting keras<2.11,>=2.10.0
  Downloading keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 55.4 MB/s 
Collecting gast<=0.4.0,>=0.2.1
  Downloading gast-0.

In [2]:
from typing import Dict, Text
import tensorflow_recommenders as tfrs
import tensorflow as tf
import tensorflow_datasets as tfds
import pprint
import numpy as np

In [3]:
# Load the 'train' split of the MovieLens 100k ratings dataset via TFDS.
ratings = tfds.load(name='movielens/100k-ratings', split='train')


[1mDownloading and preparing dataset 4.70 MiB (download: 4.70 MiB, generated: 32.41 MiB, total: 37.10 MiB) to ~/tensorflow_datasets/movielens/100k-ratings/0.1.1...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/100000 [00:00<?, ? examples/s]

Shuffling ~/tensorflow_datasets/movielens/100k-ratings/0.1.1.incomplete5YLU8N/movielens-train.tfrecord*...:   …

[1mDataset movielens downloaded and prepared to ~/tensorflow_datasets/movielens/100k-ratings/0.1.1. Subsequent calls will reuse this data.[0m


In [4]:
# Pretty-print the first raw example to inspect the available feature keys.
first_example_only = ratings.take(1)
for value in first_example_only.as_numpy_iterator():
  pprint.pprint(value)

{'bucketized_user_age': 45.0,
 'movie_genres': array([7]),
 'movie_id': b'357',
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'raw_user_age': 46.0,
 'timestamp': 879024327,
 'user_gender': True,
 'user_id': b'138',
 'user_occupation_label': 4,
 'user_occupation_text': b'doctor',
 'user_rating': 4.0,
 'user_zip_code': b'53211'}


In [5]:
# Keep only the three features used below; 'user_rating' is exposed as 'rating'.
# NOTE(review): this rebinds `ratings`, so the cell cannot be re-run without
# reloading the dataset first (the mapped elements no longer have 'user_rating').
ratings = ratings.map(
    lambda example: {
        'movie_title': example['movie_title'],
        'user_id': example['user_id'],
        'rating': example['user_rating'],
    }
)

In [6]:
# Seed both the global TF generator and the shuffle op, and freeze the shuffle
# order across iterations so that `train` and `test` never overlap.
tf.random.set_seed(43)
shuffled = ratings.shuffle(
    buffer_size=100_000,
    seed=43,
    reshuffle_each_iteration=False,
)

# First 80,000 shuffled examples for training, the following 20,000 for testing.
train = shuffled.take(count=80_000)
test = shuffled.skip(count=80_000).take(count=20_000)


In [7]:
# Batch with a size larger than the 100,000-example dataset so everything lands
# in one batch, then keep just the column of interest from that batch.
movie_ratings = (
    ratings
    .batch(batch_size=1_000_000)
    .map(lambda batch: batch['movie_title'])
)
user_ratings = (
    ratings
    .batch(batch_size=1_000_000)
    .map(lambda batch: batch['user_id'])
)

In [8]:
# Build the lookup vocabularies: gather every batch, join them into one array,
# and reduce it to its sorted unique values.
movie_unique = np.unique(np.concatenate([titles for titles in movie_ratings]))
user_unique = np.unique(np.concatenate([ids for ids in user_ratings]))


In [9]:
class MoviesRanking(tf.keras.Model):
  """Scores a (user id, movie title) pair with a single predicted rating.

  Each input string is turned into an index by a `StringLookup` layer and then
  into a 32-dimensional embedding. The two embeddings are concatenated and fed
  through a three-layer dense network whose last layer has one unit.

  The vocabularies come from the notebook-level `user_unique` and
  `movie_unique` arrays.
  """

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # The embedding tables get one row more than the vocabulary size
    # (presumably for the lookup layer's out-of-vocabulary index — confirm
    # against the StringLookup defaults).
    user_lookup = tf.keras.layers.StringLookup(vocabulary=user_unique)
    user_embedding = tf.keras.layers.Embedding(
        input_dim=len(user_unique) + 1,
        output_dim=embedding_dimension,
    )
    self.userModel = tf.keras.Sequential([user_lookup, user_embedding])

    movie_lookup = tf.keras.layers.StringLookup(vocabulary=movie_unique)
    movie_embedding = tf.keras.layers.Embedding(
        input_dim=len(movie_unique) + 1,
        output_dim=embedding_dimension,
    )
    self.movieModel = tf.keras.Sequential([movie_lookup, movie_embedding])

    # Dense tower mapping the concatenated embeddings to one output value.
    hidden_wide = tf.keras.layers.Dense(256, activation='relu')
    hidden_narrow = tf.keras.layers.Dense(64, activation='relu')
    rating_output = tf.keras.layers.Dense(1)
    self.ratingModel = tf.keras.Sequential(
        [hidden_wide, hidden_narrow, rating_output]
    )

  def call(self, inputs):
    """Return the predicted rating for each (user id, movie title) pair.

    Args:
      inputs: a two-element sequence `(user_id, movie_title)`.

    Returns:
      The output of the dense tower applied to the user and movie embeddings
      concatenated along axis 1.
    """
    user_id, movie_title = inputs
    embedded_pair = [self.userModel(user_id), self.movieModel(movie_title)]
    return self.ratingModel(tf.concat(embedded_pair, axis=1))
  

In [10]:
class MovieLensModel(tfrs.models.Model):
  """TFRS model that trains `MoviesRanking` on a rating-prediction task.

  The task uses mean squared error as the loss and reports root mean squared
  error as a metric.
  """

  def __init__(self):
    super().__init__()
    self.rating_model = MoviesRanking()
    self.task = tfrs.tasks.Ranking(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    """Predict ratings from a feature dict.

    Args:
      features: mapping that contains at least 'user_id' and 'movie_title'.

    Returns:
      The output of the underlying ranking model for those two features.
    """
    return self.rating_model((features['user_id'], features['movie_title']))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Compute the ranking task loss for one batch.

    Args:
      features: mapping with 'user_id', 'movie_title' and the label 'rating'.
        The mapping is left unmodified.
      training: accepted for interface compatibility; not used here.

    Returns:
      The loss returned by the ranking task for this batch.
    """
    # Read the label without popping it: mutating the caller's dict would make
    # a second call on the same batch fail with KeyError.
    labels = features['rating']

    # Forward pass on a copy that excludes the label, so the model still sees
    # exactly the same inputs as before.
    model_inputs = {
        name: tensor for name, tensor in features.items() if name != 'rating'
    }
    ratings_predictions = self(model_inputs)

    return self.task(labels=labels, predictions=ratings_predictions)



In [11]:
# Instantiate the model and attach an Adagrad optimizer (learning rate 0.1).
model = MovieLensModel()
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

In [12]:
# Batch each split and cache the batched result so later passes reuse it.
cached_train = train.batch(batch_size=8192).cache()
cached_test = test.batch(batch_size=4096).cache()

In [13]:
# Train for three passes over the cached training batches.
model.fit(x=cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f4f3650a190>

In [14]:
# Evaluate on the held-out batches; return_dict=True yields metrics keyed by name.
model.evaluate(x=cached_test, return_dict=True)



{'root_mean_squared_error': 1.1134268045425415,
 'loss': 1.215317726135254,
 'regularization_loss': 0,
 'total_loss': 1.215317726135254}

In [20]:
# Predict the rating user "42" would give to a few hand-picked movies.
# NOTE(review): this rebinds `ratings` (previously the tf.data dataset) to a
# plain dict, so earlier cells that use the dataset cannot be re-run afterwards.
test_movies = ["M*A*S*H (1970)", "Dances with Wolves (1990)", "Speed (1994)"]
ratings = {
    movie: model({
        'user_id': np.array([str(42)]),
        'movie_title': np.array([movie]),
    })
    for movie in test_movies
}

In [27]:
# Display the model output collected for each test movie title.
ratings

{'M*A*S*H (1970)': <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[3.5505297]], dtype=float32)>,
 'Dances with Wolves (1990)': <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[3.5769758]], dtype=float32)>,
 'Speed (1994)': <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[3.580376]], dtype=float32)>}

In [25]:
# Report the test movies from highest to lowest predicted score.
for title, score in sorted(
    ratings.items(),
    key=lambda entry: entry[1],
    reverse=True,
):
  print(f'title is {title} and score is {score}')

title is Speed (1994) and score is [[3.580376]]
title is Dances with Wolves (1990) and score is [[3.5769758]]
title is M*A*S*H (1970) and score is [[3.5505297]]


In [29]:
# Export the trained model in SavedModel format to the "rankingModel" directory.
tf.saved_model.save(obj=model, export_dir="rankingModel")



In [31]:
! zip -r rankingModel  rankingModel

  adding: rankingModel/ (stored 0%)
  adding: rankingModel/assets/ (stored 0%)
  adding: rankingModel/variables/ (stored 0%)
  adding: rankingModel/variables/variables.index (deflated 62%)
  adding: rankingModel/variables/variables.data-00000-of-00001 (deflated 33%)
  adding: rankingModel/saved_model.pb (deflated 84%)
