Tasks 
   1. Get our data and split it into a training and test set.
   2. Implement a ranking model.
   3. Fit and evaluate it.

Import the packages 

In [1]:
import pandas as pd 
import tensorflow as tf 
import tensorflow_datasets as tfds 

In [13]:
import tensorflow_recommenders as tfrs

In [2]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np

# Prepare the data 

In [49]:
data=tfds.load('amazon_us_reviews/Beauty_v1_00', split='train')


In [50]:
for sample in data.take(1).as_numpy_iterator():
    pprint.pprint(sample)

{'data': {'customer_id': b'18239070',
          'helpful_votes': 0,
          'marketplace': b'US',
          'product_category': b'Beauty',
          'product_id': b'B00LJ86MAY',
          'product_parent': b'823234087',
          'product_title': b'The Original Curly Tee Towel - T-Shirt Hair Dryi'
                           b'ng Towel Wrap (Extra Long)',
          'review_body': b'Great product, quick ship and packaged nicely with a'
                         b'ttention to detail. Thank you!',
          'review_date': b'2014-10-04',
          'review_headline': b'Very pleased!',
          'review_id': b'R24WHRN0BMM2K7',
          'star_rating': 5,
          'total_votes': 0,
          'verified_purchase': 1,
          'vine': 1}}


Keep the features: Customer_id, product_id, and rating. 

In [51]:
data = data.map(lambda x: {
        "customer_id": x["data"]["customer_id"],
        "product_id": x["data"]["product_id"],
        "star_rating":x["data"]["star_rating"]
       })

Split the data 80-20

In [52]:
tf.random.set_seed(42)
shuffled = data.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

Use the unique customer and product ids. Also, convert the columns to integers for the model.

In [54]:
products = data.batch(1_000_000).map(lambda x: x["product_id"])
customers = data.batch(1_000_000).map(lambda x: x["customer_id"])

unique_products = np.unique(np.concatenate(list(products)))
unique_customers = np.unique(np.concatenate(list(customers)))

# Implementing the Model

Architecture 

In [73]:
class RankingModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

    # Compute embeddings for users.
        self.user_embeddings = tf.keras.Sequential([
             tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_customers, mask_token=None),
      tf.keras.layers.Embedding(len(unique_customers) + 1, embedding_dimension)
    ])

    # Compute embeddings for products.
        self.product_embeddings = tf.keras.Sequential([
      tf.keras.layers.experimental.preprocessing.StringLookup(
        vocabulary=unique_products, mask_token=None),
      tf.keras.layers.Embedding(len(unique_products) + 1, embedding_dimension)
    ])

    # Compute predictions.
        self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
            tf.keras.layers.Dense(1)
  ])

    def call(self, inputs):
        customer_id, product_id = inputs
        user_embedding = self.user_embeddings(customer_id)
        product_embedding = self.product_embeddings(product_id)
        return self.ratings(tf.concat([user_embedding, product_embedding], axis=1))

Make a prediction for a specific customer and product

In [74]:
RankingModel()((["18239070"], ["B00LJ86MAY"]))

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.0135858]], dtype=float32)>

Loss and Metrics 

We'll use it together with the MeanSquaredError Keras loss in order to predict the ratings.

In [75]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

# The Full Model


In [76]:
class AmazonModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
        loss = tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
       )

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        rating_predictions = self.ranking_model(
          (features["customer_id"], features["product_id"]))

    # The task computes the loss and the metrics.
        return self.task(labels=features["star_rating"], predictions=rating_predictions)

# Fitting and Evaluating 

After defining the model, we can use standard Keras fitting and evaluation routines to fit and evaluate the model.

Let's first instantiate the model.

In [77]:
model = AmazonModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

Then shuffle, batch, and cache the training and evaluation data.

In [78]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

Train the model

In [79]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x142d0a4b4c8>

As the model trains, the loss is falling and the RMSE metric is improving.

Finally, we can evaluate our model on the test set:

In [80]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.2963738441467285,
 'loss': 1.6715973615646362,
 'regularization_loss': 0,
 'total_loss': 1.6715973615646362}