## Multi-task recommenders

In [1]:
from typing import Dict, Text
from firebase import firebase

import os
import pprint
import tempfile
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_recommenders as tfrs

### Preparing the dataset

In [2]:
firebase = firebase.FirebaseApplication('https://thesis-bd8c8-default-rtdb.europe-west1.firebasedatabase.app/', None)
firebase_ratings = firebase.get('/User_Book', None)
firebase_books = firebase.get('/Books', None)

In [3]:
ratings_df = pd.DataFrame.from_dict(firebase_ratings, orient='index')
books_df = pd.DataFrame.from_dict(firebase_books, orient='index')

In [4]:
ratings_df.head()

Unnamed: 0,bookId,isbn,myRate,userId
-NWcEFgMwmdaYc-4ndml,-NWcEFeoVaEgjMwiboCA,9781565841000,5.0,3avc3TUJioP8XGD0bLK9xtV7uIG3
-NWcEGniRUJ0hRQmvclo,-NWcEGmQeH3eiHRZ0w7G,9781929610259,4.5,3avc3TUJioP8XGD0bLK9xtV7uIG3
-NWcEHtzsgGbN7rpUZ6V,-NWcEHsFfJ3yqXud1NGo,9780814326114,4.0,3avc3TUJioP8XGD0bLK9xtV7uIG3
-NWcEJunG4PfX-aAVxiF,-NWcEJqjxIyMQv5izHrS,9780312010447,4.5,3avc3TUJioP8XGD0bLK9xtV7uIG3
-NWcELFol68JZpsfvkyF,-NWcELEEV0QR6EHobxIe,9780143036357,3.5,3avc3TUJioP8XGD0bLK9xtV7uIG3


In [5]:
len(set(books_df["isbn"]))

696

In [6]:
books_df.tail()

Unnamed: 0,author,description,documentId,genre,image,isbn,myRate,pages,publishedYear,rating,title,finishDate,pagesRead,startDate,notes
-NZ6TnNp0OS0Mn3DCvjt,Barbara Fairchild,"""You can always tell a Bon App?tit recipe: It'...",,Cooking,https://books.google.com/books/content?id=7VE-...,9780764596865,,816,2006,4.06,The Bon Appetit Cookbook,,,,
-NZ6To1MjcsmbyVRQ791,Giada De Laurentiis,"Presents techniques of Italian home cookery, i...",,Cooking,https://books.google.com/books/content?id=OVsc...,9781400052585,,256,2005,3.95,Everyday Italian,,,,
-NZ6Tod5icltvscrhwzK,Sophie Braimbridge;Erica Jankovich,A noted chef and a dietician join forces to cr...,,Cooking,https://books.google.com/books/content?id=cWLm...,9781584794943,,144,2006,2.62,Healthy Cooking for IBS,,,,
-NZ6TpPNzFU5ADOOyZJg,Garth Ennis;Jeff Youngquist;John McCrea,The most deadly man in the Marvel Universe--Th...,,Comics & Graphic Novels,https://books.google.com/books/content?id=XD2w...,9780785113447,,144,2004,4.03,The Punisher,,,,
-NZ6TpwbVYK6Pq_jXMwW,Robert Kirkman;Charlie Adlard;Cliff Rathburn,Rick mounts a mission to rescue a crashed heli...,,Comics & Graphic Novels,https://books.google.com/books/content?id=r30q...,9781582406121,,136,2006,4.29,The Best Defense,,,,


In [7]:
len(books_df)

1014

In [8]:
#drop manually added books
books_df = books_df[books_df['rating'] != '']

In [9]:
len(books_df)

1013

In [10]:
books_df = pd.DataFrame(set(books_df["isbn"]),columns=["isbn"])

In [11]:
books_df

Unnamed: 0,isbn
0,09781880685358
1,09781558321250
2,09780451185976
3,09788179921623
4,09780970312532
...,...
690,09780802714626
691,09781561583768
692,09780684854465
693,09780786868698


In [12]:
#transforms dataframes in datasets with tensor
ratings_dataset = tf.data.Dataset.from_tensor_slices(dict(ratings_df))
books_dataset = tf.data.Dataset.from_tensor_slices(dict(books_df))

In [13]:
ratings = ratings_dataset.map(lambda x: {
    "book_isbn": x["isbn"],
    "user_id": x["userId"],
    "user_rating": x["myRate"],
})
books = books_dataset.map(lambda x: x["isbn"])

In [14]:
ratings

<_MapDataset element_spec={'book_isbn': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_id': TensorSpec(shape=(), dtype=tf.string, name=None), 'user_rating': TensorSpec(shape=(), dtype=tf.float64, name=None)}>

In [15]:
books

<_MapDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [16]:
len(books)

695

In [17]:
len(ratings)

1014

In [18]:
# Randomly shuffle data and split between train and test.
tf.random.set_seed(42)
shuffled = ratings.shuffle(len(ratings_df), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(800)
test = shuffled.skip(800).take(210)

book_isbns = books.batch(25) #100 50
user_ids = ratings.batch(10_000).map(lambda x: x["user_id"])

unique_book_isbns = np.unique(np.concatenate(list(book_isbns)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

## A multi-task model

In [19]:
class BookModel(tfrs.models.Model):

    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        super().__init__()

        embedding_dimension = 64 #32

        # User and book models.
        self.book_model: tf.keras.layers.Layer = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_book_isbns, mask_token=None),
          tf.keras.layers.Embedding(len(unique_book_isbns) + 1, embedding_dimension)
        ])
        self.user_model: tf.keras.layers.Layer = tf.keras.Sequential([
          tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids, mask_token=None),
          tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Rating model
        self.rating_model = tf.keras.Sequential([
            tf.keras.layers.Dense(512, activation="relu"),
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(32, activation="relu"),
            tf.keras.layers.Dense(1),
        ])

        # The tasks.
        self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[tf.keras.metrics.RootMeanSquaredError()],
        )
        self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=books.batch(16).map(self.book_model) #128 32
            )
        )

        # The loss weights.
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight

    def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
       
        user_embeddings = self.user_model(features["user_id"])
       
        book_embeddings = self.book_model(features["book_isbn"])

        return (
            user_embeddings,
            book_embeddings,
            # We apply the multi-layered rating model to a concatentation of
            # user and book embeddings.
            self.rating_model(
                tf.concat([user_embeddings, book_embeddings], axis=1)
            ),
        )

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:

        ratings = features.pop("user_rating")

        user_embeddings, book_embeddings, rating_predictions = self(features)

        # We compute the loss for each task.
        rating_loss = self.rating_task(
            labels=ratings,
            predictions=rating_predictions,
        )
        retrieval_loss = self.retrieval_task(user_embeddings, book_embeddings)
        # And combine them using the loss weights.
        return (self.rating_weight * rating_loss
                + self.retrieval_weight * retrieval_loss)

## Rating-specialized model

In [20]:
model = BookModel(rating_weight=1.0, retrieval_weight=0.0)
model.compile(optimizer=tf.keras.optimizers.AdamW(0.005))

In [21]:
cached_train = train.shuffle(800).batch(32).cache() #128 64
cached_test = test.batch(16).cache() #64 32

In [None]:
history_rating_model = model.fit(cached_train, epochs= 100)
metrics = model.evaluate(cached_test, return_dict=True)

print(f"Retrieval top-100 accuracy: {metrics['factorized_top_k/top_100_categorical_accuracy']:.3f}.")
print(f"Ranking RMSE: {metrics['root_mean_squared_error']:.3f}.")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100


Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100

## Retrieval-specialized model

In [None]:
model = BookModel(rating_weight=0.0, retrieval_weight=1.0)
model.compile(optimizer=tf.keras.optimizers.AdamW(0.005))

In [None]:
history_retrieval_model = model.fit(cached_train, epochs= 100)
metrics = model.evaluate(cached_test, return_dict=True)

print(f"Retrieval top-100 accuracy: {metrics['factorized_top_k/top_100_categorical_accuracy']:.3f}.")
print(f"Ranking RMSE: {metrics['root_mean_squared_error']:.3f}.")

### Joint model

In [None]:
model = BookModel(rating_weight=1.0, retrieval_weight=1.0)
model.compile(optimizer=tf.keras.optimizers.AdamW(0.005))

In [None]:
history_joint_model = model.fit(cached_train, epochs= 100)
metrics = model.evaluate(cached_test, return_dict=True)

print(f"Retrieval top-100 accuracy: {metrics['factorized_top_k/top_100_categorical_accuracy']:.3f}.")
print(f"Ranking RMSE: {metrics['root_mean_squared_error']:.3f}.")

### DATA

In [None]:
def draw_plot(name, joint, retrieval, rating):
    epochs = range(1, len(joint) + 1)
    
    plt.plot(epochs, joint, 'b', label= name +' Joint')
    plt.plot(epochs, retrieval, 'y', label=name + ' Retrieval')
    plt.plot(epochs, rating, 'g', label=name + ' Rating')
    plt.title(name)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [None]:
RMSE_joint = history_joint_model.history['root_mean_squared_error']
RMSE_retrieval = history_retrieval_model.history['root_mean_squared_error']
RMSE_rating = history_rating_model.history['root_mean_squared_error']

draw_plot('RMSE', RMSE_joint,RMSE_retrieval,RMSE_rating)

In [None]:
total_loss_values_joint = history_joint_model.history['total_loss']
total_loss_values_retrieval = history_retrieval_model.history['total_loss']
total_loss_values_rating = history_rating_model.history['total_loss']

draw_plot('Training Loss', total_loss_values_joint,total_loss_values_retrieval,total_loss_values_rating)

## Making predictions

In [None]:
trained_movie_embeddings, trained_user_embeddings, predicted_rating = model({
      "user_id": np.array(["3avc3TUJioP8XGD0bLK9xtV7uIG3"]),
      "book_isbn": np.array(["09781880685358"])
  })
print("Predicted rating:")
print(predicted_rating)

In [None]:
user_id = ["3avc3TUJioP8XGD0bLK9xtV7uIG3"]
isbn_list = set(books_df["isbn"])
for isbn in isbn_list:
    trained_movie_embeddings, trained_user_embeddings, predicted_rating = model({
      "user_id": np.array(user_id),
      "book_isbn": np.array([isbn])
    })
    print(predicted_rating)

### Save the model 

In [32]:
model.retrieval_task = tfrs.tasks.Retrieval()  # Removes the metrics.
model.compile()
model.save("final_model")

INFO:tensorflow:Assets written to: final_model\assets


INFO:tensorflow:Assets written to: final_model\assets


In [33]:
# load model
model = tf.keras.models.load_model("final_model")

In [34]:
# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model("final_model") # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open('final_model.tflite', 'wb') as f:
    f.write(tflite_model)

### Tf Lite

In [35]:
interpreter = tf.lite.Interpreter(model_path="final_model.tflite")
interpreter.allocate_tensors()

#Get input details
input_details = interpreter.get_input_details()
for input_tensor in input_details:
    print("Input name:", input_tensor["name"])
    print("Input shape:", input_tensor["shape"])
    print("Input data type:", input_tensor["dtype"])
    print()
#Get output details
output_details = interpreter.get_output_details()
for output_tensor in output_details:
    print("Output name:", output_tensor["name"])
    print("Output shape:", output_tensor["shape"])
    print("Output data type:", output_tensor["dtype"])
    print()

Input name: serving_default_book_isbn:0
Input shape: [1]
Input data type: <class 'numpy.bytes_'>

Input name: serving_default_user_id:0
Input shape: [1]
Input data type: <class 'numpy.bytes_'>

Output name: StatefulPartitionedCall:0
Output shape: []
Output data type: <class 'numpy.float32'>

Output name: StatefulPartitionedCall:2
Output shape: [1 1]
Output data type: <class 'numpy.float32'>

Output name: StatefulPartitionedCall:1
Output shape: []
Output data type: <class 'numpy.float32'>



In [36]:
# Prepare the input data
input_data_isbn = np.array([b'09780143036357'], dtype=np.bytes_)
input_data_user_id = np.array(['3avc3TUJioP8XGD0bLK9xtV7uIG3'], dtype=np.bytes_)

input_details = interpreter.get_input_details()
interpreter.set_tensor(input_details[0]['index'], input_data_isbn)
interpreter.set_tensor(input_details[1]['index'], input_data_user_id)

# Run the inference
interpreter.invoke()

# Retrieve the output results
output_details = interpreter.get_output_details()

output_data_prediction = interpreter.get_tensor(output_details[0]['index'])
output_data_probabilities = interpreter.get_tensor(output_details[1]['index'])
output_data_score = interpreter.get_tensor(output_details[2]['index'])

# Process the output
#prediction = output_data_prediction.squeeze()
probabilities = output_data_probabilities.squeeze()
#score = output_data_score.squeeze()

# Print the results
#print("Prediction:", prediction)
print("Probability:", probabilities)
#print("Score:", score)

Probability: 3.3923666


### Tensorflow recommenders 

#### Brute Force

In [37]:
# Create a model that takes in raw query features, and
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# recommends books out of the entire books dataset.
index.index_from_dataset(
  tf.data.Dataset.zip((books.batch(100), books.batch(100).map(model.book_model)))
)

# Get recommendations.
_, isbns = index(np.array(["Pgzb07La4DUNOhYPzYXHA7CdfNi1"]))
print(f"Recommendations for user: {isbns[0, :10]}")

Recommendations for user: [b'09781857024074' b'09780375701801' b'09780465014903' b'09780140286014'
 b'09781592289806' b'09781859843406' b'09780618257768' b'09780590428880'
 b'09780226142814' b'09780195309683']


#### Save the brute force model

In [38]:
index.save("final_model")









INFO:tensorflow:Assets written to: final_model\assets


INFO:tensorflow:Assets written to: final_model\assets










In [39]:
# test loading 
loaded = tf.saved_model.load("final_model")

# Pass a user id in, get top predicted movie titles back.
scores, isbns = loaded(["Pgzb07La4DUNOhYPzYXHA7CdfNi1"])

print(f"Recommendations: {isbns[0][:5]}")

Recommendations: [b'09781857024074' b'09780375701801' b'09780465014903' b'09780140286014'
 b'09781592289806']


#### TFLite for the model 

In [40]:
# Convert the model
converter = tf.lite.TFLiteConverter.from_saved_model("final_model") # path to the SavedModel directory
tflite_model = converter.convert()

# Save the model.
with open('final_model.tflite', 'wb') as f:
    f.write(tflite_model)

#### Tf lite testing 

In [41]:
interpreter = tf.lite.Interpreter(model_path="final_model.tflite")
interpreter.allocate_tensors()

#Get input details
input_details = interpreter.get_input_details()
for input_tensor in input_details:
    print("Input name:", input_tensor["name"])
    print("Input shape:", input_tensor["shape"])
    print("Input data type:", input_tensor["dtype"])
    print()
#Get output details
output_details = interpreter.get_output_details()
for output_tensor in output_details:
    print("Output name:", output_tensor["name"])
    print("Output shape:", output_tensor["shape"])
    print("Output data type:", output_tensor["dtype"])
    print()

Input name: serving_default_input_1:0
Input shape: [1]
Input data type: <class 'numpy.bytes_'>

Output name: StatefulPartitionedCall_1:0
Output shape: [ 1 10]
Output data type: <class 'numpy.float32'>

Output name: StatefulPartitionedCall_1:1
Output shape: [ 1 10]
Output data type: <class 'numpy.bytes_'>



In [42]:
# Prepare the input data
input_data = np.array(["zwVJUfdC0oa9hWWp9uK0hRTM71j1"], dtype=np.bytes_)

input_details = interpreter.get_input_details()
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run the inference
interpreter.invoke()

# Retrieve the output results
output_details = interpreter.get_output_details()

output_data_prediction = interpreter.get_tensor(output_details[0]['index'])
output_data_classes = interpreter.get_tensor(output_details[1]['index'])

# Process the output
#prediction = output_data_prediction.squeeze()
classes = output_data_classes.squeeze().astype(str)

# Print the results
#print("Prediction:", prediction)
print("Classes:", classes)

Classes: ['09780851621814' '09780446617451' '09780253203182' '09780747573623'
 '09780143104902' '09781841492667' '09781572244252' '09780452270848'
 '09781582406930' '09780618710539']


In [43]:
import firebase_admin
from firebase_admin import ml
from firebase_admin import credentials

firebase_admin.initialize_app(
  credentials.Certificate('thesis-bd8c8-firebase-adminsdk-fqj6e-e1d094b473.json'),
  options={
      'storageBucket': 'thesis-bd8c8.appspot.com',
  })


<firebase_admin.App at 0x135ea8eb190>

In [44]:
#Upload model

In [45]:
source = ml.TFLiteGCSModelSource.from_tflite_model_file('final_model.tflite')
tflite_format = ml.TFLiteFormat(model_source=source)
model = ml.Model(display_name="final_model", model_format=tflite_format)
new_model = ml.create_model(model)
ml.publish_model(new_model.model_id)
print(new_model.model_id)

21877933


In [46]:
#Update model

In [47]:
model = ml.get_model(new_model.model_id)
source = ml.TFLiteGCSModelSource.from_tflite_model_file('final_model.tflite')
model.mode_format = ml.TFLiteFormat(model_source=source)
model.display_name = "final_model"
updated_model = ml.update_model(model)
ml.publish_model(updated_model.model_id)

<firebase_admin.ml.Model at 0x135e9707210>