In [None]:
!pip install tensorflow-recommenders

In [2]:
import numpy as np
import pandas as pd
from typing import Dict, Text

import tensorflow as tf
import tensorflow_recommenders as tfrs

from sklearn.preprocessing import OrdinalEncoder

In [3]:
df = pd.read_csv('/content/drive/MyDrive/Notebook files./Recommender system datasets/Digital_Music.csv')

df = df[:1000]
df.head()

Unnamed: 0,0001388703,A1ZCPG3D3HGRSS,5.0,1387670400
0,1388703,AC2PL52NKPL29,5.0,1378857600
1,1388703,A1SUZXBDZSDQ3A,5.0,1362182400
2,1388703,A3A0W7FZXM0IZW,5.0,1354406400
3,1388703,A12R54MKO17TW0,5.0,1325894400
4,1388703,A25ZT87OMIPLNX,5.0,1247011200


In [4]:
df.columns = ['BookID', 'ReviewerID', 'Rating', 'UnixReviewTime']
df

Unnamed: 0,BookID,ReviewerID,Rating,UnixReviewTime
0,0001388703,AC2PL52NKPL29,5.0,1378857600
1,0001388703,A1SUZXBDZSDQ3A,5.0,1362182400
2,0001388703,A3A0W7FZXM0IZW,5.0,1354406400
3,0001388703,A12R54MKO17TW0,5.0,1325894400
4,0001388703,A25ZT87OMIPLNX,5.0,1247011200
...,...,...,...,...
995,5559166928,A1WVUHB2O38GMP,5.0,1508803200
996,5559166928,A3N4YNH7EZJH31,3.0,1508371200
997,5559166928,A7TEC03UV5IIQ,5.0,1507507200
998,5559166928,A3E6PN9V6XJVC5,5.0,1507075200


In [5]:
# #Encode columns with categorical variables.
# s = (df.dtypes == 'object')
# object_cols = list(s[s].index)

# print(f'Categorical variables are:{object_cols}')

In [6]:
# ordinal_encoder = OrdinalEncoder()
# df[object_cols] = ordinal_encoder.fit_transform(df[object_cols])

In [7]:
combined = df.drop_duplicates(['ReviewerID', 'BookID'])
user_book_matrix = combined.pivot(index = 'ReviewerID', columns = 'BookID', values = 'Rating')
user_book_matrix.fillna(0, inplace = True)

user_book_matrix.head()

BookID,0001377647,0001388703,0001526146,0001527134,0006920055,0006935257,0615897398,0760103453,0760131694,0830838015,...,555757266X,5557575758,5557585400,5557706259,5557721835,555820690X,5558870850,5558925922,5559166928,5559571814
ReviewerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A106GSY0H5E2R4,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A106RJ1JBUA0TO,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10CSSGW3ESBCA,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A10GWU3EY05QBN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
A10LP9BBQWYP6N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
df['Rating'].value_counts()

5.0    851
4.0     74
3.0     39
1.0     24
2.0     12
Name: Rating, dtype: int64

In [9]:
#Build vocabularies to convert user IDs and book titles to integers for embedding layers.
user_ids_vocabulary = tf.keras.layers.StringLookup()
user_ids_vocabulary.adapt(df['ReviewerID'])

book_titles_vocabulary = tf.keras.layers.StringLookup()
book_titles_vocabulary.adapt(df['BookID'])

In [10]:
#Define a tfrs model.
class BookReviewModel(tfrs.Model):

  def __init__(
      self, user_model: tf.keras.Model, book_model: tf.keras.Model, task: tfrs.tasks.Retrieval):
    super().__init__()

    #Set up user and book representations.
    self.user_model = user_model
    self.book_model = book_model

    #Set up a retrieval task.
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training = False):
    #Define how the loss is computed.
    user_embeddings = self.user_model(df['ReviewerID'])  
    book_embeddings = self.book_model(df['BookID'])

    return self.task(user_embeddings, book_embeddings)

In [19]:
#Convert 'book_id' to a tf.data object.

book_ID = tf.data.Dataset.from_tensor_slices(df['BookID'])
reviews = tf.data.Dataset.from_tensor_slices(df['ReviewerID'])

In [12]:
#Define the user and book models.
user_model = tf.keras.Sequential([
                                  user_ids_vocabulary,
                                  tf.keras.layers.Embedding(user_ids_vocabulary.vocab_size(), 64)
])

book_model = tf.keras.Sequential([
                                  book_titles_vocabulary,
                                  tf.keras.layers.Embedding(book_titles_vocabulary.vocab_size(), 64)
])

#Define your objectives.
task = tfrs.tasks.Retrieval(metrics = tfrs.metrics.FactorizedTopK(
    book_ID.batch(128).map(book_model)
))



In [13]:
#Create a retrieval model.
model = BookReviewModel(user_model, book_model, task)
model.compile(optimizer = tf.keras.optimizers.Adagrad(learning_rate = 0.5))



In [14]:
#Train for 3 epochs.
model.fit(reviews.batch(100), epochs = 3)

Epoch 1/3


UnimplementedError: ignored

In [15]:
print(reviews)

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>


In [16]:
df['ReviewerID'].dtypes

dtype('O')

In [17]:
df.dtypes

BookID             object
ReviewerID         object
Rating            float64
UnixReviewTime      int64
dtype: object