In [1]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q scann

[K     |████████████████████████████████| 85 kB 2.7 MB/s 
[K     |████████████████████████████████| 4.3 MB 4.0 MB/s 
[K     |████████████████████████████████| 98 kB 4.6 MB/s 
[K     |████████████████████████████████| 11.2 MB 3.8 MB/s 
[K     |████████████████████████████████| 511.7 MB 5.7 kB/s 
[K     |████████████████████████████████| 5.8 MB 28.6 MB/s 
[K     |████████████████████████████████| 438 kB 50.0 MB/s 
[K     |████████████████████████████████| 1.6 MB 40.3 MB/s 
[?25h

In [2]:
import os
import tempfile

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

plt.style.use('seaborn-whitegrid')

import pathlib
import pandas as pd

np.set_printoptions(precision=4)

# DATASET

In [3]:
# Read the semicolon-separated interaction table. Both id columns are forced
# to strings so they are treated as labels rather than as numbers.
dataset = 'dataset.csv'
userid = pd.read_csv(
    dataset,
    sep=';',
    dtype=dict(User_id=str, Class_id=str),
)
userid.head()

Unnamed: 0,User_id,Class_id,Order_Rating,Subject_id
0,93,19,4.5,1
1,32,3,2.3,3
2,75,83,2.8,5
3,18,12,4.9,1
4,79,28,3.5,2


In [4]:
# Read the semicolon-separated class table, keeping the id columns as strings.
kelas = pd.read_csv(
    'kelas.csv',
    sep=';',
    dtype=dict(User_id=str, Class_id=str),
)
kelas.head()

Unnamed: 0,Class_id
0,68
1,27
2,82
3,90
4,31


In [5]:
# Turn the DataFrame columns into a tf.data.Dataset of per-row feature dicts.
# Note: this rebinds `userid`, so the cell cannot be re-run on its own without
# reloading the DataFrame first.
userid = tf.data.Dataset.from_tensor_slices(dict(userid))

# Show every feature of the first element as a quick sanity check.
for first_row in userid.take(1):
  for column, tensor in first_row.items():
    print(f"  {column!r:20s}: {tensor}")

  'User_id'           : b'93'
  'Class_id'          : b'19'
  'Order_Rating'      : 4.5
  'Subject_id'        : 1


In [6]:
# Turn the class table into a tf.data.Dataset of per-row feature dicts.
# Note: this rebinds `kelas`, so the cell cannot be re-run on its own without
# reloading the DataFrame first.
kelas = tf.data.Dataset.from_tensor_slices(dict(kelas))

# Show every feature of the first element as a quick sanity check.
for first_row in kelas.take(1):
  for column, tensor in first_row.items():
    print(f"  {column!r:20s}: {tensor}")

  'Class_id'          : b'68'


In [7]:
# Keep only the three interaction fields that the models read.
user_id = userid.map(lambda row: {
    name: row[name] for name in ("Class_id", "User_id", "Order_Rating")
})

# The candidate dataset is just the class id of every row in `kelas`.
class_id = kelas.map(lambda row: row["Class_id"])

In [8]:
# Collect every rating into one NumPy vector, reading the dataset in batches
# of 100.
rating_batches = list(userid.map(lambda row: row["Order_Rating"]).batch(100))
rating = np.concatenate(rating_batches)

min_rating, max_rating = rating.min(), rating.max()

# 1000 evenly spaced boundaries spanning the observed rating range.
rating_buckets = np.linspace(min_rating, max_rating, num=1000)

# Distinct class ids and user ids, used as lookup vocabularies by the models.
class_id_batches = list(class_id.batch(1000))
unique_class_ids = np.unique(np.concatenate(class_id_batches))

user_id_batches = list(
    user_id.batch(1_000).map(lambda batch: batch["User_id"]))
unique_user_ids = np.unique(np.concatenate(user_id_batches))

## MODEL

Query model

In [9]:
class UserModel(tf.keras.Model):
  """Maps raw query features to a single concatenated feature vector.

  Construction reads the module-level `unique_user_ids`, `rating_buckets` and
  `rating` values. `call` expects a dict holding "User_id" and "Order_Rating".
  """

  def __init__(self):
    super().__init__()

    # User id -> vocabulary index -> 32-wide embedding. The table has one row
    # more than the vocabulary.
    user_lookup = tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None)
    user_table = tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32)
    self.user_embedding = tf.keras.Sequential([user_lookup, user_table])

    # Rating -> bucket index -> 32-wide embedding. The table has one row more
    # than there are bucket boundaries.
    rating_bucketizer = tf.keras.layers.Discretization(rating_buckets.tolist())
    rating_table = tf.keras.layers.Embedding(len(rating_buckets) + 1, 32)
    self.rating_embedding = tf.keras.Sequential(
        [rating_bucketizer, rating_table])

    # The rating is additionally fed in as a normalized scalar; the layer's
    # statistics are fitted on the full rating vector here.
    self.normalized_rating = tf.keras.layers.Normalization(axis=None)
    self.normalized_rating.adapt(rating)

  def call(self, inputs):
    """Returns the user embedding, rating embedding and scaled rating joined on axis 1."""
    user_vector = self.user_embedding(inputs["User_id"])
    rating_vector = self.rating_embedding(inputs["Order_Rating"])
    scaled_rating = self.normalized_rating(inputs["Order_Rating"])
    # Give the scalar a trailing axis of size 1 so it can be concatenated.
    scaled_rating = tf.reshape(scaled_rating, (-1, 1))
    return tf.concat([user_vector, rating_vector, scaled_rating], axis=1)

In [10]:
class QueryModel(tf.keras.Model):
  """Query tower: `UserModel` features followed by a stack of dense layers.

  Args:
    layer_sizes: sequence of dense layer widths. Every layer except the last
      uses ReLU; the last one has no activation.
  """

  def __init__(self, layer_sizes):
    super().__init__()

    # Feature embeddings come from the user model.
    self.embedding_model = UserModel()

    # Dense stack on top of the embeddings.
    self.dense_layers = tf.keras.Sequential()
    last_index = len(layer_sizes) - 1
    for index, units in enumerate(layer_sizes):
      # Only the final layer is left linear.
      activation = "relu" if index < last_index else None
      self.dense_layers.add(
          tf.keras.layers.Dense(units, activation=activation))

  def call(self, inputs):
    """Embeds `inputs` and passes the result through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

Candidate model

In [11]:
class kelasModel(tf.keras.Model):
  """Maps class id strings to a concatenated feature vector.

  Construction reads the module-level `unique_class_ids` and `class_id`
  values.
  """

  def __init__(self):
    super().__init__()

    max_tokens = 10_000
    layers = tf.keras.layers

    # Class id -> vocabulary index -> 32-wide embedding. The table has one row
    # more than the vocabulary.
    id_lookup = layers.StringLookup(
        vocabulary=unique_class_ids, mask_token=None)
    id_table = layers.Embedding(len(unique_class_ids) + 1, 32)
    self.class_embedding = tf.keras.Sequential([id_lookup, id_table])

    # The same string is also tokenized as text, embedded per token and
    # averaged.
    self.class_vectorizer = layers.TextVectorization(max_tokens=max_tokens)

    token_table = layers.Embedding(max_tokens, 32, mask_zero=True)
    token_pooling = layers.GlobalAveragePooling1D()
    self.class_text_embedding = tf.keras.Sequential(
        [self.class_vectorizer, token_table, token_pooling])

    # Fit the tokenizer vocabulary on the candidate class ids.
    self.class_vectorizer.adapt(class_id)

  def call(self, titles):
    """Returns the id embedding and the text embedding joined on axis 1."""
    id_vector = self.class_embedding(titles)
    text_vector = self.class_text_embedding(titles)
    return tf.concat([id_vector, text_vector], axis=1)

In [12]:
class CandidateModel(tf.keras.Model):
  """Candidate tower: `kelasModel` features followed by a stack of dense layers.

  Args:
    layer_sizes: sequence of dense layer widths. Every layer except the last
      uses ReLU; the last one has no activation.
  """

  def __init__(self, layer_sizes):
    super().__init__()

    self.embedding_model = kelasModel()

    hidden_layers = [
        tf.keras.layers.Dense(units, activation="relu")
        for units in layer_sizes[:-1]
    ]
    # Slicing (rather than indexing) keeps an empty `layer_sizes` valid.
    output_layers = [
        tf.keras.layers.Dense(units) for units in layer_sizes[-1:]
    ]

    self.dense_layers = tf.keras.Sequential()
    for layer in hidden_layers + output_layers:
      self.dense_layers.add(layer)

  def call(self, inputs):
    """Embeds `inputs` and passes the result through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

Combined model

In [13]:
class MainModel(tfrs.models.Model):
  """Retrieval model pairing a `QueryModel` with a `CandidateModel`.

  Args:
    layer_sizes: dense layer widths, passed unchanged to both towers.
  """

  def __init__(self, layer_sizes):
    super().__init__()
    self.query_model = QueryModel(layer_sizes)
    self.candidate_model = CandidateModel(layer_sizes)

    # Top-K metrics are computed against the embeddings of every candidate in
    # the module-level `class_id` dataset.
    candidate_embeddings = class_id.batch(128).map(self.candidate_model)
    top_k_metrics = tfrs.metrics.FactorizedTopK(
        candidates=candidate_embeddings)
    self.task = tfrs.tasks.Retrieval(metrics=top_k_metrics)

  def compute_loss(self, features, training=False):
    """Returns the retrieval task's result for one batch of `features`.

    `features` must hold "User_id", "Order_Rating" and "Class_id". Metrics are
    computed only when `training` is false.
    """
    query_inputs = {
        name: features[name] for name in ("User_id", "Order_Rating")
    }
    query_embeddings = self.query_model(query_inputs)
    class_embeddings = self.candidate_model(features["Class_id"])

    compute_metrics = not training
    return self.task(
        query_embeddings, class_embeddings, compute_metrics=compute_metrics)

## Training

In [14]:
# Number of shuffled rows used for training and for the held-out split.
TRAIN_SIZE = 8000
TEST_SIZE = 100

tf.random.set_seed(42)
# A fixed seed with reshuffle_each_iteration=False keeps the row order the
# same on every pass, so take()/skip() below always select the same rows.
shuffled = user_id.shuffle(8000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(TRAIN_SIZE)
# Start the test split after the last training row. The earlier skip(2000)
# selected rows 2000-2099, which are also part of the first 8000 training
# rows, so the "held-out" data had already been seen during training.
# NOTE(review): this assumes the dataset has more than TRAIN_SIZE rows; with
# fewer rows the test split comes out short or empty - confirm the row count.
test = shuffled.skip(TRAIN_SIZE).take(TEST_SIZE)

cached_train = train.shuffle(10000).batch(1024).cache()
cached_test = test.batch(512).cache()

In [23]:
# Both towers use a 64-unit ReLU layer followed by a 32-unit linear output.
model = MainModel([64, 32])

# Adagrad with a learning rate of 0.1.
optimizer = tf.keras.optimizers.Adagrad(0.1)
model.compile(optimizer=optimizer)

model.fit(cached_train, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x7f497dec0ad0>

In [26]:
# ScaNN index that embeds raw queries with the trained query tower and
# returns the top 3 candidates.
retrieval = tfrs.layers.factorized_top_k.ScaNN(
    model.query_model, k=3, num_leaves=10)

# Index (identifier, embedding) pairs: each batch of class ids is zipped with
# the candidate tower's embeddings of that same batch.
candidate_batches = class_id.batch(1000)
candidate_embeddings = candidate_batches.map(model.candidate_model)
retrieval.index_from_dataset(
    tf.data.Dataset.zip((candidate_batches, candidate_embeddings))
)


<tensorflow_recommenders.layers.factorized_top_k.ScaNN at 0x7f497db2f790>

In [27]:
# Run one real query through the index before exporting. The layer has to be
# called once so its input shapes are set; no earlier cell does this, so a
# fresh Restart-and-Run-All would otherwise export a layer that was never
# called. The query is taken from the training data so its dtypes match what
# the query tower was trained on.
sample_row = next(iter(user_id.batch(1)))
_ = retrieval({
    "User_id": sample_row["User_id"],
    "Order_Rating": sample_row["Order_Rating"],
})

# The "Scann" op namespace is whitelisted so the export accepts the ops used
# by the ScaNN index.
# NOTE(review): the target is an absolute path under /content - confirm this
# is intended outside the environment that produced the saved outputs.
tf.saved_model.save(
    retrieval,
    "/content/sample_data/Model",
    options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]),
)





INFO:tensorflow:Assets written to: /content/sample_data/Model/assets


INFO:tensorflow:Assets written to: /content/sample_data/Model/assets
