<a href="https://colab.research.google.com/github/Malthr/sembago-ml/blob/main/Ranking_stage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [55]:
import os
import pprint
import pandas as pd

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from google.colab import drive

In [56]:
drive.mount('/content/drive')
%cd /content/drive/Shareddrives/Machine Learning CH2-PS393

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/Shareddrives/Machine Learning CH2-PS393


In [57]:
# Convert CSV Dataset into TensorFlow Dataset
def load_dataset(file_path, features, labels=None):
  df = pd.read_csv(file_path)

  features = df[features]

  if labels:
    labels = df[labels]
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
  else:
    dataset = tf.data.Dataset.from_tensor_slices(dict(features))

  return dataset

In [58]:
ratings = load_dataset('dummy_ratings.csv', ['id_user', 'id_produk', 'rating'])

In [59]:
# Check Dataset Length
print(len(ratings))

500


In [60]:
ratings = ratings.map(lambda x: {
    "id_produk": x["id_produk"],
    "id_user": x["id_user"],
    "rating": x["rating"]
})

In [61]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(1_000, seed=42, reshuffle_each_iteration=False)

train_percentage = 80

train_size = int(train_percentage / 100 * len(ratings))
test_size = int((100 - train_percentage) / 100 * len(ratings))

train = shuffled.take(train_size)
test = shuffled.skip(train_size).take(test_size)

In [62]:
# Check Splitted Data Length
print(len(train))
print(len(test))

400
100


In [63]:
id_produks = ratings.batch(1_000).map(lambda x: x["id_produk"])
id_users = ratings.batch(1_000).map(lambda x: x["id_user"])

unique_id_produks = np.unique(np.concatenate(list(id_produks)))
unique_id_users = np.unique(np.concatenate(list(id_users)))

In [141]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 128

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_id_users, mask_token=None),
      tf.keras.layers.Embedding(len(unique_id_users) + 1, embedding_dimension)
    ])

    # Compute embeddings for produks.
    self.produk_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_id_produks, mask_token=None),
      tf.keras.layers.Embedding(len(unique_id_produks) + 1, embedding_dimension)
    ])

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(32, activation="relu"),
      tf.keras.layers.Dense(32, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])

  def call(self, inputs):

    id_user, id_produk = inputs

    user_embedding = self.user_embeddings(id_user)
    produk_embedding = self.produk_embeddings(id_produk)

    return self.ratings(tf.concat([user_embedding, produk_embedding], axis=1))

In [142]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [143]:
class ProdRankingRecModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["id_user"], features["id_produk"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("rating")

    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

In [144]:
model = ProdRankingRecModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.001))

In [145]:
cached_train = train.shuffle(100).batch(16).cache()
cached_test = test.batch(16).cache()

In [146]:
# Check Cached Data Length
print(len(cached_train))
print(len(cached_test))

25
7


In [147]:
model.fit(cached_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7befb1665cf0>

In [148]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.6942249536514282,
 'loss': 2.581789970397949,
 'regularization_loss': 0,
 'total_loss': 2.581789970397949}

In [149]:
tf.saved_model.save(model, "ranking_model")