In [2]:
from typing import Dict, Text
import numpy as np
import tensorflow as tf
import pandas as pd
import tensorflow_recommenders as tfrs

In [3]:
df = pd.read_csv('Dataset/fiction dataset - Sheet1.csv')
df.head(2)

Unnamed: 0,fiction_id,title,overview,language,release_date,latest_update,tags,genres,chapter
0,1,Kejahatan Cinta,"Seorang detektif swasta, Anya, sedang menyelid...",Indonesia,2023-10-01,2023-10-06,"detektif,pembunuhan,misteri,cinta,romansa","romansa,misteri",2
1,2,Raja Terakhir,"Di dunia yang dikuasai oleh iblis, seorang pem...",Indonesia,2023-10-02,2023-10-02,"fantasi,aksi,petualangan,pertarungan,kekuatan","fantasi,aksi",1


In [4]:
# read data rating
r_df = pd.read_csv('Dataset/Rating Dataset - Sheet1.csv')
rating_data = r_df[['user_id', 'fiction_id', 'rating']]
rating_data.head(3)

Unnamed: 0,user_id,fiction_id,rating
0,29,39,0
1,17,44,4
2,94,59,0


Merger

In [5]:
rating_data = rating_data.merge(df[['fiction_id', 'title', 'genres', 'overview']], left_on='fiction_id', right_on='fiction_id')
rating_data.head(3)

Unnamed: 0,user_id,fiction_id,rating,title,genres,overview
0,29,39,0,Pangeran yang Tertukar,"romansa,petualangan,aksi",Seorang gadis muda menemukan pangeran yang ter...
1,100,39,1,Pangeran yang Tertukar,"romansa,petualangan,aksi",Seorang gadis muda menemukan pangeran yang ter...
2,45,39,2,Pangeran yang Tertukar,"romansa,petualangan,aksi",Seorang gadis muda menemukan pangeran yang ter...


In [6]:
fanfic_df = df[['fiction_id', 'title']]
fanfic_df.head(2)

Unnamed: 0,fiction_id,title
0,1,Kejahatan Cinta
1,2,Raja Terakhir


In [70]:
rating_data['user_id'] = rating_data['user_id'].astype(str)

ratings = tf.data.Dataset.from_tensor_slices(dict(rating_data[['user_id', 'title', 'rating']]))
fanfics = tf.data.Dataset.from_tensor_slices(dict(fanfic_df[['title']]))

ratings = ratings.map(lambda x: {
    'title': x['title'],
    'user_id': x['user_id'],
    'rating': float(x['rating'])
})

fanfics = fanfics.map(lambda x: x['title'])

In [71]:
print('Total Data: {}'.format(len(ratings)))

Total Data: 5027


In [72]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(5027, seed=42, reshuffle_each_iteration=False)

train = ratings.take(4017)
test = ratings.skip(4017).take(1010)

In [119]:
fanfic_titles = fanfics.batch(16)
user_ids = ratings.batch(16).map(lambda x: x['user_id'])

In [120]:
unique_fanfics_titles = np.unique(np.concatenate(list(fanfic_titles)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

print('Unique Fanfic: {}'.format(len(unique_fanfics_titles)))
print('Unique users: {}'.format(len(unique_user_ids)))

Unique Fanfic: 99
Unique users: 100


Note: Ada judul yang sama

In [121]:
class FanficsModel(tfrs.models.Model):

  def __init__(self, rating_weight: float, retrieval_weight: float) -> None:

    super().__init__()

    embedding_dimension = 32

    # User and fanfic models.
    self.fanfic_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_fanfics_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_fanfics_titles) + 1, embedding_dimension)
    ])
    self.user_model: tf.keras.layers.Layer = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_user_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
    ])

    self.rating_model = tf.keras.Sequential([
      tf.keras.layers.Dense(256, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(128, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(1),
    ])

    self.rating_task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanAbsoluteError()],
    )
    self.retrieval_task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
        metrics=tfrs.metrics.FactorizedTopK(
            candidates=fanfics.batch(128).map(self.fanfic_model)
        )
    )

    self.rating_weight = rating_weight
    self.retrieval_weight = retrieval_weight

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    # We pick out the user features and pass them into the user model.
    user_embeddings = self.user_model(features['user_id'])
    # And pick out the fanfic features and pass them into the fanfic model.
    fanfic_embeddings = self.fanfic_model(features['title'])
    
    return (
        user_embeddings,
        fanfic_embeddings,
        # We apply the multi-layered rating model to a concatentation of
        # user and fanfic embeddings.
        self.rating_model(
            tf.concat([user_embeddings, fanfic_embeddings], axis=1)
        ),
    )

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:

    ratings = features.pop('rating')

    user_embeddings, fanfic_embeddings, rating_predictions = self(features)

    # We compute the loss for each task.
    rating_loss = self.rating_task(
        labels=ratings,
        predictions=rating_predictions,
    )
    retrieval_loss = self.retrieval_task(user_embeddings, fanfic_embeddings)

    # And combine them using the loss weights.
    return (self.rating_weight * rating_loss
            + self.retrieval_weight * retrieval_loss)

In [122]:
model = FanficsModel(rating_weight=0.5, retrieval_weight=0.5)
model.compile(optimizer=tf.keras.optimizers.Adagrad(1e-3))

cached_train = train.shuffle(5027).batch(32).cache()
cached_test = test.batch(32).cache()

model.fit(cached_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x15d09653ad0>

In [123]:
metrics = model.evaluate(cached_test, return_dict=True)

print(f"\nRetrieval top-10 accuracy: {metrics['factorized_top_k/top_10_categorical_accuracy']:.3f}")
print(f"Ranking MAE: {metrics['mean_absolute_error']:.3f}")


Retrieval top-10 accuracy: 0.100
Ranking MAE: 1.496


In [124]:
def predict_fanfic(user, top_n=3):
    # Create a model that takes in raw query features, and
    index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
    # recommends fanfics out of the entire fanfics dataset.
    index.index_from_dataset(
      tf.data.Dataset.zip((fanfics.batch(100), fanfics.batch(100).map(model.fanfic_model)))
    )

    # Get recommendations.
    _, titles = index(tf.constant([str(user)]))
    
    print('Top {} recommendations for user {}:\n'.format(top_n, user))
    for i, title in enumerate(titles[0, :top_n].numpy()):
        print('{}. {}'.format(i+1, title.decode("utf-8")))

def predict_rating(user, fanfic):
    trained_fanfic_embeddings, trained_user_embeddings, predicted_rating = model({
          "userId": np.array([str(user)]),
          "original_title": np.array([fanfic])
      })
    print("Predicted rating for {}: {}".format(fanfic, predicted_rating.numpy()[0][0]))

In [125]:
predict_fanfic(5, 10)

Top 10 recommendations for user 5:

1. Perjalanan ke Dunia Bawah
2. Detektif Hewan
3. Ksatria Tanpa Pedang
4. Pangeran yang Tertukar
5. Perjuangan Seorang Atlet
6. Perjalanan Menembus Dimensi
7. Kisah Cinta yang Tak Terlupakan
8. Keajaiban Cinta
9. Penyelamat Dunia
10. Atlantea
