In [1]:
!pip install -q tensorflow-recommenders

[K     |████████████████████████████████| 85 kB 2.9 MB/s 
[K     |████████████████████████████████| 462 kB 40.5 MB/s 
[?25h

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

In [3]:
# Read the comma-separated sentence file into a DataFrame and preview it.
df = pd.read_csv(filepath_or_buffer="datakalimat.txt", sep=",")
# head() defaults to the first five rows.
df.head()

Unnamed: 0,user_id,kalimat,kelas
0,1,istri dipukul suami,kekerasan
1,2,anak menerima kekerasan terus menerus oleh ora...,kekerasan
2,3,ayah dianiaya mertua,kekerasan
3,4,bibi dipukul paman,kekerasan
4,5,adik menerima kekerasan terus menerus oleh ora...,kekerasan


In [4]:
# Check the dtype pandas inferred for each column right after loading.
df.dtypes

user_id     int64
kalimat    object
kelas      object
dtype: object

In [5]:
# Cast user_id to str: this column is later fed to a StringLookup layer
# (see the vocabulary cell), so it is handled as text rather than as integers.
df = df.astype({"user_id": str})

In [6]:
# Re-check the dtypes after casting user_id to str.
df.dtypes

user_id    object
kalimat    object
kelas      object
dtype: object

In [7]:
# Preview the frame again after the user_id cast.
df.head()

Unnamed: 0,user_id,kalimat,kelas
0,1,istri dipukul suami,kekerasan
1,2,anak menerima kekerasan terus menerus oleh ora...,kekerasan
2,3,ayah dianiaya mertua,kekerasan
3,4,bibi dipukul paman,kekerasan
4,5,adik menerima kekerasan terus menerus oleh ora...,kekerasan


In [8]:
 # Two-column subsets keyed by user_id: one with the sentence text, one with
 # the class label.
 # NOTE(review): kelas_data is not referenced again in the visible cells.
 kalimat_data = df[["user_id", "kalimat"]]
 kelas_data = df[["user_id", "kelas"]]

In [9]:
# Show the first three rows of the user_id / kalimat subset.
kalimat_data.head(3)

Unnamed: 0,user_id,kalimat
0,1,istri dipukul suami
1,2,anak menerima kekerasan terus menerus oleh ora...
2,3,ayah dianiaya mertua


In [10]:
# Show the first three rows of the user_id / kelas subset.
kelas_data.head(3)

Unnamed: 0,user_id,kelas
0,1,kekerasan
1,2,kekerasan
2,3,kekerasan


In [11]:
# Turn the frames into tf.data datasets whose elements are dicts keyed by
# column name. ds_kelas is built from the full frame (not from kelas_data),
# so its elements carry user_id, kalimat and kelas.
ds_kalimat = tf.data.Dataset.from_tensor_slices(
    {column: kalimat_data[column] for column in kalimat_data.columns}
)
ds_kelas = tf.data.Dataset.from_tensor_slices(
    {column: df[column] for column in df.columns}
)

In [12]:
from typing import Dict, Text
# Training examples: keep only the sentence and the user id of every row.
classes = ds_kelas.map(
    lambda row: dict(kalimat=row["kalimat"], user_id=row["user_id"])
)
# Candidate set: the bare sentence strings.
sentences = ds_kalimat.map(lambda row: row["kalimat"])

In [13]:
# One StringLookup vocabulary per input, each created without a mask token
# and adapted on the values it will later look up.
user_id_stream = classes.map(lambda row: row["user_id"])
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(user_id_stream)

kalimat_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
kalimat_vocabulary.adapt(sentences)

In [14]:
class KalimatModel(tfrs.Model):
  """Retrieval model built from a user model, a sentence model and a task.

  Derives from ``tfrs.Model``; this class only stores its three components
  and defines how the loss is computed from a batch of features.
  """

  def __init__(
      self,
      user_model: tf.keras.Model,
      kalimat_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    """Store the user model, the sentence model and the retrieval task.

    Args:
      user_model: model applied to ``features["user_id"]``.
      kalimat_model: model applied to ``features["kalimat"]``.
      task: retrieval task called with both embeddings to produce the loss.
    """
    super().__init__()

    # User and sentence representations.
    self.user_model, self.kalimat_model = user_model, kalimat_model

    # Retrieval objective.
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Return the task's loss for one batch of features.

    Args:
      features: mapping that must contain the keys "user_id" and "kalimat".
      training: accepted as part of the signature; not used in this method.

    Returns:
      The value returned by ``self.task`` for the user and sentence embeddings.
    """
    return self.task(
        self.user_model(features["user_id"]),
        self.kalimat_model(features["kalimat"]),
    )

In [15]:
# Fix TensorFlow's global seed before any embedding layer is created, so the
# randomly initialised embedding weights (and therefore the recommendations
# printed at the end) are repeatable after Restart & Run All.
# NOTE(review): GPU execution may still introduce small run-to-run differences.
SEED = 42
tf.random.set_seed(SEED)

# Width of both embedding tables; the retrieval task scores a user embedding
# against a sentence embedding, so both models use the same size.
EMBEDDING_DIM = 64

# User model: user id string -> vocabulary index -> embedding vector.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), EMBEDDING_DIM)
])
# Sentence model: sentence string -> vocabulary index -> embedding vector.
kalimat_model = tf.keras.Sequential([
    kalimat_vocabulary,
    tf.keras.layers.Embedding(kalimat_vocabulary.vocabulary_size(), EMBEDDING_DIM)
])

# Retrieval objective with a FactorizedTopK metric evaluated against the
# embeddings of the candidate sentences, computed in batches of 128.
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    sentences.batch(128).map(kalimat_model)
  )
)

In [20]:
# Assemble the retrieval model from the two embedding models and the task,
# then compile it with an Adagrad optimizer.
model = KalimatModel(user_model, kalimat_model, task)
optimizer = tf.keras.optimizers.Adagrad(0.5)
model.compile(optimizer=optimizer)

# Three passes over the training examples, in batches of up to 4096.
training_batches = classes.batch(4096)
model.fit(training_batches, epochs=3)

# Brute-force index: queries go through the trained user model, and the
# candidates are (sentence, sentence embedding) pairs in batches of 100.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
candidate_pairs = sentences.batch(100).map(
    lambda batch: (batch, model.kalimat_model(batch)))
index.index_from_dataset(candidate_pairs)

# Query the index for user "5" and print the first three returned sentences.
_, titles = index(np.array(["5"]))
print(f"Top 3 recommendations for user 5: {titles[0, :3]}")

Epoch 1/3
Epoch 2/3
Epoch 3/3
Top 3 recommendations for user 5: [b'adik menerima kekerasan terus menerus oleh orang tua' b'mata bengkak'
 b'anak menerima kekerasan terus menerus oleh orang tua']
