In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
df_sma = pd.read_csv('/content/dataset_sma.csv')
df_sma.head()

Unnamed: 0,jenjang_student,gender_student,daerah_student,id_tutor,pelajaran
0,10,Male,Jakarta,70,Bahasa Indonesia
1,12,Male,Depok,113,Sosiologi
2,10,Female,Tanggerang,1,Matematika
3,12,Female,Bogor,32,Kimia
4,11,Female,Jakarta,102,Geografi


In [None]:
print(type(df_sma))

<class 'pandas.core.frame.DataFrame'>


In [None]:
df_sma['jenjang_student'] = df_sma['jenjang_student'].astype(float)
df_sma['id_tutor'] = df_sma['id_tutor'].astype(float)

In [None]:
print(type(df_sma))

<class 'pandas.core.frame.DataFrame'>


In [None]:
df_sma.head()
print('\n\n: ', df_sma.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   jenjang_student  1000 non-null   float64
 1   gender_student   1000 non-null   object 
 2   daerah_student   1000 non-null   object 
 3   id_tutor         1000 non-null   float64
 4   pelajaran        1000 non-null   object 
dtypes: float64(2), object(3)
memory usage: 39.2+ KB


:  None


In [None]:
df_sma['gender_student'] = df_sma['gender_student'].map(gender_dict)
df_sma['daerah_student'] = df_sma['daerah_student'].map(region_dict)
df_sma['pelajaran'] = df_sma['pelajaran'].map(subject_dict)
df_sma

Unnamed: 0,jenjang_student,gender_student,daerah_student,id_tutor,pelajaran
0,10,0,0,70,4
1,12,0,2,113,7
2,10,1,3,1,0
3,12,1,1,32,2
4,11,1,0,102,6
...,...,...,...,...,...
995,11,1,3,93,6
996,12,0,0,88,5
997,11,0,4,58,3
998,11,1,0,52,3


In [None]:
df_student = df_sma[['jenjang_student','gender_student', 'daerah_student']]
df_tutor = df_sma[['id_tutor','pelajaran']]
df_student

Unnamed: 0,jenjang_student,gender_student,daerah_student
0,10.0,Male,Jakarta
1,12.0,Male,Depok
2,10.0,Female,Tanggerang
3,12.0,Female,Bogor
4,11.0,Female,Jakarta
...,...,...,...
995,11.0,Female,Tanggerang
996,12.0,Male,Jakarta
997,11.0,Male,Bekasi
998,11.0,Female,Jakarta


In [None]:
df_student.astype(float)

AttributeError: '_TensorSliceDataset' object has no attribute 'astype'

In [None]:
df_tutor

Unnamed: 0,id_tutor,pelajaran
0,70.0,Bahasa Indonesia
1,113.0,Sosiologi
2,1.0,Matematika
3,32.0,Kimia
4,102.0,Geografi
...,...,...
995,93.0,Geografi
996,88.0,Bahasa Inggris
997,58.0,Biologi
998,52.0,Biologi


In [None]:
columns_to_convert = ['jenjang_student', 'gender_student', 'daerah_student']
combined_values_student = df_student[columns_to_convert].values.flatten()

In [None]:
combined_values_student

array([10.0, 'Male', 'Jakarta', ..., 10.0, 'Female', 'Jakarta'],
      dtype=object)

In [None]:
columns_to_convert = ['id_tutor', 'pelajaran']
combined_values_tutor = df_tutor[columns_to_convert].values.flatten()

In [None]:
combined_values_tutor

array([70.0, 'Bahasa Indonesia', 113.0, ..., 'Biologi', 65.0,
       'Bahasa Indonesia'], dtype=object)

In [None]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [None]:
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    dataset = tf.data.Dataset.from_tensor_slices(dict(dataframe))
    return dataset

In [None]:
df_tutor = dataframe_to_dataset(df_tutor)
df_student = dataframe_to_dataset(df_student)

In [None]:
class StudentTutor(tfrs.Model):
  def __init__(self, student_model: tf.keras.Model,
      tutor_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    super().__init__()

    self.tutor_model = tutor_model
    self.student_model = student_model
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
      tutor_embeddings = self.tutor_model(features[["id_tutor", "pelajaran"]])
      student_embeddings = self.student_model(features[["daerah_student", "gender_student", "daerah_student"]])

      return self.task(tutor_embeddings, student_embeddings)

In [None]:
student_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
student_ids_vocabulary.adapt(combined_values_student)

tutor_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
tutor_titles_vocabulary.adapt(combined_values_tutor)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [None]:
# define model
tutor_model = tf.keras.Sequential([df_tutor,
                                   tf.keras.layers.Embedding(df_tutor.vocab_size(), 64)
                                   ])

student_model = tf.keras.Sequential([df_student,
                                   tf.keras.layers.Embedding(df_student.vocab_size(), 64)
                                   ])
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    df_tutor.batch(128).map(tutor_model)
  )
)

AttributeError: 'DataFrame' object has no attribute 'vocab_size'

In [None]:
class UserModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.jenjang_student_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(
                max_tokens=len(df_sma['jenjang_student'].unique())+1),
            tf.keras.layers.Embedding(len(df_sma['jenjang_student'].unique())+1, 32)
        ])
        self.gender_student_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(
                max_tokens=len(df_sma['gender_student'].unique())+1),
            tf.keras.layers.Embedding(len(df_sma['gender_student'].unique())+1, 32)
        ])
        self.daerah_student_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(
                max_tokens=len(df_sma['daerah_student'].unique())+1),
            tf.keras.layers.Embedding(len(df_sma['daerah_student'].unique())+1, 32)
        ])
        self.pelajaran_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(
                max_tokens=len(df_sma['pelajaran'].unique())+1),
            tf.keras.layers.Embedding(len(df_sma['pelajaran'].unique())+1, 32)
        ])

    def call(self, inputs):
        return tf.concat([
            self.jenjang_student_embedding(inputs['jenjang_student']),
            self.gender_student_embedding(inputs['gender_student']),
            self.daerah_student_embedding(inputs['daerah_student']),
            self.pelajaran_embedding(inputs['pelajaran'])
        ], axis=1)

In [None]:
class ItemModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.id_tutor_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.IntegerLookup(
                max_tokens=len(df_sma['id_tutor'].unique())+1),
            tf.keras.layers.Embedding(len(df_sma['id_tutor'].unique())+1, 32)
        ])

    def call(self, inputs):
        return self.id_tutor_embedding(inputs)

In [None]:
class RecommenderModel(tfrs.models.Model):
    def __init__(self):
        super().__init__()
        self.user_model: tf.keras.Model = UserModel()
        self.item_model: tf.keras.Model = ItemModel()
        self.task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=tf.data.Dataset.from_tensor_slices(y_train).batch(128).map(self.item_model)
            )
        )

    def compute_loss(self, features, training=False):
        user_embeddings = self.user_model(features)
        positive_item_embeddings = self.item_model(features['id_tutor'])
        return self.task(user_embeddings, positive_item_embeddings)

In [None]:
feature_columns = ['jenjang_student', 'gender_student', 'daerah_student', 'pelajaran']
target_column = 'id_tutor'


X = df_sma[feature_columns]
y = df_sma[target_column]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


train = tf.data.Dataset.from_tensor_slices((dict(X_train), y_train))
test = tf.data.Dataset.from_tensor_slices((dict(X_test), y_test))


train = train.batch(32)
test = test.batch(32)

In [None]:
df_sma.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   jenjang_student  1000 non-null   int64 
 1   gender_student   1000 non-null   object
 2   daerah_student   1000 non-null   object
 3   id_tutor         1000 non-null   int64 
 4   pelajaran        1000 non-null   object
dtypes: int64(2), object(3)
memory usage: 39.2+ KB


In [None]:
model = RecommenderModel()


model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))


model.fit(train, epochs=3)

Epoch 1/3


TypeError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/models/base.py", line 68, in train_step
        loss = self.compute_loss(inputs, training=True)
    File "<ipython-input-8-a06579a54a2f>", line 13, in compute_loss
        user_embeddings = self.user_model(features)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_file08k8vss8.py", line 12, in tf__call
        retval_ = ag__.converted_call(ag__.ld(tf).concat, ([ag__.converted_call(ag__.ld(self).jenjang_student_embedding, (ag__.ld(inputs)['jenjang_student'],), None, fscope), ag__.converted_call(ag__.ld(self).gender_student_embedding, (ag__.ld(inputs)['gender_student'],), None, fscope), ag__.converted_call(ag__.ld(self).daerah_student_embedding, (ag__.ld(inputs)['daerah_student'],), None, fscope), ag__.converted_call(ag__.ld(self).pelajaran_embedding, (ag__.ld(inputs)['pelajaran'],), None, fscope)],), dict(axis=1), fscope)

    TypeError: Exception encountered when calling layer 'user_model' (type UserModel).
    
    in user code:
    
        File "<ipython-input-6-8c71923d9d6c>", line 30, in call  *
            self.pelajaran_embedding(inputs['pelajaran'])
    
        TypeError: tuple indices must be integers or slices, not str
    
    
    Call arguments received by layer 'user_model' (type UserModel):
      • inputs=({'jenjang_student': 'tf.Tensor(shape=(None,), dtype=int64)', 'gender_student': 'tf.Tensor(shape=(None,), dtype=string)', 'daerah_student': 'tf.Tensor(shape=(None,), dtype=string)', 'pelajaran': 'tf.Tensor(shape=(None,), dtype=string)'}, 'tf.Tensor(shape=(None,), dtype=int64)')
