## Recommender System Model

## Menginisiasi library yang dibutuhkan

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

## Bagian preprocessing data

In [2]:
# Load item dataset
item_data = pd.read_csv('./data/article.csv')

print(item_data.head())
#check item_data data type and shape
print(item_data.dtypes)
print(item_data.shape)

                                            title  sports  cooking  books  \
0                     How to Train for a Marathon    0.80     0.05   0.05   
1              The Best Recipes for a Cozy Winter    0.05     0.80   0.05   
2                        The Top 10 Books of 2023    0.05     0.05   0.80   
3  The Oscars 2023: Predictions and Controversies    0.05     0.05   0.05   
4  The Rise of Indie Games in the Gaming Industry    0.05     0.05   0.05   

   cinema  gaming  
0    0.05    0.05  
1    0.05    0.05  
2    0.05    0.05  
3    0.80    0.05  
4    0.05    0.80  
title       object
sports     float64
cooking    float64
books      float64
cinema     float64
gaming     float64
dtype: object
(49, 6)


In [3]:
#create user_data dummy dataset
rng = np.random.default_rng(seed=42)
user_data = pd.DataFrame(rng.random((49,5)), columns=['sports', 'cooking', 'books', 'cinema', 'gaming'])
print(user_data.head())
#check user_data data type and shape
#shape must be (49,5), in line with the item_data shape
print(user_data.dtypes)
print(user_data.shape)


     sports   cooking     books    cinema    gaming
0  0.773956  0.438878  0.858598  0.697368  0.094177
1  0.975622  0.761140  0.786064  0.128114  0.450386
2  0.370798  0.926765  0.643865  0.822762  0.443414
3  0.227239  0.554585  0.063817  0.827631  0.631664
4  0.758088  0.354526  0.970698  0.893121  0.778383
sports     float64
cooking    float64
books      float64
cinema     float64
gaming     float64
dtype: object
(49, 5)


In [4]:
#create a dataframe consist of only titles
#for testing purposes
item_data_popped = item_data.pop('title')

In [5]:
#convert item features to numpy array
item_features = item_data[['sports', 'cooking', 'books', 'cinema', 'gaming']].values.astype(np.float32)
print(item_features[:5])
print(item_features.shape)

[[0.8  0.05 0.05 0.05 0.05]
 [0.05 0.8  0.05 0.05 0.05]
 [0.05 0.05 0.8  0.05 0.05]
 [0.05 0.05 0.05 0.8  0.05]
 [0.05 0.05 0.05 0.05 0.8 ]]
(49, 5)


In [6]:
#convert user features to numpy array
user_features = user_data[['sports', 'cooking', 'books', 'cinema', 'gaming']].values.astype(np.float32)
print(user_features[:5])
print(user_features.shape)

[[0.77395606 0.43887845 0.85859793 0.697368   0.09417735]
 [0.97562236 0.7611397  0.7860643  0.12811363 0.45038593]
 [0.37079802 0.92676497 0.6438651  0.8227616  0.4434142 ]
 [0.22723871 0.5545848  0.06381726 0.8276312  0.6316644 ]
 [0.75808775 0.35452595 0.970698   0.8931211  0.7783835 ]]
(49, 5)


In [7]:
#calculate dot product
dot_product = np.dot(user_features, item_features.T)
#dot_product = dot_product.reshape(49, 49)
print(dot_product[:5])
print(dot_product.shape)

[[0.723616   0.4723077  0.7870974  0.66617495 0.21378191 0.71109414
  0.5062182  0.7618793  0.6570799  0.2821625  0.4723077  0.66617495
  0.7870974  0.2821625  0.74555993 0.82196575 0.70910484 0.5152377
  0.78523386 0.7499099  0.723616   0.4723077  0.7870974  0.70910484
  0.2821625  0.74555993 0.5110056  0.8477391  0.78387314 0.723616
  0.4723077  0.7870974  0.70910484 0.2821625  0.74555993 0.5110056
  0.8477391  0.723616   0.4723077  0.7870974  0.70910484 0.2821625
  0.74555993 0.5110056  0.723616   0.4723077  0.7870974  0.70910484
  0.28216246]
 [0.8867831  0.72592115 0.7446146  0.25115153 0.49285576 0.8665811
  0.695516   0.75284624 0.32517824 0.49352604 0.72592115 0.25115153
  0.7446146  0.49352604 0.92484015 0.75102025 0.29045475 0.76522434
  1.1276805  0.62090635 0.8867831  0.72592115 0.7446146  0.29045475
  0.49352604 0.92484015 0.77470225 0.83145267 0.44485074 0.8867831
  0.72592115 0.7446146  0.29045475 0.49352604 0.92484015 0.77470225
  0.83145267 0.8867831  0.72592115 0.7446

## Mendefinisikan dan melakukan training pada model

In [9]:
# Define the TensorFlow model
input_shape = (5,)
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=input_shape, activation='relu'),
    tf.keras.layers.Dense(49, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary

<bound method Model.summary of <keras.engine.sequential.Sequential object at 0x00000186862C9ED0>>

In [10]:
model.fit(user_features, np.argmax(dot_product, axis=1), epochs=3000, batch_size=32)

Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

<keras.callbacks.History at 0x18686cdb0d0>

## Melakukan prediksi rekomendasi terhadap user

In [11]:
# Predict item ratings using the trained model
custom_user_features = np.array([[0.1, 0.6, 0.1, 0.1, 0.1]])
predicted_dot_product = model.predict(custom_user_features)
#extract 5 most relevant content for the user
top_item_indices = np.argsort(predicted_dot_product, axis=1)[0,::-1][:5]
recommended_item_features = item_features[top_item_indices]



In [12]:
print(predicted_dot_product)
print(top_item_indices)
print(recommended_item_features)

[[1.9250817e-06 1.4155837e-06 3.2640576e-06 2.6894436e-06 2.1572396e-06
  2.4291583e-06 5.8361074e-06 3.8413996e-06 2.1856681e-06 1.6769113e-06
  3.8055673e-06 2.5845729e-06 1.7604931e-06 3.8025887e-06 6.2824024e-06
  1.8765711e-06 9.8754890e-07 9.1251957e-01 7.6945384e-05 3.2969288e-06
  2.6622661e-06 3.9109495e-06 1.0579497e-06 2.1230414e-06 3.2308797e-06
  3.3586605e-06 8.6725943e-02 7.0504284e-05 4.7025122e-04 1.9105833e-06
  3.3285846e-06 2.6765399e-06 3.0660763e-06 2.6513253e-06 3.7517375e-06
  7.5716962e-06 1.3422841e-06 2.8990378e-06 2.3154621e-06 2.4186911e-06
  5.3027243e-06 5.6237491e-06 5.9166427e-06 5.2027845e-06 3.5557748e-06
  3.5168398e-06 2.6000837e-06 5.9965089e-07 2.3444970e-06]]
[17 26 28 18 27]
[[0.05 0.8  0.1  0.05 0.05]
 [0.1  0.8  0.05 0.05 0.05]
 [0.1  0.1  0.1  0.8  0.2 ]
 [0.7  0.05 0.1  0.1  0.7 ]
 [0.1  0.1  0.8  0.05 0.05]]


In [14]:
#print recommendations
for column in top_item_indices:
    item_title = item_data_popped.iloc[[column]]
    print(item_title)

17    The Secrets of Mastering the Art of Cooking
Name: title, dtype: object
26    The Ultimate Guide to Healthy Cooking
Name: title, dtype: object
28    Marvel vs DC: Which Cinematic Universe is Better?
Name: title, dtype: object
18    The Best Sports Games for Mobile Devices in 2023
Name: title, dtype: object
27    How to Write a Bestselling Novel in 2023
Name: title, dtype: object


In [15]:
#save the model
saved_model_path = "./recsys_model.h5"
model.save(saved_model_path)

## Mengubah format .h5 ke JSON

In [16]:
#install tfjs
%pip install tensorflowjs

Collecting protobuf==3.20.0
  Using cached protobuf-3.20.0-cp310-cp310-win_amd64.whl (903 kB)
Collecting tensorflow<3,>=2.1.0
  Using cached tensorflow-2.12.0-cp310-cp310-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.12.0
  Using cached tensorflow_intel-2.12.0-cp310-cp310-win_amd64.whl (272.8 MB)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
Collecting tensorflow<3,>=2.1.0
  Using cached tensorflow-2.11.1-cp310-cp310-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.11.1
  Using cached tensorflow_intel-2.11.1-cp310-cp310-win_amd64.whl (266.3 MB)
Collecting tensorboard<2.12,>=2.11
  Using cached tensorboard-2.11.2-py3-none-any.whl (6.0 MB)
Collecting tensorflow<3,>=2.1.0
  Using cached tensorflow-2.11.0-cp310-cp310-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.11.0
  Using cached tensorflow_intel-2.11.0-cp310-cp310-win_amd64.whl (266.3 MB)
Collecting tensorflow<3,>=2.

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'D:\\anaconda\\Lib\\site-packages\\google\\~-otobuf\\internal\\_api_implementation.cp310-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



In [17]:
#convert .h5 to .json
!tensorflowjs_converter --input_format=keras {saved_model_path} ./

2023-06-05 14:38:25.757441: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2023-06-05 14:38:25.758064: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
