# Train a Neural Network for Content Based Filtering with Synthetic Data

## Import Libraries

In [1]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tabulate
import csv
# from recsysNN_utils import *
pd.set_option("display.precision", 1)

## Load Generated Data

Load generated data from csv

In [2]:
user_rating = np.genfromtxt('synthetic_user_ratings_correlation_nullable.csv', delimiter=',')
jasa_data = np.genfromtxt('synthetic_store_data_100k.csv', delimiter=',')
y_train = np.genfromtxt('y_train_100k.csv', delimiter=',')

with open('synthetic_user_rating_header.txt', newline='') as rating:
  user_rating_header = list(csv.reader(rating))[0]
with open('synthetic_store_data_header.txt', newline='') as jasa:
  jasa_data_header = list(csv.reader(jasa))[0]

num_user_features = user_rating.shape[1] - 3 # remove userid, rating count and ave rating during training
num_jasa_features = jasa_data.shape[1] - 1  # remove jasaid at train time


print(f"Data size: {len(user_rating)}")

Data size: 100000


Display user ratings table (**Only use the categories ratings**)

In [3]:
flist = [".0f",".0f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f"]
user_head = user_rating_header
user_display = [user_head]
count = 0
for i in range(1, user_rating.shape[0]):
    if count == 15: break
    count += 1
    user_display.append([user_rating[i, 0].astype(int),
                  user_rating[i, 1].astype(int),
                  user_rating[i, 2].astype(float),
                  *user_rating[i, 3:].astype(float)
                ])
user_table = tabulate.tabulate(user_display, headers="firstrow", floatfmt=flist, numalign='center')
print(user_table)

 id_user    num_ratings    avg_rating    cat_1    cat_2    cat_3    cat_4    cat_5    cat_6    cat_7    cat_8
---------  -------------  ------------  -------  -------  -------  -------  -------  -------  -------  -------
    2           83            2.8         4.2      2.6      3.0      3.1      3.0      2.9      3.5      0.0
    3           47            2.2         0.0      0.0      4.4      0.0      3.2      3.0      3.6      3.7
    4           44            1.9         2.0      0.0      0.0      2.9      3.5      3.5      0.0      3.1
    5           41            2.6         0.0      0.0      4.3      3.6      2.4      3.1      3.9      3.4
    6           11            3.3         3.3      3.5      3.3      3.4      3.1      3.1      3.6      3.5
    7           40            3.6         4.1      3.7      4.1      4.0      0.0      3.8      4.5      4.4
    8           48            1.7         0.0      3.0      3.6      0.0      0.0      3.3      3.4      0.0
    9           

Display some sample generated jasa data. (**Only using the one hot classification)**

In [4]:
flist = [".0f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f"]
jasa_head = jasa_data_header
jasa_display = [jasa_head]
count = 0
for i in range(1, jasa_data.shape[0]):
    if count == 15: break
    count += 1
    jasa_display.append([jasa_data[i, 0].astype(int),
                  jasa_data[i, 1].astype(int),
                  jasa_data[i, 2].astype(float),
                  *jasa_data[i, 3:].astype(float)
                ])
jasa_table = tabulate.tabulate(jasa_display, headers="firstrow", floatfmt=flist, numalign='center')
print(jasa_table)

 store_id    avg_rating    cat_1    cat_2    cat_3    cat_4    cat_5    cat_6    cat_7    cat_8
----------  ------------  -------  -------  -------  -------  -------  -------  -------  -------
    2           2.0         0.0      1.0      0.0      0.0      0.0      1.0      0.0      0.0
    3           3.0         0.0      0.0      0.0      0.0      1.0      0.0      0.0      0.0
    4           1.0         0.0      0.0      1.0      0.0      0.0      0.0      1.0      0.0
    5           5.0         0.0      0.0      0.0      1.0      0.0      1.0      0.0      0.0
    6           3.0         0.0      0.0      1.0      0.0      0.0      0.0      1.0      0.0
    7           5.0         0.0      1.0      1.0      1.0      0.0      0.0      0.0      0.0
    8           5.0         0.0      0.0      0.0      0.0      0.0      0.0      1.0      1.0
    9           4.0         0.0      1.0      0.0      1.0      0.0      0.0      0.0      0.0
    10          4.0         1.0      0.0      1

Show generated y_train

In [5]:
print(f"y_train[:15]: {y_train[:15]}")

y_train[:15]: [3.  2.  3.5 1.  5.  4.  5.  5.  4.5 4.  5.  5.  3.5 1.5 2.5]


## Pre-process Data

Data Scaling using Scikit-learn

In [6]:
# scale training data
user_rating_unscaled = user_rating
jasa_data_unscaled = jasa_data
y_train_unscaled    = y_train

scalerItem = StandardScaler()
scalerItem.fit(user_rating)
user_rating = scalerItem.transform(user_rating)

scalerUser = StandardScaler()
scalerUser.fit(jasa_data)
jasa_data = scalerUser.transform(jasa_data)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(user_rating_unscaled, scalerItem.inverse_transform(user_rating)))
print(np.allclose(jasa_data_unscaled, scalerUser.inverse_transform(jasa_data)))

True
True


Display the scaled user ratings

In [7]:
flist = [".0f",".0f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f",".1f"]
user_head = user_rating_header
user_display = [user_head]
count = 0
for i in range(1, user_rating.shape[0]):
    if count == 15: break
    count += 1
    user_display.append([user_rating[i, 0].astype(int),
                  user_rating[i, 1].astype(int),
                  user_rating[i, 2].astype(float),
                  *user_rating[i, 3:].astype(float)
                ])
user_table = tabulate.tabulate(user_display, headers="firstrow", floatfmt=flist, numalign='center')
print(user_table)

 id_user    num_ratings    avg_rating    cat_1    cat_2    cat_3    cat_4    cat_5    cat_6    cat_7    cat_8
---------  -------------  ------------  -------  -------  -------  -------  -------  -------  -------  -------
   -1            1            -0.2        0.8     -0.2      0.0      0.1      0.1      0.0      0.4     -1.9
   -1            0            -1.1       -1.8     -1.9      0.9     -1.9      0.1      0.0      0.4      0.5
   -1            0            -1.7       -0.6     -1.9     -1.9     -0.0      0.4      0.3     -1.9      0.1
   -1            0            -0.6       -1.8     -1.9      0.9      0.4     -0.3      0.1      0.6      0.3
   -1           -1            0.6         0.2      0.3      0.2      0.3      0.1      0.1      0.4      0.3
   -1            0            1.0         0.7      0.5      0.7      0.7     -1.9      0.5      1.0      0.9
   -1            0            -2.0       -1.8      0.0      0.4     -1.9     -1.9      0.2      0.3     -1.9
   -1           

## Split data for training and testing

In [8]:
user_rating, user_rating_test = train_test_split(user_rating, train_size=0.80, shuffle=True, random_state=1)
jasa_data, jasa_data_test = train_test_split(jasa_data, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)

print(f"user training data shape: {user_rating.shape}")
print(f"user test data shape: {user_rating_test.shape}")

print(f"jasa training data shape: {jasa_data.shape}")
print(f"jasa test data shape: {jasa_data_test.shape}")

user training data shape: (80000, 11)
user test data shape: (20000, 11)
jasa training data shape: (80000, 10)
jasa test data shape: (20000, 10)


# Neural Network

Defining the network

In [9]:
num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

item_NN = tf.keras.models.Sequential([
      tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_jasa_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 8)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 9)]                  0         []                            
                                                                                                  
 sequential (Sequential)     (None, 32)                   39328     ['input_1[0][0]']             
                                                                                                  
 sequential_1 (Sequential)   (None, 32)                   39584     ['input_2[0][0]']             
                                                                                              

Set up Loss (MSE) and Optimizer (Adam Optimizer)

In [10]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

## Train the model on the dataset

Train the model on the dataset

In [11]:
tf.random.set_seed(1)
model.fit([user_rating[:, 3:], jasa_data[:, 1:]], y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7bdbc85aeb00>

Evaluate on the training set

In [12]:
model.evaluate([user_rating_test[:, 3:], jasa_data_test[:, 1:]], y_test)



0.01033790223300457

## Save model

In [17]:
model.save('/content/drive/MyDrive/jasakarya_saved_models/recsysnn_jasakarya_model_tf', save_format='tf') #Savedmodel

In [18]:
model.save('/content/drive/MyDrive/jasakarya_saved_models/recsysnn_jasakarya_model.keras') #Keras

In [None]:
!pip install tensorflowjs

In [19]:
!tensorflowjs_converter --input_format=tf_saved_model recsysnn_jasakarya_model_tf /content/drive/MyDrive/jasakarya_saved_models/recsysnn_jsakarya_model_js


2023-12-05 07:12:27.560608: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-05 07:12:27.560665: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-05 07:12:27.562227: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-05 07:12:30.404263: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skippin