# Importing Libraries

In [130]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# # Recommender
# import tensorflow as tf
# import tensorflow_recommenders as tfrs
# from tensorflow.keras.layers.experimental.preprocessing import StringLookup, TextVectorization, Normalization, Discretization, Hashing
# from tensorflow.keras.layers import Embedding, Dense, Layer, GlobalAveragePooling1D, Flatten
# from tensorflow.keras import Sequential
# from tensorflow.keras.optimizers import Adagrad
# from tensorflow.keras.losses import MeanSquaredError
# from tensorflow.keras.metrics import RootMeanSquaredError

# from nltk.corpus import stopwords
# from sklearn.model_selection import train_test_split
# from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# from scipy.sparse.linalg import svds

# Loading the Data

In [131]:
df = pd.read_csv('../datasets/reviews-cleaned.csv')
df.head()

Unnamed: 0,user,name,rating
0,1 Family Meeple,10 Days in Europe,4.1
1,1 Family Meeple,12 Days,7.0
2,1 Family Meeple,7 Wonders,6.5
3,1 Family Meeple,A Column of Fire,5.0
4,1 Family Meeple,A Feast for Odin,10.0


# Deep learning-Based Recommenders

In [132]:
# Define the model architecture
user_ids = df['user'].unique().tolist()
user2idx = {o:i for i,o in enumerate(user_ids)}
df['user'] = df['user'].apply(lambda x: user2idx[x])

game_ids = df['name'].unique().tolist()
game2idx = {o:i for i,o in enumerate(game_ids)}
df['name'] = df['name'].apply(lambda x: game2idx[x])

df.head()

Unnamed: 0,user,name,rating
0,0,0,4.1
1,0,1,7.0
2,0,2,6.5
3,0,3,5.0
4,0,4,10.0


In [133]:
n_users, n_games = len(user_ids), len(game_ids)
n_users, n_games

(3067, 4582)

In [134]:
# Define the model architecture
n_factors = 50

user_input = Input(shape=(1,))
user_embedding = Embedding(n_users, n_factors)(user_input)
user_flat = Flatten()(user_embedding)

game_input = Input(shape=(1,))
game_embedding = Embedding(n_games, n_factors)(game_input)
game_flat = Flatten()(game_embedding)

x = Concatenate()([user_flat, game_flat])
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

In [135]:
# Split the dataset into train and test sets
df_train, df_test = train_test_split(df, train_size = 0.7, random_state = 42)
print(df_train.shape, df_test.shape)

(1413100, 3) (605615, 3)


In [136]:
model = Model(inputs=[user_input, game_input], outputs=x)

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [137]:
# Train the model
model.fit(x=[df_train['user'], df_train['name']], y=df_train['rating'], batch_size=256, epochs=5, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x296afa30310>

In [138]:
# Evaluate the model
test_loss, test_acc = model.evaluate(x=[df_test['user'], df_test['name']], y=df_test['rating'])
print('Test accuracy:', test_acc)

Test accuracy: 0.003550110151991248
