In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.feature_extraction.text import TfidfVectorizer

import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from keras.models import Model

import matplotlib.pyplot as plt

In [2]:
# Single place to repoint the notebook at the project's data folder; both CSV
# paths below are built from it, so they resolve to the same files as before.
DATA_DIR = 'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data'

# Ratings table, reduced to the three columns used by the rest of the notebook.
df = pd.read_csv(f'{DATA_DIR}/user_ratings.csv')
df = df[['Username','BGGId','Rating']]
# Per-game table (read in full; columns are selected from it later).
game_df = pd.read_csv(f'{DATA_DIR}/games.csv')

In [3]:
# Cast all three columns in a single call. astype returns a new frame, so no
# value is written into the column-subset selection that produced df (the
# per-column assignment form is what triggers SettingWithCopyWarning).
df = df.astype({'Username': str, 'BGGId': int, 'Rating': 'float32'})

In [4]:
def recent_game(date, cutoff=2016):
    """
    Turn a publication year into a binary new/old flag.

    A game is flagged as new (1) only when it was published strictly after
    ``cutoff``. The cutoff year itself, and anything earlier, is old (0).

    Parameters:
        date (int): the year of publication.
        cutoff (int): the last year still treated as old. Defaults to 2016.

    Returns:
        int: 1 if ``date > cutoff``, otherwise 0. A value that does not compare
        greater than the cutoff (e.g. NaN) therefore maps to 0.
    """
    return 1 if date > cutoff else 0
    
# Element-wise new/old flag derived from each game's YearPublished value.
game_df['new_or_old'] = game_df['YearPublished'].map(recent_game)

In [5]:
# Per-game columns used as side features. BGGId is only the join key, so it is
# prepended for the selection but kept out of `feats` itself.
feats = ['new_or_old','BayesAvgRating','BestPlayers','MfgPlaytime','NumUserRatings','NumComments']
feats_df = game_df[['BGGId', *feats]]
# merge defaults to an inner join: ratings whose BGGId has no row in feats_df are dropped.
comb_feats_df = pd.merge(df, feats_df, on='BGGId')


In [6]:
# Distinct usernames / game ids in order of first appearance in the ratings table.
user_ids = df['Username'].unique().tolist()
game_ids = df['BGGId'].unique().tolist()
# Map every raw id to a dense 0-based position in the lists above.
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
game2game_encoded = dict(zip(game_ids, range(len(game_ids))))

In [7]:
# Add the integer-encoded user/game columns and make sure the target is float32.
comb_feats_df = comb_feats_df.assign(
    user=comb_feats_df['Username'].map(user2user_encoded),
    game=comb_feats_df['BGGId'].map(game2game_encoded),
    Rating=comb_feats_df['Rating'].astype(np.float32),
)

In [8]:
# Model inputs: encoded user and game ids plus the per-game side features.
X = comb_feats_df[['user','game','new_or_old','BayesAvgRating','BestPlayers','MfgPlaytime','NumUserRatings','NumComments']]
y = comb_feats_df[['Rating']]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# train_test_split hands back selections of X; take explicit copies so the
# column assignments below modify independent frames (no SettingWithCopyWarning).
x_train, x_test = x_train.copy(), x_test.copy()

# Features get their own scaler, fitted on the training split only. Keeping it
# separate from the target scaler preserves the fitted feature ranges, so the
# same transform can be re-applied to new data at prediction time.
feat_scaler = MinMaxScaler()
x_train[feats] = feat_scaler.fit_transform(x_train[feats])
x_test[feats] = feat_scaler.transform(x_test[feats])

# `scaler` ends up fitted on the training target, exactly as before; use
# scaler.inverse_transform to map predictions back to the original rating scale.
scaler = MinMaxScaler()
y_train, y_test = scaler.fit_transform(y_train), scaler.transform(y_test)

In [9]:
# Hybrid Model
# Three branches are concatenated and fed to a single linear output unit:
#   1. matrix factorisation: normalised dot product of user/game embeddings
#   2. neural network: dense layer over a second, independent pair of embeddings
#   3. product features: dense layer over the per-game side features

# Fix TensorFlow's global seed so weight initialisation is repeatable from run
# to run (hardware-level nondeterminism may still cause small differences).
tf.random.set_seed(0)

embed_size = 50
num_users = len(user2user_encoded)
num_games = len(game2game_encoded)
num_feats = len(feats)

#input layer
user_in = Input(name='user_in', shape=(1,))
prod_in = Input(name='prod_in', shape=(1,))

#matrix factorization
mf_user_emb = Embedding(name='mf_user_emb', input_dim=num_users, output_dim=embed_size)(user_in)
mf_user_flat = Flatten(name='mf_user_flat')(mf_user_emb)
mf_prod_emb = Embedding(name='mf_prod_emb', input_dim=num_games, output_dim=embed_size)(prod_in)
mf_prod_flat = Flatten(name='mf_prod_flat')(mf_prod_emb)
# normalize=True L2-normalises both vectors first, so this is a cosine similarity.
mf_dot = Dot(name='mf_dot', normalize=True, axes=1)([mf_user_flat, mf_prod_flat])

#neural network
nn_user_emb = Embedding(name='nn_user_emb', input_dim=num_users, output_dim=embed_size)(user_in)
nn_user_flat = Flatten(name='nn_user_flat')(nn_user_emb)
nn_prod_emb = Embedding(name='nn_prod_emb', input_dim=num_games, output_dim=embed_size)(prod_in)
nn_prod_flat = Flatten(name='nn_prod_flat')(nn_prod_emb)
nn_concat = Concatenate()([nn_user_flat, nn_prod_flat])
# Hidden width is half the embedding size (integer division keeps it an int).
nn_dense = Dense(name='nn_dense', units=embed_size // 2, activation='relu')(nn_concat)

# Product features
feats_in = Input(name='feat_in', shape=(num_feats,))
feats_dense = Dense(name='feat_dense', units=num_feats, activation='relu')(feats_in)

#Merge both together
combo = Concatenate()([mf_dot, nn_dense, feats_dense])
y_out = Dense(name='y_out', units=1, activation='linear')(combo)

# Input order here fixes the order of the arrays passed to fit() / predict().
hyb_model = Model(inputs=[user_in, prod_in, feats_in], outputs=y_out, name='hybrid_model')
hyb_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
hyb_model.summary()

Model: "hybrid_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_in (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 prod_in (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 nn_user_emb (Embedding)        (None, 1, 50)        20568750    ['user_in[0][0]']                
                                                                                                  
 nn_prod_emb (Embedding)        (None, 1, 50)        1096250     ['prod_in[0][0]']                
                                                                                       

In [10]:
# Train on the training split; inputs are passed in the order user, game, features.
history = hyb_model.fit(
    x=[x_train['user'], x_train['game'], x_train[feats]],
    y=y_train,
    batch_size=5000,
    epochs=3,
    verbose=True,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [13]:
# Root-mean-squared error on the held-out split. y_test was min-max scaled
# earlier in the notebook, so the value is in scaled units, not raw rating points.
y_pred = hyb_model.predict(x=[x_test['user'], x_test['game'], x_test[feats]])
# Take the square root explicitly: the `squared=False` argument of
# mean_squared_error is deprecated and removed in newer scikit-learn releases.
np.sqrt(mean_squared_error(y_test, y_pred))



0.11624323

In [14]:
# Write the trained model to the project's models folder.
model_path = 'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/src/models/neural_cf_model'
hyb_model.save(model_path)



INFO:tensorflow:Assets written to: C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/src/models/neural_cf_model\assets


INFO:tensorflow:Assets written to: C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/src/models/neural_cf_model\assets
