# Basic Collaborative Filtering Model (SVD++ Matrix Factorization) Utilizing the Surprise Module

In [1]:
import pandas as pd
import numpy as np
from surprise import SVDpp,Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, GridSearchCV, cross_validate
import pickle

In [2]:
# Load the raw user ratings and keep only the three columns the recommender uses.
# The column order (user, item, rating) is deliberate: Dataset.load_from_df,
# called further down, reads the frame's columns in exactly that order.
df = pd.read_csv(
    'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data/user_ratings.csv'
)[['Username', 'BGGId', 'Rating']]
df.head()

Unnamed: 0,Username,BGGId,Rating
0,Tonydorrf,213788,8.0
1,tachyon14k,213788,8.0
2,Ungotter,213788,8.0
3,brainlocki3,213788,8.0
4,PPMP,213788,8.0


In [3]:
# Load the per-game details table (BGGId, Name, ...) and preview its first rows.
game_df = pd.read_csv(
    filepath_or_buffer='C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data/games.csv'
)
game_df.head(n=5)

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...,1981,1.963,6.64537,5.78447,1.4544,3,4,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0


In [22]:
# Build the Surprise reader for ratings on a 0-10 scale.
reader = Reader(rating_scale=(0,10))

# Show the summary statistics of the Rating column as the cell's last expression
# so they are actually displayed; compare min/max with the scale chosen above.
df['Rating'].describe()

In [23]:
# Create the Surprise dataset (df columns must be ordered user, item, rating)
# and hold out 20% of the ratings for evaluation.
# random_state pins the shuffle so the split, and the RMSE computed from it,
# is the same on every Restart & Run All.
data = Dataset.load_from_df(df, reader)
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [24]:
# Fit SVD++ on the training split and check accuracy on the held-out split by RMSE.
# random_state seeds the RNG used to initialise the model so reruns give the same score.
svd = SVDpp(random_state=42)
svd.fit(train)
y_pred = svd.test(test)
accuracy.rmse(y_pred, verbose=True)

RMSE: 1.3554


1.355360573287475

In [36]:
# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed even if pickling fails part-way through.
with open('C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/src/models/cf_model.pkl','wb') as f:
    pickle.dump(svd, f)

In [4]:
# Reload the pickled collaborative-filtering model from disk.
# NOTE: unpickling executes code embedded in the file, so only load pickles
# that were produced by this project.
with open(
    'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/src/models/cf_model.pkl',
    mode='rb',
) as model_file:
    model = pickle.load(model_file)

In [7]:
def user_recommendations(user, n_games):
    """
        Print the top n_games boardgames the model predicts a user will rate highest,
        considering only games the user has not rated yet.

        Relies on the notebook-level globals `df` (user ratings), `game_df`
        (game details with BGGId and Name) and `model` (object exposing
        `.test(testset)` whose predictions carry an `.est` attribute).

        Parameters:
            user(str): name of user
            n_games(int): number of games to show

        Returns:
            None. The ranked game names are printed, best first. A game whose
            BGGId is absent from `game_df` is shown as 'Unknown game (BGGId <id>)'.
    """
    # Candidate games: every game present in the ratings minus those the user has rated
    all_game_ids = df['BGGId'].unique()
    rated_ids = df.loc[df['Username'] == user, 'BGGId']
    candidate_ids = np.setdiff1d(all_game_ids, rated_ids)

    # Create sparse dataset with username, bggid and rating
    # (4. is an arbitrary placeholder; a rating is required by the testset structure)
    testset = [[user, bggid, 4.] for bggid in candidate_ids]

    # Score every candidate and keep the n_games highest estimates, best first
    predictions = model.test(testset)
    pred_ratings = np.array([pred.est for pred in predictions])
    top_ratings_idx = pred_ratings.argsort()[::-1][:n_games]
    bgg_ids = candidate_ids[top_ratings_idx]

    # Build the id -> name lookup once instead of scanning game_df for every id.
    # drop_duplicates keeps the first row per BGGId, which is the row the previous
    # per-id scan would have picked.
    name_by_id = (
        game_df.drop_duplicates(subset='BGGId')
        .set_index('BGGId')['Name']
        .to_dict()
    )
    bgg_name = [
        name_by_id.get(bgg_id, f'Unknown game (BGGId {bgg_id})')
        for bgg_id in bgg_ids
    ]
    print(f'Top boardgames for {user} in order are: \n {bgg_name}')
    

In [8]:
# Show the five highest-predicted unrated games for one example user.
user_recommendations(user='bennygui', n_games=5)

Top boardgames for bennygui in order are: 
 ['Viticulture Essential Edition', 'Clank!: Legacy – Acquisitions Incorporated', 'Ark Nova', 'Uprising: Curse of the Last Emperor', 'Mechs vs. Minions']
