# Basic Collaborative Filtering Model (SVD++ Matrix Factorization) Utilizing the Surprise Module

In [8]:
import pandas as pd
import numpy as np
from surprise import SVDpp,Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, GridSearchCV, cross_validate

In [3]:
# Location of the user-ratings CSV export.
ratings_csv = 'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data/user_ratings.csv'
# Keep only the user, item and rating columns, in that order.
rating_columns = ['Username', 'BGGId', 'Rating']
df = pd.read_csv(ratings_csv)[rating_columns]
# Preview the first rows.
df.head()

Unnamed: 0,Username,BGGId,Rating
0,Tonydorrf,213788,8.0
1,tachyon14k,213788,8.0
2,Ungotter,213788,8.0
3,brainlocki3,213788,8.0
4,PPMP,213788,8.0


In [4]:
# Build the Surprise reader with the rating scale used for this dataset.
# NOTE(review): (0, 10) should cover the min/max shown by the summary below — confirm.
reader = Reader(rating_scale=(0,10))
# Summary statistics of the ratings; kept as the cell's last expression so the
# notebook actually renders it (a bare expression mid-cell is silently discarded).
df['Rating'].describe()

In [5]:
# Create the Surprise dataset from the ratings frame and hold out 20% for testing.
data = Dataset.load_from_df(df, reader)
# Fixed random_state so the split (and the metrics computed from it) is
# reproducible across "Restart Kernel -> Run All".
train,test = train_test_split(data, test_size=0.2, random_state=42)

In [7]:
# Fit SVD++ on the training split and check accuracy on the held-out split by RMSE.
# random_state is pinned so the fit is reproducible from run to run.
svd = SVDpp(random_state=42)
svd.fit(train)
# Predictions for every rating in the held-out test split
y_pred = svd.test(test)
# Prints the RMSE and, as the last expression, also displays the returned value
accuracy.rmse(y_pred, verbose=True)

RMSE: 1.3558


1.355769315478485

In [11]:
# Choose the user to generate recommendations for
user = 'bennygui'
# Every distinct boardgame id present in the ratings
boardgame_id = df['BGGId'].unique()
# Boardgame ids this user has already rated
rated_by_user = df.loc[df['Username'].eq(user), 'BGGId']
# Candidate ids: all boardgames minus the ones the user has rated
user_list = np.setdiff1d(boardgame_id, rated_by_user)

In [12]:
# For the user, choose the number of top games to be shown
n_games = 5

# Build one [username, bggid, rating] entry per candidate game; the rating slot
# is only a placeholder (not needed for prediction, so 4 is used arbitrarily)
placeholder_rating = 4.
testset = [[user, game_id, placeholder_rating] for game_id in user_list]

# Use the model to create a list of predictions for the candidate games
predictions = svd.test(testset)

# Collect the estimated rating of each prediction
pred_ratings = np.array([prediction.est for prediction in predictions])
# Sort ascending, reverse to get highest first, then keep the top n_games
ascending_idx = np.argsort(pred_ratings)
top_ratings_idx = ascending_idx[::-1][:n_games]
# Map the selected positions back to boardgame ids
bgg_ids = user_list[top_ratings_idx]
print(f'Top boardgame id for {user}, are {bgg_ids}, with predicted rating {pred_ratings[top_ratings_idx]}')

Top boardgame id for bennygui, are [180241 193428 161936    490 284121], with predicted rating [9.65655991 9.62173329 9.58258823 9.51603668 9.49058143]
