<h1>Item-Based Collaborative Filtering</h1>

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df_ur = pd.read_csv('C:/Users/guije/Documents/boardgames_databases/bgg_400_reviews.csv')

In [3]:
df_ur.columns

Index(['user', 'name', 'rating'], dtype='object')

In [4]:
print(f"The user 'Torsten' has reviewed {len(df_ur[df_ur.user == 'Torsten'])} boardgames.")

The user 'Torsten' has reviewed 2 boardgames.


In [5]:
df_ur.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   user    12000 non-null  object 
 1   name    12000 non-null  object 
 2   rating  12000 non-null  float64
dtypes: float64(1), object(2)
memory usage: 281.4+ KB


In [6]:
df_ur.isnull().any()

user      False
name      False
rating    False
dtype: bool

In [7]:
pivot = df_ur.pivot_table(index=['user'], columns=['name'], values='rating')
pivot.head()

name,6 nimmt!,7 Wonders,7 Wonders Duel,A Feast for Odin,A Game of Thrones,A Game of Thrones: The Board Game (Second Edition),Above and Below,Abyss,Acquire,Aeon's End,...,Wingspan,Wits & Wagers,Wizard,Yahtzee,Yokohama,Zombicide,Zombicide: Black Plague,Zombie Dice,Zombies!!!,Zooloretto
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-mIDE-,,,,,,,,,,,...,,,,,,,,,,7.5
.JcK.,,,,,,,,,,,...,,,,,,,,,,
00matej00,,,,,,,,9.0,,,...,,,,,,,,,,
0ddjob,,,,,,,,,,,...,,,,,,,,,,
0xA8E,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Filling NaN values with zero
pivot.fillna(0, inplace=True)
pivot.head()

name,6 nimmt!,7 Wonders,7 Wonders Duel,A Feast for Odin,A Game of Thrones,A Game of Thrones: The Board Game (Second Edition),Above and Below,Abyss,Acquire,Aeon's End,...,Wingspan,Wits & Wagers,Wizard,Yahtzee,Yokohama,Zombicide,Zombicide: Black Plague,Zombie Dice,Zombies!!!,Zooloretto
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-mIDE-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5
.JcK.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00matej00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0ddjob,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0xA8E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
pivot = pivot.T

In [10]:
pivot.shape

(400, 11282)

In [11]:
# Dropping columns with only zeroes
pivot = pivot.loc[:, (pivot != 0).any(axis=0)]
pivot.shape

(400, 11282)

In [12]:
piv_sparse = sp.sparse.csr_matrix(pivot.values)

<h1>Modeling</h1>

In [13]:
boardgame_similarity = cosine_similarity(piv_sparse)

In [15]:
bg_sim_df = pd.DataFrame(boardgame_similarity, index=pivot.index, columns=pivot.index)

In [16]:
are_diagonal_ones = np.all(np.diag(bg_sim_df) == 1.0)

print(f"Are all diagonal elements 1.0? {are_diagonal_ones}")

Are all diagonal elements 1.0? False


In [17]:
bg_sim_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 400 entries, 6 nimmt! to Zooloretto
Columns: 400 entries, 6 nimmt! to Zooloretto
dtypes: float64(400)
memory usage: 1.2+ MB


In [18]:
'Clue' in bg_sim_df.columns

True

<h2>Making Recommendation - Example Boardgame: Clue</h2>

In [19]:
def bg_rec(bg_name):
    number = 1
    print(f"Recommended because you like {bg_name}:\n")
    for bg in bg_sim_df.sort_values(by = bg_name, ascending = False).index[1:6]:
        print(f"#{number}: {bg}, {round(bg_sim_df[bg][bg_name]*100,2)}% match")
        number +=1  

In [20]:
bg_rec('Clue')

Recommended because you like Clue:

#1: In the Year of the Dragon, 4.29% match
#2: Euphoria: Build a Better Dystopia, 3.81% match
#3: Taboo, 3.53% match
#4: Fury of Dracula (Third/Fourth Edition), 2.44% match
#5: Las Vegas, 2.02% match
