<h1>Item-Based Collaborative Filtering</h1>

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df_ur = pd.read_csv('C:/Users/guije/Documents/boardgames_databases/bgg-19m-reviews.csv')

In [3]:
df_ur.columns

Index(['Unnamed: 0', 'user', 'rating', 'comment', 'ID', 'name'], dtype='object')

In [4]:
df_ur.drop(columns=['Unnamed: 0', 'comment', 'ID'], inplace=True)
df_ur.head()

Unnamed: 0,user,rating,name
0,Torsten,10.0,Pandemic
1,mitnachtKAUBO-I,10.0,Pandemic
2,avlawn,10.0,Pandemic
3,Mike Mayer,10.0,Pandemic
4,Mease19,10.0,Pandemic


In [5]:
print(f"The user 'Torsten' has reviewed {len(df_ur[df_ur.user == 'Torsten'])} boardgames.")

The user 'Torsten' has reviewed 1460 boardgames.


In [6]:
df_ur.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18964807 entries, 0 to 18964806
Data columns (total 3 columns):
 #   Column  Dtype  
---  ------  -----  
 0   user    object 
 1   rating  float64
 2   name    object 
dtypes: float64(1), object(2)
memory usage: 434.1+ MB


In [7]:
df_ur.isnull().any()

user       True
rating    False
name      False
dtype: bool

In [8]:
print(f"There's {len(df_ur[df_ur.user.isnull() == True])} reviews with no username.")

There's 66 reviews with no username.


In [9]:
no_user_i = df_ur.index[df_ur.user.isnull() == True]
len(no_user_i)

66

In [10]:
new_len = len(df_ur) - len(no_user_i)
new_len

18964741

In [11]:
df_ur.drop(index=no_user_i, inplace=True)
len(df_ur) == new_len

True

In [12]:
df_ur.isnull().any()

user      False
rating    False
name      False
dtype: bool

Due to computational constrains I only work with 10,000 random reviews.

In [13]:
df_10k = df_ur.sample(n=50000, random_state=0)
len(df_10k)

50000

In [14]:
pivot = df_10k.pivot_table(index=['user'], columns=['name'], values='rating')
pivot.head()

name,"""La Garde recule!""","""Scratch One Flat Top!""",'65: Squad-Level Combat in the Jungles of Vietnam,(Come on) Let's Quiz Again,"...and then, we held hands.",...und tschüss!,.hack//ENEMY,"1,2,3! Now you see me...",10 Days in Africa,10 Days in Asia,...,oddball Äeronauts,¡Cobardes!,Élve fogd el,Παλέρμο: Το Μεγάλο Ξεκαθάρισμα,Свинтус,Экивоки,イラストリー (Illustori),ドキッと！アイス (Dokitto! Ice),猿道 (Monkey Road),聖杯サクセション (Throne and the Grail)
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
beastvol,,,,,,,,,,,...,,,,,,,,,,
-=Yod@=-,,,,,,,,,,,...,,,,,,,,,,
-Johnny-,,,,,,,,,,,...,,,,,,,,,,
-PEDROPABLO-,,,,,,,,,,,...,,,,,,,,,,
-toni-,,,,,,,,,,,...,,,,,,,,,,


In [15]:
# Normalizing the values
pivot_n = pivot.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)
pivot_n.head()

name,"""La Garde recule!""","""Scratch One Flat Top!""",'65: Squad-Level Combat in the Jungles of Vietnam,(Come on) Let's Quiz Again,"...and then, we held hands.",...und tschüss!,.hack//ENEMY,"1,2,3! Now you see me...",10 Days in Africa,10 Days in Asia,...,oddball Äeronauts,¡Cobardes!,Élve fogd el,Παλέρμο: Το Μεγάλο Ξεκαθάρισμα,Свинтус,Экивоки,イラストリー (Illustori),ドキッと！アイス (Dokitto! Ice),猿道 (Monkey Road),聖杯サクセション (Throne and the Grail)
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
beastvol,,,,,,,,,,,...,,,,,,,,,,
-=Yod@=-,,,,,,,,,,,...,,,,,,,,,,
-Johnny-,,,,,,,,,,,...,,,,,,,,,,
-PEDROPABLO-,,,,,,,,,,,...,,,,,,,,,,
-toni-,,,,,,,,,,,...,,,,,,,,,,


In [16]:
# Filling NaN values with zero
pivot_n.fillna(0, inplace=True)
pivot_n.head()

name,"""La Garde recule!""","""Scratch One Flat Top!""",'65: Squad-Level Combat in the Jungles of Vietnam,(Come on) Let's Quiz Again,"...and then, we held hands.",...und tschüss!,.hack//ENEMY,"1,2,3! Now you see me...",10 Days in Africa,10 Days in Asia,...,oddball Äeronauts,¡Cobardes!,Élve fogd el,Παλέρμο: Το Μεγάλο Ξεκαθάρισμα,Свинтус,Экивоки,イラストリー (Illustori),ドキッと！アイス (Dokitto! Ice),猿道 (Monkey Road),聖杯サクセション (Throne and the Grail)
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
beastvol,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-=Yod@=-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-Johnny-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-PEDROPABLO-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-toni-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
pivot_n = pivot_n.T

In [18]:
pivot_n.shape

(8768, 37901)

In [19]:
# Dropping columns with only zeroes
pivot_n = pivot_n.loc[:, (pivot_n != 0).any(axis=0)]
pivot_n.shape

(8768, 6787)

In [20]:
piv_sparse = sp.sparse.csr_matrix(pivot_n.values)

<h1>Modeling</h1>

In [21]:
boardgame_similarity = cosine_similarity(piv_sparse)

In [22]:
bg_sim_df = pd.DataFrame(boardgame_similarity, index=pivot_n.index, columns=pivot_n.index)

In [23]:
bg_sim_df.head()

name,"""La Garde recule!""","""Scratch One Flat Top!""",'65: Squad-Level Combat in the Jungles of Vietnam,(Come on) Let's Quiz Again,"...and then, we held hands.",...und tschüss!,.hack//ENEMY,"1,2,3! Now you see me...",10 Days in Africa,10 Days in Asia,...,oddball Äeronauts,¡Cobardes!,Élve fogd el,Παλέρμο: Το Μεγάλο Ξεκαθάρισμα,Свинтус,Экивоки,イラストリー (Illustori),ドキッと！アイス (Dokitto! Ice),猿道 (Monkey Road),聖杯サクセション (Throne and the Grail)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""La Garde recule!""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Scratch One Flat Top!""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'65: Squad-Level Combat in the Jungles of Vietnam,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(Come on) Let's Quiz Again,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"...and then, we held hands.",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
bg_sim_df['Unstable Unicorns'][bg_sim_df.columns == 'Unstable Unicorns']

name
Unstable Unicorns    0.0
Name: Unstable Unicorns, dtype: float64

<h2>Making Recommendation - Example Boardgame: Unstable Unicorns</h2>

In [28]:
def bg_rec(bg_name):
    number = 1
    print('Recommended because you like {}:\n'.format(bg_name))
    for bg in bg_sim_df.sort_values(by = bg_name, ascending = False).index[1:6]:
        print(f'#{number}: {bg}, {round(bg_sim_df[bg][bg_name]*100,2)}% match')
        number +=1  

In [27]:
bg_rec('Unstable Unicorns')

Recommended because you like Unstable Unicorns:

#1: Rage, 0.0% match
#2: Race! Formula 90, 0.0% match
#3: Racer Knights of Falconus, 0.0% match
#4: Rack-O, 0.0% match
#5: Racko Plus, 0.0% match
