# Importing Libraries

In [12]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import precision_recall_fscore_support

from pycaret.nlp import *

# Loading the Data

In [2]:
# Read the two cleaned CSV files; the next cell takes working copies of these frames.
# Board-game table (columns previewed below: id, name, rank, geek_rating, ...).
df_bg_tmp = pd.read_csv('../datasets/games-cleaned.csv')
# Review table (columns previewed below: user, name, rating).
df_rv_tmp = pd.read_csv('../datasets/reviews-cleaned.csv')

# Collaborative Recommenders

In [3]:
# Deep working copies, so the frames read from disk are not modified by later cells
df_bg = df_bg_tmp.copy(deep=True)
df_rv = df_rv_tmp.copy(deep=True)

In [4]:
# Preview the first five rows of the board-game table
df_bg.head()

Unnamed: 0,id,name,rank,geek_rating,avg_rating,usersrated,category,mechanic,weight,minplayers,maxplayers,playingtime,minage,yearpublished,designer,artist,publisher,description,image
0,30549,Pandemic,106,7.48669,7.58896,109006,['Medical'],"['Action Points', 'Cooperative Game', 'Hand Ma...",2.4063,2,4,45,8,2008,['Matt Leacock'],"['Josh Cappel', 'Christian Hanisch', 'Régis Mo...","['Z-Man Games', 'Albi', 'Asmodee', 'Asmodee It...","In Pandemic, several virulent diseases have br...",https://cf.geekdo-images.com/S3ybV1LAp-8SnHIXL...
1,822,Carcassonne,191,7.30857,7.41837,108776,"['City Building', 'Medieval', 'Territory Build...","['Area Majority / Influence', 'Map Addition', ...",1.9057,2,5,45,7,2000,['Klaus-Jürgen Wrede'],"['Doris Matthäus', 'Anne Pätzke', 'Chris Quill...","['Hans im Glück', '999 Games', 'Albi', 'Bard C...",Carcassonne is a tile-placement game in which ...,https://cf.geekdo-images.com/okM0dq_bEXnbyQTOv...
2,13,Catan,429,6.96965,7.13598,108064,"['Economic', 'Negotiation']","['Dice Rolling', 'Hexagon Grid', 'Income', 'Mo...",2.313,3,4,120,10,1995,['Klaus Teuber'],"['Volkan Baga', 'Tanja Donner', 'Pete Fenlon',...","['KOSMOS', '999 Games', 'Albi', 'Asmodee', 'As...","In CATAN (formerly The Settlers of Catan), pla...",https://cf.geekdo-images.com/W3Bsga_uLP9kO91gZ...
3,68448,7 Wonders,73,7.63355,7.73515,90021,"['Ancient', 'Card Game', 'City Building', 'Civ...","['Drafting', 'Hand Management', 'Set Collectio...",2.3264,2,7,30,10,2010,['Antoine Bauza'],"['Dimitri Chappuis', 'Miguel Coimbra', 'Etienn...","['Repos Production', 'ADC Blackfire Entertainm...",You are the leader of one of the 7 great citie...,https://cf.geekdo-images.com/RvFVTEpnbb4NM7k0I...
4,36218,Dominion,104,7.49912,7.61,81582,"['Card Game', 'Medieval']","['Deck, Bag, and Pool Building', 'Delayed Purc...",2.3542,2,4,30,13,2008,['Donald X. Vaccarino'],"['Matthias Catrein', 'Julien Delval', 'Tomasz ...","['Rio Grande Games', '999 Games', 'Albi', 'Bar...","&quot;You are a monarch, like your parents bef...",https://cf.geekdo-images.com/j6iQpZ4XkemZP07HN...


In [5]:
# Preview the first five rows of the review table
df_rv.head()

Unnamed: 0,user,name,rating
0,1 Family Meeple,10 Days in Europe,4.1
1,1 Family Meeple,12 Days,7.0
2,1 Family Meeple,7 Wonders,6.5
3,1 Family Meeple,A Column of Fire,5.0
4,1 Family Meeple,A Feast for Odin,10.0


**User-based Collaborative Recommender**

In [6]:
# User x game rating matrix: one row per user, one column per game name.
# pivot_table aggregates with the mean by default, so repeated (user, game)
# ratings are averaged; pairs without a review stay NaN.
user_pivot = df_rv.pivot_table(values='rating', index='user', columns='name')
user_pivot

name,"...and then, we held hands.",...und tschüss!,10 Days in Africa,10 Days in Asia,10 Days in Europe,10 Days in the Americas,10 Days in the USA,10 Minute Heist: The Wizard's Tower,10' to Kill,1000 Blank White Cards,...,Zooloretto Junior,Zooloretto: The Dice Game,Zoowaboo,Zug um Zug: Deutschland,Zulus on the Ramparts!: The Battle of Rorke's Drift – Second Edition,ZÈRTZ,[redacted],duck! duck! Go!,ebbes,iKNOW
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1 Family Meeple,,,,,4.1,,,,,,...,,,,,,,6.5,4.0,,
1000rpm,,,,,7.0,,,,,,...,,7.0,,,,,,,,
28green,,,,,7.0,,7.0,,,,...,,,,,,,,,,
42amu,,,,,,,,,8.0,,...,,,,,,,,,,
549sd,,,,,,,6.0,,,,...,,,,,,,5.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zsknight,,,,,,,,,,,...,,,,,,,,,6.2,
zumba,,,4.0,,,,,,,,...,,,6.0,,,7.0,,4.0,6.0,
zumikon,,8.0,6.5,,,,7.0,,,,...,,7.5,,,,,7.5,,,
zunnesteke,,,,,,,,,,,...,,,,,,7.0,,,,


In [10]:
# Sparse (CSR) user x game matrix, stored as float32 to save memory.
# Missing ratings are zero-filled first, so they become implicit zeros.
sparse_user_pivot = sparse.csr_matrix(user_pivot.fillna(0), dtype=np.float32)

sparse_user_pivot

<3067x4582 sparse matrix of type '<class 'numpy.float32'>'
	with 2018715 stored elements in Compressed Sparse Row format>

In [13]:
# Similarity matrix: pairwise cosine similarity between the rows (users) of the
# sparse rating matrix. The displayed result is a dense float32 array with one
# row and one column per user.
user_similarities = cosine_similarity(sparse_user_pivot)
user_similarities

array([[1.0000013 , 0.35132778, 0.44122967, ..., 0.33333313, 0.32253665,
        0.40138492],
       [0.35132778, 1.000003  , 0.38480732, ..., 0.358057  , 0.28843167,
        0.3961942 ],
       [0.44122967, 0.38480732, 0.9999994 , ..., 0.32477707, 0.28985673,
        0.45373744],
       ...,
       [0.33333313, 0.358057  , 0.32477707, ..., 1.0000007 , 0.460744  ,
        0.306209  ],
       [0.32253665, 0.28843167, 0.28985673, ..., 0.460744  , 1.0000017 ,
        0.2733309 ],
       [0.40138492, 0.3961942 , 0.45373744, ..., 0.306209  , 0.2733309 ,
        1.0000002 ]], dtype=float32)

In [14]:
# Wrap the similarity array in a DataFrame labelled by user name on both axes,
# so a user's similarities can be looked up by name
user_cf_df = pd.DataFrame(
    data=user_similarities,
    index=user_pivot.index,
    columns=user_pivot.index,
)
user_cf_df.head()

user,1 Family Meeple,1000rpm,28green,42amu,549sd,AHforever,AJBrandon,ASSaali,Aarkas,AaronBE85,...,zhiwiller,zizishaoye,zodball,zottirgen,zpark999,zsknight,zumba,zumikon,zunnesteke,zuzusdad
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1 Family Meeple,1.000001,0.351328,0.44123,0.463583,0.431945,0.327051,0.307018,0.348769,0.365398,0.422336,...,0.413991,0.387728,0.435491,0.427697,0.428628,0.434545,0.264001,0.333333,0.322537,0.401385
1000rpm,0.351328,1.000003,0.384807,0.31487,0.343502,0.15342,0.32759,0.341947,0.277185,0.328638,...,0.287583,0.364414,0.254238,0.306647,0.342873,0.345588,0.347187,0.358057,0.288432,0.396194
28green,0.44123,0.384807,0.999999,0.414176,0.423904,0.287099,0.382935,0.345915,0.311133,0.390536,...,0.454744,0.414215,0.372633,0.391947,0.457808,0.396022,0.272193,0.324777,0.289857,0.453737
42amu,0.463583,0.31487,0.414176,0.999997,0.449813,0.357573,0.294944,0.381349,0.405254,0.469776,...,0.408089,0.397024,0.433965,0.445356,0.444515,0.464872,0.300592,0.386128,0.412055,0.370331
549sd,0.431945,0.343502,0.423904,0.449813,0.999995,0.348759,0.383475,0.390232,0.403238,0.501875,...,0.475182,0.473047,0.448085,0.474304,0.544098,0.479252,0.310789,0.365346,0.389563,0.378417


In [16]:
# Similarity of every other user to the target user, most similar first
user_input = '1 Family Meeple'
user_sim = (
    user_cf_df[user_input]
    .drop(labels=user_input)           # remove the user's own entry
    .loc[lambda scores: scores > 0]    # keep strictly positive similarities only
    .sort_values(ascending=False)
)
user_sim

user
forgotmypencil     0.542529
Dugrex56           0.529538
Wiegrief           0.526229
StoryBoardGamer    0.518482
Throat_Rip         0.513059
                     ...   
pmnj               0.172094
StoneR             0.165965
Superfly3          0.165069
Biscotti           0.147669
average_joe        0.100825
Name: 1 Family Meeple, Length: 3066, dtype: float32

In [17]:
# Normalise the similarity scores so they sum to 1 and can act as weights.
# The array keeps the order of user_sim (most similar user first).
user_weight = user_sim.to_numpy() / user_sim.sum()
user_weight

array([4.8418189e-04, 4.7258765e-04, 4.6963498e-04, ..., 1.4731647e-04,
       1.3178788e-04, 8.9982015e-05], dtype=float32)

In [19]:
# Game x user view of the ratings: rows are game names, columns are users
user_ratings = user_pivot.transpose()
user_ratings.head()

user,1 Family Meeple,1000rpm,28green,42amu,549sd,AHforever,AJBrandon,ASSaali,Aarkas,AaronBE85,...,zhiwiller,zizishaoye,zodball,zottirgen,zpark999,zsknight,zumba,zumikon,zunnesteke,zuzusdad
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"...and then, we held hands.",,,,,,,,,,,...,,,,,,,,,,
...und tschüss!,,,,,,,,,,,...,,,,,,,,8.0,,
10 Days in Africa,,,,,,,,,,,...,,7.3,,,,,4.0,6.5,,
10 Days in Asia,,,,,,,,,,,...,,7.6,,,,,,,,
10 Days in Europe,4.1,7.0,7.0,,,,,,,,...,,7.6,,,,,,,,


In [20]:
# Rows: games the target user has no rating for.
# Columns: the similar users, in the same order as user_sim (the target user's
# own column is absent because that entry was dropped from user_sim).
ratings = user_ratings.loc[user_ratings[user_input].isna(), user_sim.index]
ratings

user,forgotmypencil,Dugrex56,Wiegrief,StoryBoardGamer,Throat_Rip,reformedkenny,oatmeal1201,chadams,Reine Beth,helloworldmy,...,khronosTdG,malloc,heli,ecoboardgeek123,Ludo le gars,pmnj,StoneR,Superfly3,Biscotti,average_joe
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"...and then, we held hands.",,,,6.0,,6.5,,,,,...,,,,,,,,,,
...und tschüss!,,,,,,,,,,,...,,,5.0,,7.5,4.0,8.0,,,
10 Days in Africa,7.0,,,,,6.5,,,,,...,,5.0,7.0,,,,6.0,,,6.65892
10 Days in Asia,,,,,,,,,,,...,,,,,,,6.0,,,
10 Days in the Americas,,,,,,,,,,,...,,,,,,,7.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zug um Zug: Deutschland,,,,,,,,,,,...,,,,,,,,,,
Zulus on the Ramparts!: The Battle of Rorke's Drift – Second Edition,,,,,,,,,,,...,,,,,,,,,,
ZÈRTZ,,,,,,,,,,,...,,,6.0,,6.5,,6.0,8.0,1.0,7.57743
ebbes,,,,,,,,,,,...,,,6.0,,,,,,,


In [27]:
# Score each unrated game as sum(similarity weight x rating) over the similar users.
# Missing ratings are zero-filled, so users who did not rate a game add nothing.
# NOTE(review): user_weight is normalised over all similar users, not only those
# who rated a given game, so scores favour widely rated games - confirm intended.
pred_user_ratings = ratings.fillna(0).to_numpy().dot(user_weight)

# Top 20 recommendations as a one-column frame named after the target user
top_20_rec = (
    pd.DataFrame({user_input: pred_user_ratings}, index=ratings.index.astype(str))
    .sort_values(by=user_input, ascending=False)
    .head(20)
)
top_20_rec

Unnamed: 0_level_0,1 Family Meeple
name,Unnamed: 1_level_1
7 Wonders Duel,6.794839
Lost Cities,6.009316
Dixit,5.990938
El Grande,5.877991
Russian Railroads,5.707936
Alhambra,5.597668
Tigris & Euphrates,5.490876
Pandemic Legacy: Season 1,5.297904
Twilight Struggle,5.286731
The Crew: The Quest for Planet Nine,5.101501


In [26]:
# The target user's own ratings as a one-column frame, highest rated first.
# Games the user has not rated are NaN and sort to the bottom.
user_rated_games = user_ratings.loc[:, [user_input]]
user_rated_games.index = user_rated_games.index.astype(str)
user_rated_games.sort_values(by=user_input, ascending=False).head(20)

user,1 Family Meeple
name,Unnamed: 1_level_1
Cottage Garden,10.0
A Feast for Odin,10.0
Concordia,10.0
Rajas of the Ganges,10.0
Rajas of the Ganges: The Dice Charmers,10.0
Coimbra,9.5
Yokohama,9.5
Terraforming Mars,9.5
Rococo,9.0
Heaven & Ale,9.0


**Item-based Collaborative Recommender**

In [38]:
# Item-based pivot table: one row per game name, one column per user.
# pivot_table aggregates with the mean by default, so repeated (game, user)
# ratings are averaged; pairs without a review stay NaN.
item_pivot = df_rv.pivot_table(values='rating', index='name', columns='user')
item_pivot

user,1 Family Meeple,1000rpm,28green,42amu,549sd,AHforever,AJBrandon,ASSaali,Aarkas,AaronBE85,...,zhiwiller,zizishaoye,zodball,zottirgen,zpark999,zsknight,zumba,zumikon,zunnesteke,zuzusdad
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"...and then, we held hands.",,,,,,,,,,,...,,,,,,,,,,
...und tschüss!,,,,,,,,,,,...,,,,,,,,8.0,,
10 Days in Africa,,,,,,,,,,,...,,7.3,,,,,4.0,6.5,,
10 Days in Asia,,,,,,,,,,,...,,7.6,,,,,,,,
10 Days in Europe,4.1,7.0,7.0,,,,,,,,...,,7.6,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZÈRTZ,,,,,,,,,,,...,8.0,8.0,,,,,7.0,,7.0,
[redacted],6.5,,,,5.0,,,,,,...,5.0,,,,,,,7.5,,
duck! duck! Go!,4.0,,,,,,,,,,...,,,,,,,4.0,,,
ebbes,,,,,,,,,,,...,,,,,,6.2,6.0,,,


In [39]:
# Game x user ratings as a CSR matrix; unrated cells are zero-filled first
sparse_item_pivot = sparse.csr_matrix(item_pivot.fillna(value=0))

# Pairwise cosine similarity between games (rows of the matrix)
item_similarities = cosine_similarity(sparse_item_pivot)

# Label both axes with the game names so similarities can be looked up by title
item_cf_df = pd.DataFrame(
    data=item_similarities,
    index=item_pivot.index,
    columns=item_pivot.index,
)
item_cf_df.head()

name,"...and then, we held hands.",...und tschüss!,10 Days in Africa,10 Days in Asia,10 Days in Europe,10 Days in the Americas,10 Days in the USA,10 Minute Heist: The Wizard's Tower,10' to Kill,1000 Blank White Cards,...,Zooloretto Junior,Zooloretto: The Dice Game,Zoowaboo,Zug um Zug: Deutschland,Zulus on the Ramparts!: The Battle of Rorke's Drift – Second Edition,ZÈRTZ,[redacted],duck! duck! Go!,ebbes,iKNOW
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"...and then, we held hands.",1.0,0.080115,0.158781,0.108188,0.140938,0.108727,0.127127,0.147714,0.141141,0.061741,...,0.075312,0.187834,0.076062,0.086458,0.098297,0.185706,0.165226,0.095863,0.127687,0.08003
...und tschüss!,0.080115,1.0,0.164283,0.135121,0.228677,0.106158,0.131682,0.073456,0.054965,0.035974,...,0.121544,0.175149,0.139251,0.096781,0.03448,0.17288,0.065776,0.166939,0.199616,0.05904
10 Days in Africa,0.158781,0.164283,1.0,0.584768,0.551947,0.481432,0.553647,0.096274,0.070299,0.134278,...,0.1753,0.268873,0.173794,0.063029,0.116536,0.336803,0.089493,0.284904,0.09823,0.087955
10 Days in Asia,0.108188,0.135121,0.584768,1.0,0.541234,0.591203,0.509913,0.078295,0.052857,0.098025,...,0.15774,0.237408,0.146475,0.068021,0.09446,0.272813,0.054192,0.252753,0.079539,0.076176
10 Days in Europe,0.140938,0.228677,0.551947,0.541234,1.0,0.474869,0.520611,0.148301,0.101523,0.097216,...,0.169539,0.290477,0.212485,0.097823,0.074929,0.33866,0.089756,0.235569,0.140514,0.14772


In [40]:
# Top 20 board games most similar to the chosen title
item_input = 'Wingspan'

# Work on a copy: assigning to .index on the Series returned by
# item_cf_df[item_input] would relabel an object taken straight from the frame.
item_sim = item_cf_df[item_input].copy()
item_sim.index = item_sim.index.astype(str)

# Drop the game's own entry first, then keep strictly positive similarities
# (the same order as the user-based cell), so the drop cannot fail because the
# self-similarity entry was filtered out beforehand.
item_sim = item_sim.drop(item_input)
item_sim = item_sim[item_sim > 0]
item_sim.sort_values(ascending=False).head(20)

name
Azul                            0.889526
7 Wonders                       0.878789
Terraforming Mars               0.877635
Splendor                        0.872865
Codenames                       0.870020
The Castles of Burgundy         0.866457
Pandemic                        0.865538
7 Wonders Duel                  0.864552
Five Tribes                     0.856681
Dominion                        0.855331
Stone Age                       0.854436
Carcassonne                     0.853066
Patchwork                       0.852914
Scythe                          0.852165
Love Letter                     0.850472
Kingdomino                      0.845257
Sagrada                         0.842427
Tzolk'in: The Mayan Calendar    0.842323
The Quacks of Quedlinburg       0.840583
King of Tokyo                   0.840457
Name: Wingspan, dtype: float64