## Import libraries

In [1]:
import pandas as pd
import numpy as np

from surprise import KNNWithMeans, KNNBasic
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

## Import data

In [8]:
# Load the scraped board-game catalogue and the full ratings dump.
bg = pd.read_feather('../Board-Game-Scraper/bgg_boardgames.feather')
ratings = pd.read_feather('./data/full/bgg_ratings_full.feather')

# Cast the catalogue key to int before joining on it.
bg['boardgame_id'] = bg['boardgame_id'].astype('int')

# Attach each game's title to its ratings; the inner join drops ratings whose
# boardgame_id is missing from the catalogue. Keep only user / title / rating.
ratings = (
    ratings[['nickname', 'boardgame_id', 'rating']]
    .merge(bg[['boardgame_id', 'title']], on='boardgame_id', how='inner')
    .loc[:, ['nickname', 'title', 'rating']]
)

In [50]:
# Working copy of the ratings as (user, item, rating) triplets.
dataset = ratings[['nickname', 'title', 'rating']].copy()
dataset.head()

Unnamed: 0,nickname,title,rating
0,DiVo1975,Anachrony,9.0
1,ispeakmath,Anachrony,9.0
2,ahhdrjones,Anachrony,7.0
3,CowStriker,Anachrony,8.0
4,BrianNeuls,Anachrony,7.5


In [52]:
# Count the ratings that fall below 0.5.
n_below_min = int((dataset['rating'] < 0.5).sum())
f'Number of ratings below 0.5: {n_below_min}'

'Number of ratings below 0.5: 19'

In [53]:
n_min_item = 500
n_min_users = n_min_item

# Count ratings per user and per title once, on the not-yet-filtered frame.
ratings_per_user = dataset['nickname'].value_counts()
ratings_per_title = dataset['title'].value_counts()

active_users = ratings_per_user[ratings_per_user >= n_min_users].index
popular_titles = ratings_per_title[ratings_per_title >= n_min_item].index

# NOTE: this is a single pass. Both counts are taken before any row is
# dropped, so a surviving user or title can end up with fewer ratings than the
# minimum once the other filters have been applied.
# The cell rebinds `dataset`, so re-running it filters the already-filtered
# frame again.
dataset = dataset[
    dataset['nickname'].isin(active_users)
    & dataset['title'].isin(popular_titles)
    & (dataset['rating'] >= 0.5)
]
print(len(dataset))

3732584


In [22]:
# Smallest number of ratings any remaining title has.
dataset['title'].value_counts().min()

33

In [54]:
# Ratings per user, most active first.
dataset['nickname'].value_counts()

Doel            3767
leffe dubbel    3479
TomVasel        3401
Pandorzecza     3385
JasonSaastad    3057
                ... 
hornet69          38
OldDawg           33
mrbrown           33
gomattdawg        28
sidbits           19
Name: nickname, Length: 5923, dtype: int64

In [55]:
# Ratings per title, most rated first.
dataset['title'].value_counts()

Puerto Rico                                   5926
Love Letter                                   5643
Citadels                                      5607
Pandemic                                      5541
The Castles of Burgundy                       5479
                                              ... 
Aeon Trespass: Odyssey                          46
Settlers of Catan: Gallery Edition              43
Cosmic Encounter: 42nd Anniversary Edition      41
Dungeon Universalis                             35
Unstable Unicorns: NSFW Pack                    33
Name: title, Length: 5621, dtype: int64

In [56]:
# Display the filtered frame (the original row index is kept, so it has gaps).
dataset

Unnamed: 0,nickname,title,rating
9,asaarto,Anachrony,7.0
10,Daboom,Anachrony,8.0
11,bitatmoonl,Anachrony,7.5
16,benosteen,Anachrony,7.5
17,dasfungames,Anachrony,4.0
...,...,...,...
20184853,loopoocat,Grand Prix,4.0
20185075,loopoocat,Prosperity,5.0
20185211,loopoocat,Tally Ho!,1.0
20185276,loopoocat,Who Goes There?,2.0


In [49]:
# Observed rating range as a (min, max) tuple.
dataset['rating'].min(), dataset['rating'].max()

(1.4013e-45, 10.0)

In [58]:
# Surprise reads the frame's columns positionally as (user, item, rating),
# so the column order is spelled out explicitly.
reader = Reader(rating_scale=(0.5, 10.0))
data = Dataset.load_from_df(dataset[['nickname', 'title', 'rating']], reader)

In [59]:
# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split
# reproducible.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=1,
)

## User-based

In [60]:
# User-based k-NN: cosine similarities are computed between users.
user_sim_options = {'name': 'cosine', 'user_based': True}
algo = KNNWithMeans(k=50, sim_options=user_sim_options)
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x2bd169e40>

In [61]:
# Predict the held-out ratings and report the RMSE of the user-based model.
test_pred = algo.test(testset)
accuracy.rmse(predictions=test_pred, verbose=True)

RMSE: 1.2012


1.201171255054754

## Item-based

In [62]:
# Item-based k-NN: cosine similarities are computed between items.
# This rebinds `algo`, so the user-based model fitted earlier is replaced.
item_sim_options = {'name': 'cosine', 'user_based': False}
algo = KNNWithMeans(k=50, sim_options=item_sim_options)
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x2e0439960>

In [63]:
# Predict the held-out ratings and report the RMSE of the item-based model.
test_pred = algo.test(testset)
accuracy.rmse(predictions=test_pred, verbose=True)

RMSE: 1.1850


1.1849620863381591

## Recommendations

In [109]:
def generate_recommendation(user, model, dataset, thresh=7, amount=10):
    """Recommend titles that `user` has not rated yet.

    Candidate titles are visited in random order (shuffled with NumPy's global
    RNG). A title is kept when the model's estimated rating reaches `thresh`,
    and the search stops once `amount` recommendations have been collected.

    Parameters
    ----------
    user
        Nickname matched against ``dataset['nickname']`` and passed to
        ``model.predict`` as ``uid``.
    model
        Object exposing ``predict(uid=..., iid=...)`` whose result has an
        ``est`` attribute (for example a fitted Surprise algorithm).
    dataset
        DataFrame with at least the columns ``nickname`` and ``title``.
    thresh
        Minimum estimated rating for a title to be recommended.
    amount
        Maximum number of recommendations to return.

    Returns
    -------
    list
        ``(title, estimated_rating)`` tuples with the rating rounded to two
        decimals. The list holds fewer than `amount` entries (possibly none)
        when not enough candidates reach `thresh`.
    """
    all_titles = set(dataset['title'].values)
    # Select the *titles* this user has rated. The nickname column would only
    # yield the user's own name, so nothing would be excluded.
    users_played_titles = set(dataset.loc[dataset['nickname'] == user, 'title'])
    titles = np.array(list(all_titles - users_played_titles))

    # Randomise the visiting order so repeated calls surface different games.
    np.random.shuffle(titles)

    rec_list = []
    for title in titles:
        if len(rec_list) >= amount:
            break

        rating = model.predict(uid=user, iid=title).est
        if rating >= thresh:
            rec_list.append((title, round(rating, 2)))

    # Always return a list, even when fewer than `amount` titles qualified.
    return rec_list

In [108]:
# Pick an arbitrary user (by row position) to demo the recommender.
dataset['nickname'].iloc[11233]

'BeerAndBoard'

In [110]:
# Recommend games for the example user with the most recently fitted model.
generate_recommendation(user='BeerAndBoard', model=algo, dataset=dataset)

[('Pax Pamir', 7.48),
 ('Canal Mania', 7.17),
 ('2nd Fleet: Modern Naval Combat in the North Atlantic', 7.45),
 ('The El Grande Expansions', 7.46),
 ('Mansions of Madness: Second Edition – Horrific Journeys: Expansion', 8.95),
 ('BattleLore: Goblin Skirmishers Specialist Pack', 7.48),
 ('Age of Empires III: The Age of Discovery', 7.1),
 ('Scythe: The Wind Gambit', 7.97),
 ('Race for the Galaxy: Alien Artifacts', 7.6),
 ('Solomon Kane', 7.85)]