# Simple Recommender Using Content-Based Filtering

Use `TfidfVectorizer` with cosine similarity to recommend board games based on each game's description.

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# NOTE: this is an absolute, machine-specific path; point it at your local copy of games.csv.
GAMES_CSV = 'C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data/games.csv'
df = pd.read_csv(GAMES_CSV)
df.head()

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...,1981,1.963,6.64537,5.78447,1.4544,3,4,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0


In [3]:
# Keep only the id, title and description columns; copy so later changes do not touch df.
recommender_columns = ['BGGId', 'Name', 'Description']
game_df = df.loc[:, recommender_columns].copy()
game_df.head()

Unnamed: 0,BGGId,Name,Description
0,1,Die Macher,die macher game seven sequential political rac...
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...
2,3,Samurai,samurai set medieval japan player compete gain...
3,4,Tal der Könige,triangular box luxurious large block tal der k...
4,5,Acquire,acquire player strategically invest business t...


In [4]:
# Inspect dtypes and non-null counts to spot columns with missing values.
game_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21925 entries, 0 to 21924
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   BGGId        21925 non-null  int64 
 1   Name         21925 non-null  object
 2   Description  21924 non-null  object
dtypes: int64(1), object(2)
memory usage: 514.0+ KB


In [5]:
# Remove every row that has a missing value in any of the selected columns.
game_df = game_df.dropna(axis=0, how='any')
game_df

Unnamed: 0,BGGId,Name,Description
0,1,Die Macher,die macher game seven sequential political rac...
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...
2,3,Samurai,samurai set medieval japan player compete gain...
3,4,Tal der Könige,triangular box luxurious large block tal der k...
4,5,Acquire,acquire player strategically invest business t...
...,...,...,...
21920,347146,Salvage,oil tanker fire rescue team send deal damage...
21921,347521,Blitzkrieg!: World War Two in 20 Minutes,new square edition include nippon expansion up...
21922,348955,Rock Paper Scissors: Deluxe Edition,million year people force play timehonored gam...
21923,349131,Splitter,splitter group number score point mdash s s md...


In [6]:
def cosine_results(df):
    """
    Rank every game against every other game by TF-IDF cosine similarity.

    Parameters:
        df(pandas dataframe): dataframe with a 'BGGId' column and a
            'Description' column for each game

    Returns:
        results(dictionary): maps each BGGId to a list of row positions in
            df, ordered from highest to lowest cosine similarity to that
            game's description
    """
    # Turn each description into a TF-IDF vector, ignoring English stop words.
    tfidf = TfidfVectorizer(stop_words='english').fit_transform(df['Description'])
    # Pairwise similarity: row i holds the scores of game i against all games.
    similarity = cosine_similarity(tfidf, tfidf)

    bgg_ids = df['BGGId']
    # argsort is ascending, so reverse it to put the most similar rows first.
    return {
        bgg_ids.iloc[row]: list(scores.argsort()[::-1])
        for row, scores in enumerate(similarity)
    }


In [7]:
def recommender(game_id, count):
    """
    Given the game_id and the count, return the names of the `count` games whose
    descriptions are most similar to it.

    Parameters:
        game_id(int): the boardgamegeek game_id; must be a BGGId present in game_df
        count(int): how many recommendations to return; values below 1 give an empty list

    Returns:
        results(list(str)): List of boardgame names, most similar first

    Raises:
        KeyError: if game_id is not a BGGId in game_df
    """
    cos_res = cosine_results(game_df)
    ranked_rows = cos_res[game_id]

    # Drop the query game by its own row position instead of assuming it is
    # ranked first: games with tied scores (e.g. identical descriptions) can
    # be ordered ahead of it.
    own_rows = {row for row, bgg_id in enumerate(game_df['BGGId']) if bgg_id == game_id}
    similar_rows = [row for row in ranked_rows if row not in own_rows][:max(count, 0)]

    # The positions index rows of game_df (the frame passed to cosine_results),
    # so names must be read from game_df; df still contains the rows removed by
    # dropna, which shifts its positions relative to game_df.
    return [game_df['Name'].iloc[row] for row in similar_rows]

In [18]:
# Five recommendations for BGGId 157354.
recommender(game_id=157354, count=5)

['Four Tribes', 'Attila', 'Age of Gods', 'Trellis', "The Pirate's Flag"]

In [17]:
# Show the row of the game that was queried above.
game_df.loc[game_df['BGGId'] == 157354]

Unnamed: 0,BGGId,Name,Description
13409,157354,Five Tribes,cross land night caravan arrive fabled sulta...
