# Simple Recommender Using Content-Based Filtering

Use `TfidfVectorizer` with cosine similarity to recommend games whose descriptions are most similar to a given game's description.

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the games table from CSV and preview its first rows.
# NOTE(review): this is an absolute path on one specific machine; the notebook will not
# run elsewhere until it points at a local copy of games.csv.
df  = pd.read_csv('C:/Users/John/Documents/LHL Lecture Material/boardgame-recommender/data/games.csv')
df.head()

Unnamed: 0,BGGId,Name,Description,YearPublished,GameWeight,AvgRating,BayesAvgRating,StdDev,MinPlayers,MaxPlayers,...,Rank:partygames,Rank:childrensgames,Cat:Thematic,Cat:Strategy,Cat:War,Cat:Family,Cat:CGS,Cat:Abstract,Cat:Party,Cat:Childrens
0,1,Die Macher,die macher game seven sequential political rac...,1986,4.3206,7.61428,7.10363,1.57979,3,5,...,21926,21926,0,1,0,0,0,0,0,0
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...,1981,1.963,6.64537,5.78447,1.4544,3,4,...,21926,21926,0,1,0,0,0,0,0,0
2,3,Samurai,samurai set medieval japan player compete gain...,1998,2.4859,7.45601,7.23994,1.18227,2,4,...,21926,21926,0,1,0,0,0,0,0,0
3,4,Tal der Könige,triangular box luxurious large block tal der k...,1992,2.6667,6.60006,5.67954,1.23129,2,4,...,21926,21926,0,0,0,0,0,0,0,0
4,5,Acquire,acquire player strategically invest business t...,1964,2.5031,7.33861,7.14189,1.33583,2,6,...,21926,21926,0,1,0,0,0,0,0,0


In [13]:
# Work on an independent copy holding only the id, title and description columns,
# so later edits never touch the full table loaded into df.
game_df = df.loc[:, ['BGGId', 'Name', 'Description']].copy()
game_df.head()

Unnamed: 0,BGGId,Name,Description
0,1,Die Macher,die macher game seven sequential political rac...
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...
2,3,Samurai,samurai set medieval japan player compete gain...
3,4,Tal der Könige,triangular box luxurious large block tal der k...
4,5,Acquire,acquire player strategically invest business t...


In [14]:
# Show per-column dtypes and non-null counts to see whether any values are missing.
game_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21925 entries, 0 to 21924
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   BGGId        21925 non-null  int64 
 1   Name         21925 non-null  object
 2   Description  21924 non-null  object
dtypes: int64(1), object(2)
memory usage: 514.0+ KB


In [15]:
# Discard every row that has a missing value in any of the selected columns.
game_df = game_df.dropna(axis=0, how='any')
game_df

Unnamed: 0,BGGId,Name,Description
0,1,Die Macher,die macher game seven sequential political rac...
1,2,Dragonmaster,dragonmaster tricktaking card game base old ga...
2,3,Samurai,samurai set medieval japan player compete gain...
3,4,Tal der Könige,triangular box luxurious large block tal der k...
4,5,Acquire,acquire player strategically invest business t...
...,...,...,...
21920,347146,Salvage,oil tanker fire rescue team send deal damage...
21921,347521,Blitzkrieg!: World War Two in 20 Minutes,new square edition include nippon expansion up...
21922,348955,Rock Paper Scissors: Deluxe Edition,million year people force play timehonored gam...
21923,349131,Splitter,splitter group number score point mdash s s md...


In [28]:
# Look at a single record: the first row of game_df by position.
game_df.iloc[0]

BGGId                                                          1
Description    die macher game seven sequential political rac...
Name: 0, dtype: object

In [16]:
def cosine_results(df):
    """
    Rank every game against every other game by description similarity.

    The 'Description' column is turned into TF-IDF vectors (English stop words
    removed) and the pairwise cosine similarity between all rows is computed.

    Parameters:
        df(pandas dataframe): dataframe with a 'BGGId' column and a 'Description'
            column, one row per game

    Returns:
        results(dictionary): maps each BGGId to a list of row positions in `df`,
            ordered from the most similar description to the least similar
    """
    tfidf = TfidfVectorizer(stop_words='english')
    description_vectors = tfidf.fit_transform(df['Description'])
    similarity = cosine_similarity(description_vectors, description_vectors)

    # Row i of the similarity matrix belongs to the game at position i of df, so the
    # ids and the rows can be walked in lockstep. argsort is ascending; reversing it
    # puts the highest similarity first.
    bgg_ids = df['BGGId'].to_numpy()
    return {
        bgg_id: list(scores.argsort()[::-1])
        for bgg_id, scores in zip(bgg_ids, similarity)
    }


In [7]:
# Build the BGGId -> similarity-ranked row positions lookup for every game in game_df.
cos_res = cosine_results(game_df)

In [17]:
# Holds the most recent (frame, rankings) pair so that repeated recommender calls do
# not rebuild the full pairwise similarity matrix every time.
_ranking_cache = {}


def _rankings_for(frame):
    """Return cosine_results(frame), recomputing only when given a different DataFrame object."""
    # Identity check: a reassigned game_df triggers a rebuild; in-place edits to the
    # same object are not detected.
    if _ranking_cache.get('frame') is not frame:
        _ranking_cache['rankings'] = cosine_results(frame)
        _ranking_cache['frame'] = frame
    return _ranking_cache['rankings']


def recommender(game_id, count):
    """
    Return the names of the `count` games whose descriptions are most similar
    to the description of `game_id`.

    Parameters:
        game_id(int): the boardgamegeek game_id (a value of game_df['BGGId'])
        count(int): how many recommendations to return; values <= 0 give an empty list

    Returns:
        results(list(str)): list of boardgame names, most similar first

    Raises:
        KeyError: if game_id is not present in game_df
    """
    rankings = _rankings_for(game_df)
    if game_id not in rankings:
        raise KeyError(f"BGGId {game_id!r} is not in game_df")

    # Exclude the queried game by its actual row position instead of assuming it is
    # ranked first: ties (e.g. identical descriptions) can put another game ahead of it.
    own_positions = set(np.flatnonzero(game_df['BGGId'].to_numpy() == game_id))
    wanted = max(count, 0)

    picks = []
    for position in rankings[game_id]:
        if len(picks) >= wanted:
            break
        if position not in own_positions:
            picks.append(position)

    # The ranked positions index rows of game_df (the frame the similarities were
    # built from), so names must be read from game_df; the unfiltered df has extra
    # rows and its positions do not line up.
    return [game_df['Name'].iloc[position] for position in picks]

In [27]:
# Ask for the 5 games most similar to BGGId 30549.
recommender(30549,5)

['Pandemic Legacy: Season 1',
 'Pandemic: The Cure',
 'Pandemic: Contagion',
 'Infection',
 'Carcassonne für 2']

In [26]:
# Find the row for the game titled 'Pandemic' to confirm its BGGId.
game_df.loc[game_df['Name'].eq('Pandemic')]

Unnamed: 0,BGGId,Name,Description
7919,30549,Pandemic,pandemic virulent disease break simultaneously...


In [33]:
# Read the description text of one recommended title to see why it matched.
game_df.loc[game_df['Name'] == 'Pandemic Legacy: Season 1', 'Description'].iloc[0]

'pandemic legacy cooperative campaign game overarch storyarc play   session depend group game beginning game start similar basic pandemic team diseasefighte specialist race clock travel world treat disease hotspot research cure plague handdure player turn action available travel world way need discard card build structure like research station treat disease remove cube board cube color remove disease eradicate trade card player find cure disease require card color discard research station player unique role special ability help actionsafter player take action draw card card include epidemic card place new disease cube board lead outbreak spread disease cube outbreak additionally increase panic level city make city expensive travel toeach month game chance achieve month objective succeed win immediately month fail second chance funding beneficial event cardsdure campaign new rule component introduce require permanently alter component game include write card rip card place permanent sti

In [34]:
# Read the description text of one recommended title to see why it matched.
game_df.loc[game_df['Name'] == 'Pandemic: The Cure', 'Description'].iloc[0]

'pandemic cure dicebased version popular pandemic board game set minute play   minute board game disease threaten world team save humanity team world hotspot check break control research cure plaguesplayer roll dice turn determine action available fly sail major population center world treat disease current region collect sample study exchange knowledge help goal discover cure player take different role unique set dice ability mdash player advantage specialization hope win game dispatcher example spend dice fly board medic particularly adept treat disease player roll dice like time reroll perfect turn likely epidemic occurat end turn new quotinfection dicequot roll determine type location newly infect population region board infect dice give color outbreak occur spread disease adjacent region outbreak place people infect rate infection get high player lose player discover cure disease win humanity savedpart pandemic series'

In [35]:
# Read the description text of one recommended title to see why it matched.
game_df.loc[game_df['Name'] == 'Pandemic: Contagion', 'Description'].iloc[0]

'year try defeat disease threaten mankindrsquo existence table turn   disease guess   curein pandemic contagion compete disease fellow player eliminate humanity cure wipe human civilization come deadly disease know man rest peacepandemic contagion include deck contagion card differ color border city card similar colored border eventwho card player disease card disease cube turn possible action action twice action      draw contagion card equal current incubation rate      advance mutation level      spread disease equal current transmission ratewhen number disease equal cityrsquos population number city wipe   player place final disease earn special action player disease score population number small number award second thirdmost diseasesyour disease card mutation incubation number card draw transmission number disease cube spread resistance number cubescard protect action advance mutation level discard require number cardsevent card draw start round affect turn benefit card negative i

In [36]:
# Read the description text of one recommended title to see why it matched.
game_df.loc[game_df['Name'] == 'Infection', 'Description'].iloc[0]

'player roll dice race board catch disease try cure     catch disease leave public place pass disease player cure   medical station medicaid christmas bonus lottery help pay   bill diagnose disease info center wrong catch   diseasedisease card provide wealth information color code indicate severity   basic disease major communicable   disease card include   common latin cause   symptom treatment    multiple medical   library medical professional   consult insure accuracy informationtreatment option price   vary large range visit voodoo   doctor cheap way result unpredictable   intensive care center good service expensive place cure'

In [37]:
# Read the description text of one recommended title to see why it matched.
game_df.loc[game_df['Name'] == 'Carcassonne für 2', 'Description'].iloc[0]

'carcassonne tileplacement game player draw place tile piece southern french landscape tile feature city road cloister grassland combination thereof place adjacent tile play way city connect city road road etcetera having place tile player decide place meeple area city knight road robber cloister monk grass farmer area complete meeple score point ownerdure game carcassonne player face decision like quotis worth put meeple therequot quotshould use tile expand city place near opponent instead give hard time complete project score pointsquot player place tile option place meeple turn proceed quickly game option possibilitiescarcassonne fuumlr   twoplayer version carcassonne come metal box contain   tile   meeple'