In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the game details table; the path is relative to the notebook's working directory.
df = pd.read_csv('data/game_details.csv')
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

In [6]:
# Build a TF-IDF vectorizer that drops common English stop words.
tfidf = TfidfVectorizer(stop_words='english')
# Fit on the game descriptions and transform them into a sparse TF-IDF matrix
# (one row per game, one column per vocabulary term).
# A missing description (NaN) makes fit_transform raise, so treat it as an
# empty document instead of failing the whole cell.
tfidf_matrix = tfidf.fit_transform(df['description'].fillna(''))
# Show (number of games, vocabulary size).
tfidf_matrix.shape

(27925, 82130)

In [7]:
# Compute the pairwise cosine similarity between every pair of game descriptions.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is already the cosine similarity.
# NOTE(review): the result is a dense (n_games x n_games) array -- with the
# 27925 rows reported above that is several GB; confirm it fits in memory.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [8]:
# Reverse lookup: game name -> row label in `df`.
# Series.drop_duplicates() would compare the *values* (the row labels, which are
# already unique) and therefore never removes a repeated name. De-duplicate on
# the index instead so that indices[name] is always a single label; the first
# row carrying a given name wins.
indices = pd.Series(df.index, index=df['name'])
indices = indices[~indices.index.duplicated(keep='first')]


In [9]:
def _get_recommendations(name, cosine_sim=cosine_sim, num_recommend = 10):
    """Return the names of the games most similar to ``name``.

    Parameters
    ----------
    name : str
        Exact game name as it appears in ``df['name']``.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position of ``df``.
        Defaults to the notebook-level ``cosine_sim``.
    num_recommend : int, optional
        Maximum number of games to return (default 10). Values below zero
        are treated as zero.

    Returns
    -------
    pandas.Series
        Names of the recommended games, most similar first, indexed by
        their row labels in ``df``.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    try:
        idx = indices[name]
    except KeyError:
        raise KeyError(f"Game {name!r} was not found in the dataset") from None

    # If several rows share this name the lookup yields a Series; fall back to
    # the first matching row so the similarity row below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Pair every other game's position with its similarity to the query game.
    # The query itself is excluded explicitly: relying on it sorting first is
    # wrong when another game has an identical (or tied) similarity score.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # Sort the games by similarity, highest first (stable for ties).
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    # Keep only the requested number of most similar games.
    top_similar = sim_scores[:max(num_recommend, 0)]
    # Extract the row positions of those games.
    game_indices = [position for position, _ in top_similar]
    # Return the names of the most similar games, best match first.
    return df['name'].iloc[game_indices]

In [10]:
# Example query: fetch the 50 games whose descriptions are most similar to this title.
game = 'Ticket to Ride'
test_recommendations = _get_recommendations(game, num_recommend = 50)

In [11]:
# Inspect the return type of the helper (a pandas Series of game names).
type(test_recommendations)

pandas.core.series.Series

In [12]:
def get_recommendations(game_title, number_of_recommendations = 10):
    """Return the full detail rows of the games most similar to ``game_title``.

    Parameters
    ----------
    game_title : str
        Exact game name as it appears in ``df['name']``.
    number_of_recommendations : int, optional
        How many similar games to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` for the recommended games, without the ``id_y``
        column, ordered from most to least similar.
    """
    games = _get_recommendations(game_title, num_recommend= number_of_recommendations)
    # Select by the row labels carried on the returned Series rather than by
    # name: filtering with isin() on names discards the similarity ranking and
    # also pulls in unrelated rows that merely share a recommended game's name.
    return df.loc[games.index].drop(columns=['id_y'])

In [13]:
# Display the detail rows for the ten games recommended for the example title.
get_recommendations(game, number_of_recommendations = 10)

Unnamed: 0,id_x,name,yearpublished_x,rank,bayesaverage,average,usersrated,is_expansion,abstracts_rank,cgs_rank,...,boardgameartists,stddev,median,owned,trading,wanting,wishing,numcomments,numweights,averageweight
170,14996,Ticket to Ride: Europe,2005,171,7.40188,7.52738,79683,0,,,...,"Cyrille Daujean,Julien Delval",1.1943,0,125802,878,702,6990,11033,4080,1.9184
773,202670,Ticket to Ride: Rails & Sails,2016,774,6.76061,7.43292,4895,0,,,...,"Cyrille Daujean,Julien Delval,Alan R. Moon",1.41691,0,12027,111,226,1529,837,101,2.4752
1075,276894,Ticket to Ride: London,2019,1076,6.57866,7.0719,5178,0,,,...,"Cyrille Daujean,Julien Delval",1.12365,0,13765,163,73,589,930,109,1.3486
1145,253284,Ticket to Ride: New York,2018,1146,6.54418,6.90194,7233,0,,,...,"Cyrille Daujean,Julien Delval,Alan R. Moon",1.16892,0,16624,244,80,578,1263,119,1.2605
2143,205125,Ticket to Ride: First Journey (U.S.),2016,2144,6.18732,6.85805,2523,0,,,...,"Cyrille Daujean,Jean-Baptiste Reynaud",1.24861,0,7893,182,62,422,698,47,1.4043
2146,218208,Ticket to Ride: First Journey (Europe),2017,2147,6.18549,7.01298,1994,0,,,...,"Cyrille Daujean,Alan R. Moon,Jean-Baptiste Rey...",1.17791,0,5250,62,55,448,395,55,1.5091
2586,119637,Zug um Zug: Deutschland,2012,2587,6.08248,7.52936,929,0,,,...,"Cyrille Daujean,Julien Delval",1.12016,0,1895,7,45,173,151,30,1.9
3769,34127,Ticket to Ride: The Card Game,2008,3770,5.88315,6.11908,4101,0,,,...,"Cyrille Daujean,Julien Delval,Alan R. Moon",1.44981,0,6716,250,78,390,1138,337,1.8754
6864,366488,Ticket to Ride: Ghost Train,2022,6865,5.65478,6.84295,312,0,,,...,,1.24787,0,1406,6,31,170,64,6,1.5
18594,244525,Ticket to Ride Demo,2017,18595,5.51728,6.90278,36,0,,,...,,2.09193,0,93,1,22,50,20,2,1.0


In [14]:
def get_group_recommendations(game_titles, number_of_recommendations = 10):
    """Return detail rows of games recommended for a group of liked titles.

    Recommendations are gathered for every title in ``game_titles`` and
    combined; a game recommended for more than one title appears once, and
    the titles that were passed in are not recommended back.

    Parameters
    ----------
    game_titles : str or iterable of str
        Exact game names as they appear in ``df['name']``. A single string
        is treated as a one-element list.
    number_of_recommendations : int, optional
        How many similar games to request *per title* (default 10).

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` for the combined recommendations, without the
        ``id_y`` column, grouped in the order the titles were given.
        Empty when ``game_titles`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(game_titles, str):
        game_titles = [game_titles]
    else:
        game_titles = list(game_titles)

    # Collect the recommendations of *every* title; overwriting one variable
    # inside the loop would keep only the last title's results.
    per_title = [
        _get_recommendations(game_title, num_recommend= number_of_recommendations)
        for game_title in game_titles
    ]
    if not per_title:
        # Nothing was asked for: return an empty frame with the usual columns.
        return df.iloc[0:0].drop(columns=['id_y'])

    combined = pd.concat(per_title)
    # Keep the first occurrence of a game recommended for several titles.
    combined = combined[~combined.index.duplicated(keep='first')]
    # Do not recommend the games the group already named.
    combined = combined[~combined.isin(game_titles)]
    return df.loc[combined.index].drop(columns=['id_y'])