In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Example DataFrame
df = pd.read_csv('details.csv')

In [4]:
# Split the 'categories' column and explode it into separate rows
split_categories = df['categories'].str.split(' | ').explode()

# Remove leading/trailing whitespace and get unique categories
unique_categories = split_categories.str.strip().unique()

# Convert the result into a list
unique_category_list = list(unique_categories)

In [6]:
# Assuming df is your DataFrame and it has a 'categories' column
df['categories'] = df['categories'].fillna('No Category').apply(lambda x: x.split('|'))

# Explode the categories into separate rows for each category per game
split_categories = df.explode('categories')

# Remove leading/trailing whitespace in categories
split_categories['categories'] = split_categories['categories'].str.strip()

# Get unique categories (no need to explicitly convert to list for MultiLabelBinarizer)
unique_categories = split_categories['categories'].unique()

In [8]:
# Reset 'categories' column to lists for MultiLabelBinarizer compatibility
df['categories'] = df['categories'].apply(lambda categories: [category.strip() for category in categories])

# Initialize and fit the MultiLabelBinarizer
mlb = MultiLabelBinarizer()
categories_matrix = mlb.fit_transform(df['categories'])

# Create a DataFrame for the encoded categories
categories_df = pd.DataFrame(categories_matrix, columns=mlb.classes_)


# Calculate cosine similarity matrix
cosine_sim = cosine_similarity(categories_df)

In [30]:
def recommend_games_by_categories(title, df, cosine_sim):
    # Get the index of the game that matches the title
    idx = df.index[df['full_title'] == title].tolist()[0]

    if df.iloc[idx]['categories'] == ['No Category']:
        print(f"No recommendations for '{title}' as it has no specific categories.")
        return []
    
    # Get the pairwise similarity scores of all games with that game
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the games based on the similarity scores
    sim_scores_sorted = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the 10 most similar games, excluding the first one if it's the game itself
    game_indices = [i[0] for i in sim_scores_sorted[1:11]]

    # Return the top 10 most similar games
    return df['full_title'].iloc[game_indices]


In [35]:
print(recommend_games_by_categories('Babel (2013)', df, cosine_sim))
print("=============")
print(recommend_games_by_categories('Marvel United (2020)', df, cosine_sim))

5699                                          Babel (2013)
14689                                  Gloria Mundi (2006)
2554                                     Antique II (2014)
5716                                     Antique II (2014)
7724                                     Alba longa (2011)
705      Alien Frontiers Factions: Definitive Edition (...
706      Carcassonne - Extension 6 : Comte, Roi & Briga...
888               Airlines - Golden Age of Aviation (2016)
935                                        Peak Oil (2017)
956                                  Captain Silver (2017)
Name: full_title, dtype: object
No recommendations for 'Marvel United (2020)' as it has no specific categories.
[]


In [44]:
df.loc[df['titre']=='Babel']

Unnamed: 0,_id,categories,casting,gameplay,description,full_title,titre,url,Note,Note rectifiée,Note Finkel,Nombre d'avis,Nombre d'avis rectifiés
2611,63C57C9D24FD0FD5FC6B9E6B,"[Antiquité, Jeux de plateau, Immobilier]",Par Uwe Rosenberg et Hagen DorgathenIllustré p...,2 |12 ans et +|45,\n Construisez la tour de...,Babel (2013),Babel,https://www.trictrac.net/jeu-de-societe/babel-0,7.55,7.31,10.49,172.0,138.0
5699,63C57F2F24FD0FD5FC6BAA7B,"[Antiquité, Jeux de plateau, Immobilier]",Par Uwe Rosenberg et Hagen DorgathenIllustré p...,2 |12 ans et +|45,\n Construisez la tour de...,Babel (2013),Babel,https://www.trictrac.net/jeu-de-societe/babel-0,7.55,7.31,10.49,172.0,138.0


In [42]:
df.loc[df['titre']=='Antique II']

Unnamed: 0,_id,categories,casting,gameplay,description,full_title,titre,url,Note,Note rectifiée,Note Finkel,Nombre d'avis,Nombre d'avis rectifiés
2554,63C57C9124FD0FD5FC6B9E32,"[Antiquité, Jeux de plateau, Guerre]",Par Walther Mac GerdtsIllustré par Marina Fahr...,3 à 6|13 ans et +|120,\n Antike II est un jeu d...,Antique II (2014),Antique II,https://www.trictrac.net/jeu-de-societe/antiqu...,9.0,8.0,8.03,2.0,1.0
5716,63C57F3224FD0FD5FC6BAA8C,"[Antiquité, Jeux de plateau, Guerre]",Par Walther Mac GerdtsIllustré par Marina Fahr...,3 à 6|13 ans et +|120,\n Antike II est un jeu d...,Antique II (2014),Antique II,https://www.trictrac.net/jeu-de-societe/antiqu...,9.0,8.0,8.03,2.0,1.0


In [36]:
print(df[['titre','categories']].loc[df['titre']=='Babel'])
print(df[['titre','categories']].loc[df['titre']=='Antique II'])
print(df[['titre','categories']].loc[df['titre']=='Alba longa'])

      titre                                categories
2611  Babel  [Antiquité, Jeux de plateau, Immobilier]
5699  Babel  [Antiquité, Jeux de plateau, Immobilier]
           titre                            categories
2554  Antique II  [Antiquité, Jeux de plateau, Guerre]
5716  Antique II  [Antiquité, Jeux de plateau, Guerre]
           titre                                categories
7724  Alba longa  [Antiquité, Jeux de plateau, Historique]


In [22]:
df.head()

Unnamed: 0,_id,categories,casting,gameplay,description,full_title,titre,url,Note,Note rectifiée,Note Finkel,Nombre d'avis,Nombre d'avis rectifiés
0,63C5790A08B9FB9FD8F26EC8,[No Category],Par Eric Lang et Andrea ChiarvesioIllustré par...,1 à 4|14 ans et +|45,\n Marvel United : Unis c...,Marvel United (2020),Marvel United,https://www.trictrac.net/jeu-de-societe/marvel...,7.58,8.4,8.74,19.0,10.0
1,63C5790A08B9FB9FD8F26EC9,[No Category],Par Elizabeth HargraveIllustré par Matt Paquet...,2 à 5|14 ans et +|60,"\n Chaque année, au print...",Mariposas (2020),Mariposas,https://www.trictrac.net/jeu-de-societe/mariposas,7.39,7.29,7.68,33.0,17.0
2,63C5790B08B9FB9FD8F26ECA,"[Enchères, Aventures dans la jungle, Collectio...",Par Alexandre GarciaIllustré par Nastya LehnÉd...,2 à 5|8 ans et +|45,\n Vous êtes à la tête d’...,Quetzal (2020),Quetzal,https://www.trictrac.net/jeu-de-societe/quetzal,8.18,7.22,7.42,28.0,9.0
3,63C5790B08B9FB9FD8F26ECB,[No Category],Édité par Arkhane Asylum Publishing,à partir de 1|12 ans et +|~,\n Un Kit de démarrage co...,Alien - Kit de Démarrage (2020),Alien - Kit de Démarrage,https://www.trictrac.net/jeu-de-societe/alien-...,9.0,0.0,0.0,4.0,0.0
4,63C5790B08B9FB9FD8F26ECC,"[Card-driven, Combinaison, Coopération, Deck b...",Par Maxime Rambourg et Théo RivièreIllustré pa...,1 à 4|12 ans et +|60,\n Le Dr Foo a créé une t...,The LOOP (2020),The LOOP,https://www.trictrac.net/jeu-de-societe/the-loop,8.75,8.79,9.7,72.0,24.0
