In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
import string
import random

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/martin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/martin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
df = pd.read_csv('details.csv')
df_avis = pd.read_csv('avis.csv')

In [4]:
stop_words = set(stopwords.words('french'))

def preprocess(text):
    text = text.lower()
    text = ''.join([char for char in text if char not in string.punctuation])
    tokens = nltk.word_tokenize(text, language='french')
    tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(tokens)

df['processed_description'] = df['description'].apply(preprocess)

In [5]:
# Feature Extraction
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['processed_description'])

# Similarity Measure
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [15]:
# Recommender Function
def recommend_games(title, df, cosine_sim):
    # Get the index of the game that matches the title
    idx = df.index[df['titre'] == title].tolist()[0]

    # Get the pairwise similarity scores of all games with that game
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the games based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar games
    sim_scores = sim_scores[1:21]

    # Get the game indices
    game_indices = [i[0] for i in sim_scores]

    # Return the top 10 most similar games
    return df['titre'].iloc[game_indices]

In [7]:
df_avis.head()

Unnamed: 0,_id,author,date_published,title_review,note,review_href,title,url,treated,comment
0,63C6726EA81F3E3016AF9A56,Monsieur Guillaume,2021-01-27 11:06:44,Voyages sur les ailes des papillons,8,"""https://www.trictrac.net/avis/vraiment-bon-12...",Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,1,"Lorsque le jeu est jeu, bon, réflexif, joli po..."
1,63C6726EA81F3E3016AF9A57,morlockbob,2020-10-18 10:04:21,le festival de Michoacan,7,"""https://www.trictrac.net/avis/le-festival-de-...",Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,1,Comment continuer après un mega hit ? Simpleme...
2,63C6726EA81F3E3016AF9A58,SwatSh,2021-02-01 08:35:08,Vivez la migration des monarques,7,"""https://www.trictrac.net/avis/vivez-la-migrat...",Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,1,"Vin d'jeu: Avec Mariposas, Elizabeth Hargrave ..."
3,63C6726EA81F3E3016AF9A59,Timi JeuxATheme,2020-11-19 17:04:57,Bon,8,,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,1,
4,63C6726EA81F3E3016AF9A5A,prunelles,2021-05-24 12:43:18,Envolez-moi,9,"""https://www.trictrac.net/avis/envolez-moi""",Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,1,"Très joli bijou que ce jeu-là ! Le matériel, l..."


In [8]:
recommend_games('123Goal',df,cosine_sim)

15558           Bohnkick
436              Ballons
3478             Ballons
18966            Ulysses
9897            Cartobut
15575       Soccer Chess
18050     Orcs et Trolls
18905    Avis de Tempête
15585          Tipp-Kick
18560             Finale
Name: titre, dtype: object

In [92]:
def recommend(username, df_avis):
    # Retrieve games and ratings reviewed by the user
    df_avis_username = df_avis[['title', 'note']].loc[df_avis['author'] == username]

    # Initialize weighted recommendations with all game titles and zero weights
    weighted_recommendations = {game: 0 for game in df['titre'].unique()}

    # Iterate over the user's reviewed games
    for index, row in df_avis_username.iterrows():
        title, rating = row['title'], row['note']
        similar_games = recommend_games(title, df, cosine_sim)

        # Weighting similar games by the user's rating
        for sim_game in similar_games:
            weighted_recommendations[sim_game] += rating

    # Remove any games the user has already reviewed
    reviewed_games = set(df_avis_username['title'])
    weighted_recommendations = {game: weight for game, weight in weighted_recommendations.items() if game not in reviewed_games}

    # Sort recommendations based on weights
    sorted_recommendations = sorted(weighted_recommendations, key=weighted_recommendations.get, reverse=True)

    return sorted_recommendations[:10]

In [10]:
recommend('Monsieur Guillaume', df_avis)

['Le Dilemme du Roi',
 'Battle Line Médiéval',
 'Bluffer',
 'Zombie Kidz Évolution',
 'Small Detectives',
 'Ruse',
 'Munchkin Donjon',
 'Wacondah',
 'Kabuki',
 "Zombie 15'"]

In [30]:
def test_recommendation_accuracy_random(username, df_avis, test_size=1, threshold_rating=8):
    # Retrieve games and ratings reviewed by the user
    user_ratings = df_avis[['title', 'note']].loc[df_avis['author'] == username]

    # Filter out highly rated games for testing
    high_rated_games = user_ratings[user_ratings['note'] >= threshold_rating]
    test_games = high_rated_games.sample(n=test_size)
    
    # Remaining games for training
    training_games = user_ratings.drop(test_games.index)

    # Create a temporary DataFrame for training
    temp_df_avis = df_avis.copy()
    temp_df_avis.loc[df_avis['author'] == username] = training_games

    # Generate recommendations using the temporary DataFrame
    recommendations = recommend(username, temp_df_avis)

    # Check if test games are in the recommendations
    hits = sum(game in recommendations for game in test_games['title'])
    accuracy = hits / test_size

    #print(username, "a", len(high_rated_games), "avis avec une note supérieure ou égale à", threshold_rating)
    #print("On en retire",test_size, "et on regarde combien apparaissent dans les", len(recommendations), "recommendations.")
    #print("Il y en a ", hits)
    #print(accuracy)
    return accuracy

In [95]:
def test_recommendation_accuracy_moyenne(username, df_avis, test_size=1):
    # Retrieve games and ratings reviewed by the user
    user_ratings = df_avis[['title', 'note']].loc[df_avis['author'] == username]

    # Calculate the user-specific threshold rating as the median rating
    threshold_rating = user_ratings['note'].mean()

    # Filter out games rated above the user-specific threshold for testing
    high_rated_games = user_ratings[user_ratings['note'] > threshold_rating]
    if len(high_rated_games) < test_size:
        print(f"Not enough high-rated games above the threshold of {threshold_rating} for user {username}. Adjusting test size to {len(high_rated_games)}.")
        test_size = max(1, len(high_rated_games))  # Ensure at least one game is tested

    test_games = high_rated_games.sample(n=test_size)
    
    # Remaining games for training
    training_games = user_ratings.drop(test_games.index)

    # Create a temporary DataFrame for training
    temp_df_avis = df_avis.copy()
    temp_df_avis.loc[df_avis['author'] == username] = training_games

    # Generate recommendations using the temporary DataFrame
    recommendations = recommend(username, temp_df_avis)

    # Check if test games are in the recommendations
    hits = sum(game in recommendations for game in test_games['title'])
    accuracy = hits / test_size

    return accuracy


In [103]:
moyenne=0
for i in range(500):
    moyenne+=test_recommendation_accuracy_moyenne('Monsieur Guillaume',df_avis,10)
print(moyenne/500)

AttributeError: 'Series' object has no attribute 'median_low'

Modele random : 
- Sur 500 tests : 0.0564 de precision

Modele moyenne :
- Sur 500 tests : 0.0604 de precision

In [104]:
recommend('Monsieur Guillaume',df_avis)

['Deus',
 'Punto',
 'For Sale',
 'Le Dilemme du Roi',
 'Rise of Tribes - deluxe upgrade',
 'Battle Line Médiéval',
 'Targui',
 'Puls',
 'Tales of Arabian nights',
 'Crime Zoom - Sa dernière carte']

In [16]:
recommend_games('Marchands du Nord',df,cosine_sim)

411                            Marchands du Nord
3433                           Marchands du Nord
19414                                  Die Hanse
14074                 Great War At Sea : Jutland
17064                                      Kogge
19449                                     Kontor
14918                                     Bounty
18193                                    Tortuga
19842                                Störtebeker
1694                  Galions, Canons & Doublons
4804                  Galions, Canons & Doublons
9284                                   Merchants
10612                       The Kaiser's Pirates
7818                                Mundus Novus
8835     Metal Adventures - La prise & le profit
17891                      Kapitän Wackelpudding
17829                                      Hansa
17924                                      Tyrus
2366                      Race to the North Pole
4877                      Race to the North Pole
Name: titre, dtype: 