In [1]:
import pandas as pd

In [2]:
# Load the review dataset from a path relative to the working directory
# (presumably already outlier-filtered, judging by the file name — TODO confirm).
# Later cells read its 'author', 'title', 'note' and 'comment' columns.
data = pd.read_csv("BDD/avis_sans_outliers.csv")

In [3]:
# Build a cleaned copy of the reviews: remove the columns that are not needed
# for the analysis and replace missing comments with empty strings.
data_cleaned = (
    data
    .drop(columns=['Unnamed: 0', 'url', 'title_review', 'date_published'])
    .assign(comment=lambda frame: frame['comment'].fillna(''))
)

In [4]:
from surprise import Dataset, Reader, KNNWithZScore
from collections import defaultdict

# Wrap the (user, item, rating) triples in a Surprise dataset.
# The 1-10 rating scale is an assumption about the 'note' column.
reader = Reader(rating_scale=(1, 10))
data_surprise = Dataset.load_from_df(
    data.loc[:, ['author', 'title', 'note']],
    reader,
)

# Use every available rating for training (no hold-out split).
trainset = data_surprise.build_full_trainset()

# User-based collaborative filtering: KNN with z-score normalisation and
# cosine similarity between users.
algo = KNNWithZScore(
    k=20,
    min_k=1,
    sim_options=dict(name='cosine', user_based=True),
)
algo.fit(trainset)

def get_neighbors(user_id, game_title, k=20):
    """Return the comments a user's nearest neighbours left on a given game.

    Relies on the module-level ``trainset`` and fitted ``algo`` for the
    neighbour lookup and on ``data_cleaned`` for the review text.

    Parameters
    ----------
    user_id : str
        Raw user identifier, i.e. a value of the 'author' column.
    game_title : str
        Raw item identifier, i.e. a value of the 'title' column.
    k : int, default 20
        Number of nearest neighbours requested from the fitted model.

    Returns
    -------
    list of [author, comment] lists
        One entry per review of ``game_title`` written by one of the k
        neighbours. Missing comments appear as empty strings because the
        rows are read from ``data_cleaned``.
    str
        A "No data available ..." message when ``game_title`` is not part of
        the trainset (kept for backward compatibility with existing callers).

    Raises
    ------
    ValueError
        Propagated from ``trainset.to_inner_uid`` when ``user_id`` is not
        part of the trainset (per the Surprise Trainset API); it is
        deliberately not caught here.
    """
    # Translate the raw user name into Surprise's inner id.
    user_inner_id = trainset.to_inner_uid(user_id)

    # The inner item id itself is not needed; the lookup only serves as an
    # existence check for the game.
    try:
        trainset.to_inner_iid(game_title)
    except ValueError:
        return f"No data available for the game '{game_title}'."

    # k nearest neighbours of the user, converted back to raw user names.
    neighbors = algo.get_neighbors(user_inner_id, k)
    neighbors_ids = [trainset.to_raw_uid(inner_id) for inner_id in neighbors]
    print(neighbors_ids)

    # Read from the cleaned frame so that missing comments come back as ''
    # rather than NaN (the raw frame contains reviews without comment text).
    reviewed_by_neighbor = (
        data_cleaned['author'].isin(neighbors_ids)
        & (data_cleaned['title'] == game_title)
    )
    return data_cleaned.loc[reviewed_by_neighbor, ['author', 'comment']].values.tolist()


Computing the cosine similarity matrix...
Done computing similarity matrix.


In [6]:
neighbors_comments = get_neighbors('Monsieur Guillaume', 'Mariposas', k=20)

# get_neighbors returns a plain message string (instead of a list) when the
# game is unknown; wrap it so it is written as a single line rather than
# being iterated character by character.
if isinstance(neighbors_comments, str):
    neighbors_comments = [neighbors_comments]

# Write one "[author, comment]" entry per line. The encoding is set explicitly
# because the reviews contain accented characters and the platform default
# encoding is not guaranteed to be able to represent all of them.
with open('output_comments.txt', 'w', encoding='utf-8') as file:
    for author_comment in neighbors_comments:
        file.write(str(author_comment) + "\n")
        

['prunelles', 'Olfenw', 'Lilly', 'letroyenfou', 'Empédocle', 'Ricardo31', 'Player One', 'Sempre Sainté', 'grotesk', 'Olène', 'Greta', 'Patmol', 'Delorean', 'plumesdanges', 'Didi5962', 'Sirien', 'Pollo', 'Meeplejuice', 'Gido_L4', 'Tigragon22']


In [9]:
# Titles and comments of every review written by the neighbour 'Greta'.
data.loc[data['author'] == 'Greta', ['title', 'comment']]

Unnamed: 0,title,comment
32,Mysterium Park,J'ai eu l'occasion de jouer à des versions exp...
495,Punto,Un jeu à emporter partout !\n\nUn matériel cos...
2170,Skyjo,"J'adore ce jeu !\n\nA vrai dire, je l'avais ac..."
2771,ATLANTES,Les nombreux icônes sur les cartes et la capac...
3561,Draftosaurus,"Pour découvrir la mécanique de draft, rien de ..."
...,...,...
113609,Montgolfière,Ambiance assurée avec des petites vacheries en...
113661,Métro,J'aime bien pouvoir jouer à deux et Métro ne m...
114138,Jumpy Jack,Un jeu de pari hippique qui peut se jouer de 2...
115069,Bonjour Robert,Petit jeu d'ambiance sans prétention dans lequ...


In [15]:
# Inspect the reviews that have no comment text.
data.loc[data['comment'].isna()]

Unnamed: 0.1,Unnamed: 0,author,date_published,title_review,note,title,url,comment
3,3,Timi JeuxATheme,2020-11,Bon,8,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,
7,7,PawnPawn,2021-05,Pas mauvais,6,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,
8,8,M.PinK,2021-08,Bon,7,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,
9,9,micmac85,2022-01,Bon,8,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,
10,10,Max Riock,2020-11,Vraiment bon,9,Mariposas,https://www.trictrac.net/jeu-de-societe/maripo...,
...,...,...,...,...,...,...,...,...
117608,176123,Glatos,2022-06,"Ni bon, ni mauvais",5,Kill Bique,https://www.trictrac.net/jeu-de-societe/kill-b...,
117623,176139,L.S.G.,2016-02,Pas mauvais,6,22 Pommes,https://www.trictrac.net/jeu-de-societe/22-pom...,
117633,176149,Monsieur Julien,2020-09,"Ni bon, ni mauvais",5,22 Pommes,https://www.trictrac.net/jeu-de-societe/22-pom...,
117655,176173,koumbaia,2021-10,Bon,7,22 Pommes,https://www.trictrac.net/jeu-de-societe/22-pom...,
