In [27]:
import pandas as pd

In [28]:
data = pd.read_csv("BDD/avis_sans_outliers.csv")

In [29]:
# Drop unnecessary columns and handle missing values
data_cleaned = data.drop(columns=['Unnamed: 0', 'url', 'title_review', 'date_published'])
data_cleaned['comment'] = data_cleaned['comment'].fillna('')  # Fill missing comments

In [34]:
from surprise import Dataset, Reader, KNNWithZScore
from collections import defaultdict

# Prepare data for surprise
reader = Reader(rating_scale=(1, 10))  # Assuming rating scale is from 1 to 10
data_surprise = Dataset.load_from_df(data[['author', 'title', 'note']], reader)

# Build full trainset
trainset = data_surprise.build_full_trainset()

# Initialize KNN with Z-Score algorithm for user-based collaborative filtering
algo = KNNWithZScore(sim_options={'name': 'cosine', 'user_based': True}, k=20, min_k=1)
algo.fit(trainset)

def get_neighbors(user_id, game_title, k=20):
    # Retrieve inner ID of the user
    user_inner_id = trainset.to_inner_uid(user_id)
    
    # Retrieve inner ID of the game
    try:
        game_inner_id = trainset.to_inner_iid(game_title)
    except ValueError:
        return f"No data available for the game '{game_title}'."
    
    # Retrieve the k nearest neighbors of the user
    neighbors = algo.get_neighbors(user_inner_id, k)
    # Convert inner IDs of the neighbors back to raw IDs
    neighbors_ids = [trainset.to_raw_uid(inner_id) for inner_id in neighbors]
    print(neighbors_ids)
    # Filter the dataset to find the neighbors who have rated the specified game
    filtered_comments = data[(data['author'].isin(neighbors_ids)) & (data['title'] == game_title)]
    
    # Collect and return usernames and their comments on the specified game
    return filtered_comments[['author', 'comment']].values.tolist()


Computing the cosine similarity matrix...
Done computing similarity matrix.


In [35]:
neighbors_comments = get_neighbors('morlockbob', 'Mariposas', k=20)

[317, 318, 322, 421, 456, 590, 748, 916, 1038, 1044, 1086, 1146, 1150, 1266, 1267, 1272, 1275, 1285, 1287, 1346]
['Alundra', 'montrem', 'Genseric', 'Crunsk', 'Kiwi_74', 'XanderLeaDaren', 'DuncanIdaho', 'Pyjam', 'GutsOh', 'Syl', 'Golgoth_Be', 'aglaglas', 'Vauxhall', 'gianakin', 'Takumi13', 'LeBouffon', 'metapsy', 'draco', 'florentjfr', 'Zoulex']


In [47]:
def get_neighbors_who_commented_game(user_id, game_title, k=20):
    # Ensure user exists in dataset
    try:
        user_inner_id = trainset.to_inner_uid(user_id)
    except ValueError:
        return f"No data available for user '{user_id}'."

    # Filter the dataset for users who have commented on the game
    game_users = data[data['title'] == game_title]['author'].unique()

    # Convert raw user IDs to inner IDs where possible
    game_users_inner_ids = [trainset.to_inner_uid(uid) for uid in game_users if trainset.knows_user(uid)]

    # Check if there are any valid game user inner IDs
    if not game_users_inner_ids:
        return "No neighboring users have commented on this game and are in the trainset."

    # Get the similarity matrix
    user_similarities = algo.compute_similarities()

    # Create a list of (similarity, user_id) tuples, ensure we don't compare user to themselves
    similarities = [(user_similarities[user_inner_id][inner_id], trainset.to_raw_uid(inner_id))
                    for inner_id in game_users_inner_ids if inner_id != user_inner_id]

    # Sort based on similarity (higher is more similar)
    sorted_similar_neighbors = sorted(similarities, key=lambda x: x[0], reverse=True)[:k]

    # If no valid similarities found
    if not sorted_similar_neighbors:
        return "No similar users found based on the similarity criteria."

    # Retrieve the comments from the sorted similar users
    neighbors_ids = [uid for _, uid in sorted_similar_neighbors]
    filtered_comments = data[(data['author'].isin(neighbors_ids)) & (data['title'] == game_title)]

    # Collect and return usernames and their comments on the specified game
    return filtered_comments[['author', 'comment']].values.tolist()

In [51]:
neighbors_comments = get_neighbors_who_commented_game('morlockbob', 'Trader', k=20)
print(neighbors_comments)

No neighboring users have commented on this game and are in the trainset.


In [33]:
data[['title','comment']].loc[data['author']=='morlockbob']

Unnamed: 0,title,comment
1,Mariposas,Comment continuer après un mega hit ? Simpleme...
25,Mysterium Park,C'est une phrase récurrente que j'entends dans...
55,Quetzal,Jeu magnifiquement illustré qui donne envie de...
119,Troyes Dice,Faute de grive on mange du merle. Rééditer Tro...
206,Harry Potter : Bataille à Poudlard - Défense c...,"Jolie boîte au matériel fourni mais, à priori ..."
...,...,...
116658,Carcassonne: La Ruée vers l'Or,"APres ""MArtine au zoo"", ""martine et les gendar..."
116747,Orléans,Passé la laideur de la boîte (je voulais faire...
116869,Mystic Vale,MV ose l'originalité (quoi que Gloom est déjà ...
117147,Village,Vu le nombre de commentaires positifs sur ce j...
