In [2]:
import pandas as pd
from surprise import KNNWithZScore
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

In [3]:
# Load the reviews table from CSV (the path is relative to the notebook's working directory).
# The 'author', 'title' and 'note' columns of this frame are read later to build the Surprise dataset.
# NOTE(review): the file name suggests outliers were already removed upstream — confirm.
avis = pd.read_csv("BDD/avis_sans_outliers.csv")

In [4]:
# Similarity settings for the neighbourhood model (can be tuned as needed):
# cosine similarity, computed between users rather than between items.
sim_options = {
    'name': 'cosine',
    'user_based': True
}

# Instantiate the KNNWithZScore algorithm with at most k=20 neighbours per prediction
algo = KNNWithZScore(sim_options=sim_options, k=20)

# Dataset loading: (user, item, rating) triples taken from the reviews frame.
# NOTE(review): rating_scale assumes 'note' lies in [0, 10] — confirm against the data.
reader = Reader(line_format='user item rating', rating_scale=(0, 10))
data = Dataset.load_from_df(avis[['author', 'title', 'note']], reader)

# Split the dataset into train and test sets for evaluation.
# The seed is fixed so that the split — and therefore the metrics and the
# per-user lists built from `predictions` — is identical on every
# Restart & Run All instead of changing from run to run.
SPLIT_SEED = 42
trainset, testset = train_test_split(data, test_size=0.25, random_state=SPLIT_SEED)

# Train the algorithm on the trainset
algo.fit(trainset)

# Predict ratings for the held-out testset
predictions = algo.test(testset)

# Evaluate the algorithm with MAE and RMSE (each call prints its metric;
# the RMSE value is also the cell's displayed result)
accuracy.mae(predictions)
accuracy.rmse(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2452
RMSE: 1.5922


1.5922360585054942

In [5]:
# Recommender function
def get_top_n_predictions(predictions, n=10):
    """
    Group predictions by user and keep each user's N highest-estimated items.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        Each entry unpacks as (user id, item id, true rating, estimated
        rating, details). Only the user id, item id and estimate are used.
    n : int, default 10
        Number of (item id, estimate) pairs to keep per user.

    Returns
    -------
    dict
        Maps each user id (in order of first appearance) to a list of
        (item id, estimate) pairs sorted by estimate, highest first.
        Pairs with equal estimates keep their input order.
    """
    per_user = {}
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        per_user.setdefault(user_id, []).append((item_id, estimate))

    # Rank each user's pairs by estimate (descending) and truncate to n.
    return {
        user_id: sorted(pairs, key=lambda pair: pair[1], reverse=True)[:n]
        for user_id, pairs in per_user.items()
    }

In [6]:
# Keep the 5 highest-estimated items per user.
# NOTE: `predictions` comes from algo.test(testset), so the items ranked here are
# held-out items each user has already rated, not items the user has never seen.
top_n = get_top_n_predictions(predictions, n=5)

In [7]:
# Show the stored item ids for the first few users, in top_n's insertion order
first_couple_users = [*top_n][:10]  # Change the slice bound to show more or fewer users

for uid in first_couple_users:
    user_ratings = top_n[uid]
    # Drop the estimates; only the item ids are printed.
    item_ids = [pair[0] for pair in user_ratings]
    print(f"Recommendations for user {uid}: {item_ids}")

Recommendations for user Tourbillon: ['Mamma Mia !', 'Carson City', 'Le Fou Volant', 'Dindons & Dragons']
Recommendations for user elyrelle: ['One Deck Dungeon', 'Cartographers', 'Pix']
Recommendations for user Joker75: ['Antiquity', 'The New Era', 'Android: Netrunner - Das Kartenspiel', 'Kingsburg : Forgez un Royaume', "Through The Ages : L'Histoire vous appartient"]
Recommendations for user momov: ['Le Seigneur des Anneaux : Les Forces des Ténèbres', 'Formule Dé : Zanvoort 2 & Spa-Francorchamps', 'Primordial Soup', 'Tichu', 'Les Aventuriers du Rail']
Recommendations for user samuel2277: ['Battlestar Galactica', 'Cyclades', 'Troyes', 'Niagara : Flussgeister am Niagara', 'Hive Pocket']
Recommendations for user Radikalkiwi: ['Top Ten', 'Unlock! Timeless Adventures', 'Queendomino Version Géante', 'The 7th Continent', 'Perudo']
Recommendations for user lilounette: ['Hop ! Hop ! Galopons !', 'Battle sheep', 'Perlinpinpin', 'Potion Explosion', 'Zombie Kidz Évolution']
Recommendations for us

In [8]:
# Inspect one user's (item, estimate) pairs. top_n only contains users that landed
# in the test split, so use .get: a missing user prints an empty list instead of
# aborting the notebook with a KeyError.
print(top_n.get("Pac", []))

[("Through The Ages : L'Histoire vous appartient", 8.842401531762135), ('Le Seigneur des Anneaux : Les Forces des Ténèbres', 8.78823277454045), ('Richard Cœur de Lion', 8.682561877696372), ('Space Hulk', 8.573064860325767), ('Medina', 8.564822051796867)]
