In [9]:
import pandas as pd
from surprise import KNNWithMeans
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

In [6]:
# Load the ratings table from a CSV path relative to the notebook's working
# directory. Presumably these are reviews with outliers already removed
# (going by the file name) -- TODO confirm which upstream step produces it.
# A later cell reads the 'author', 'title' and 'note' columns of this frame.
avis = pd.read_csv("BDD/avis_sans_outliers.csv")

In [14]:
# Fixed seed for the train/test split, so that the metrics and the
# recommendations derived from `predictions` are reproducible across
# kernel restarts (the split is the only random step in this cell).
RANDOM_STATE = 42

# Similarity configuration (can be tuned as needed): cosine similarity
# computed between items rather than between users.
sim_options = {
    'name': 'cosine',
    'user_based': False  # item-based
}

# Instantiate the KNNWithMeans algorithm
algo = KNNWithMeans(sim_options=sim_options)

# Dataset loading: the three columns are passed in (user, item, rating) order.
# NOTE(review): rating_scale=(0, 10) assumes `note` lies in [0, 10] -- confirm
# against the data.
reader = Reader(line_format='user item rating', rating_scale=(0, 10))
data = Dataset.load_from_df(avis[['author', 'title', 'note']], reader)

# Hold out 25% of the ratings for evaluation; seeded for reproducibility.
trainset, testset = train_test_split(
    data, test_size=0.25, random_state=RANDOM_STATE
)

# Train the algorithm on the trainset
algo.fit(trainset)

# Predict ratings for the testset
predictions = algo.test(testset)

# Evaluate the algorithm with MAE and RMSE (each call prints its metric;
# the RMSE value is also the cell's displayed result).
accuracy.mae(predictions)
accuracy.rmse(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2390
RMSE: 1.5704


1.5703699757343412

In [15]:
# Recommender function
def get_top_n_predictions(predictions, n=10):
    """
    Build the top-N recommendation list of every user found in `predictions`.

    Args:
        predictions: iterable of 5-element records, unpacked as
            (user id, item id, true rating, estimated rating, details).
            Only the user id, item id and estimated rating are used.
        n: slice bound applied to each user's ranked list (default 10).

    Returns:
        dict mapping each user id to a list of (item id, estimated rating)
        tuples, ordered by decreasing estimated rating and sliced with [:n].
        Users appear in the order they are first encountered.
    """
    # Group the (item, estimate) pairs under the user they belong to.
    per_user = {}
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        per_user.setdefault(user_id, []).append((item_id, estimate))

    # Rank every user's pairs by estimate, best first, and keep the head.
    # The sort is stable, so ties stay in their original encounter order.
    return {
        user_id: sorted(pairs, key=lambda pair: pair[1], reverse=True)[:n]
        for user_id, pairs in per_user.items()
    }

In [16]:
# Keep the 5 highest estimated items per user among the test-set predictions.
top_n = get_top_n_predictions(predictions, n=5)

# Print the recommended items for a handful of users only: one line per user
# for the whole test set floods the cell output.
MAX_USERS_SHOWN = 10
for uid, user_ratings in list(top_n.items())[:MAX_USERS_SHOWN]:
    print(uid, [iid for (iid, _) in user_ratings])

Pac ['Twilight Imperium : Shattered Empire', 'Formule Dé : Zanvoort 2 & Spa-Francorchamps', 'Niagara : Flussgeister am Niagara', 'Turfmaster', 'Le Seigneur des Anneaux : Les Forces des Ténèbres']
Lilly ['Codenames Duo', 'Jaipur', 'Mariposas', 'Mysterium : Hidden Signs', 'Le signe des Anciens']
LedLink ['Les Loups-Garous de Thiercelieux 2 : Nouvelle Lune', 'Deep Sea Adventure', "Tainted Grail : la Chute d'Avalon", 'Rythme and Boulet', 'Meeple War']
Yzarc ['Alhambra : 2 - Les Portes de la Ville', 'Dominion : Prosperité', 'Sun Tzu Deluxe', 'Alhambra : 4 - La Chambre du Trésor', 'Cuba : El Presidente']
Spip5 ['Secret Hitler', 'Honshu', "Le Parrain : l'empire de Corleone", 'Watson & Holmes', 'Sauve qui peut']
glouglou ['Parsely Games', 'Space Alert', 'Super Rhino', 'Beverly Place', 'Citadelles : quatrième édition']
Guillaume GN ['Puerto Rico', 'Helden der Normandie', 'The Crew', 'Maracaïbo', 'La course farfelue des souris des champs']
gogol1er ['Ghost Stories : The Village People', 'Les Lou

In [17]:
# Show one user's recommendations as (item, estimated rating) pairs.
# A user is only present in `top_n` when at least one of their ratings landed
# in the test split, so fall back to an empty list instead of a KeyError.
print(top_n.get("Pac", []))

[('Twilight Imperium : Shattered Empire', 9.251806973908321), ('Formule Dé : Zanvoort 2 & Spa-Francorchamps', 8.846002078103382), ('Niagara : Flussgeister am Niagara', 8.842757181713203), ('Turfmaster', 8.741660552978837), ('Le Seigneur des Anneaux : Les Forces des Ténèbres', 8.712441357871155)]
