In [2]:
import pandas as pd
from surprise import KNNWithMeans
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

In [3]:
# Load the reviews table from CSV.
# NOTE(review): the file name suggests outliers were removed upstream — confirm.
avis = pd.read_csv(filepath_or_buffer="BDD/avis_sans_outliers.csv")

In [4]:
# Similarity settings for the KNN model (can be tuned as needed).
sim_options = {
    'name': 'cosine',
    'user_based': False  # item-based
}

# Instantiate the KNNWithMeans algorithm
algo = KNNWithMeans(sim_options=sim_options)

# Dataset loading: columns are passed in (user, item, rating) order.
# Only rating_scale is needed when loading from a DataFrame; line_format is
# used for file parsing and its default is already 'user item rating'.
# NOTE(review): assumes every 'note' lies within [0, 10] — confirm against the data.
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(avis[['author', 'title', 'note']], reader)

# Fix the split seed so that Restart & Run All reproduces the same train/test
# partition, the same metrics and the same downstream recommendations.
SPLIT_SEED = 42
trainset, testset = train_test_split(data, test_size=0.25, random_state=SPLIT_SEED)

# Train the algorithm on the trainset
algo.fit(trainset)

# Predict ratings for the held-out testset
predictions = algo.test(testset)

# Evaluate with MAE and RMSE. Both calls print the metric by default; the
# returned values are kept so later cells can reuse them.
mae = accuracy.mae(predictions)
rmse = accuracy.rmse(predictions)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  1.2396
RMSE: 1.5741


1.5741406631540633

In [5]:
# Recommender function
def get_top_n_predictions(predictions, n=10):
    """
    Return the top-N recommended items for each user from a set of predictions.

    Args:
        predictions: Iterable of 5-item records unpacked as
            (uid, iid, true_r, est, details); only uid, iid and est are used.
        n: Maximum number of items kept per user. ``None`` keeps every item.

    Returns:
        dict mapping each uid to a list of (iid, est) tuples sorted by est,
        highest first, truncated to at most n entries. Items with equal
        estimates keep the order in which they appeared in ``predictions``.

    Raises:
        ValueError: If n is negative.
    """
    # A negative n would make the slice below silently drop items from the
    # end of each list instead of limiting its length.
    if n is not None and n < 0:
        raise ValueError(f"n must be non-negative or None, got {n}")

    # First group the (item, estimate) pairs by user.
    per_user = {}
    for uid, iid, _true_r, est, _details in predictions:
        per_user.setdefault(uid, []).append((iid, est))

    # Then rank each user's items by estimate and keep the N highest ones.
    return {
        uid: sorted(rated, key=lambda pair: pair[1], reverse=True)[:n]
        for uid, rated in per_user.items()
    }

In [6]:
# Keep the five highest-estimated items per user.
# NOTE(review): `predictions` comes from algo.test(testset), so the ranked
# items are held-out titles each user has already rated, not unseen ones.
top_n = get_top_n_predictions(predictions=predictions, n=5)

In [9]:
# Show the recommended item ids for the first users found in top_n.
first_couple_users = list(top_n)[:10]  # change the slice bound to show more or fewer users

for uid, user_ratings in ((user, top_n[user]) for user in first_couple_users):
    print(f"Recommendations for user {uid}: {[iid for (iid, _) in user_ratings]}")

Recommendations for user Christophe Massart: ['Dungeon Twister 2 - Prison', 'Unanimo', 'Palazzo Paletti', 'Hamster Rolle', 'Mamma Mia !']
Recommendations for user Shaaaady: ['Twelve Heroes', 'Dixit : Origin', 'Le Fou Volant', 'Codenames : Disney', 'When I Dream']
Recommendations for user Gorthyn: ['Visionary', 'Medina', 'Mémoire 44', 'Tadsch Mahal', 'Herr der Ringe: Die Entscheidung']
Recommendations for user jmguiche: ['Clank! Legacy: Acquisitions Incorporated.', 'Die Brücken von Shangrila', 'Ganymede', 'Saint Pétersbourg', 'Maracaïbo']
Recommendations for user tidoltibo: ['WeyKick Football', 'Pandémie : Au Seuil de la Catastrophe', 'Snooker', 'Baby-foot / football de table', 'Kuhhandel Master']
Recommendations for user Sicander vincit: ["Through The Ages : L'Histoire vous appartient", 'Moai', 'Leader 1 : Hell of the North', 'Mousquetaires du Roy', 'Descent : voyage dans les ténèbres']
Recommendations for user Raphaël: ['Pente', 'Medina', 'Mémoire 44', 'Maharaja', 'Richard Cœur de Lio

In [8]:
# Inspect one user's (item, estimated rating) pairs. top_n only contains users
# that appear in the test split, so fall back to an empty list instead of
# raising KeyError when this user has no held-out prediction.
print(top_n.get("Pac", []))

[('Roads & Boats', 8.915360598844542), ('Descent  : Héros de Legende', 8.852162586995277), ('Tigris & Euphrates', 8.731896722756963), ("Through The Ages : L'Histoire vous appartient", 8.323886585588575), ('Vasco da Gama', 8.308300652273935)]
