In [1]:
import pandas as pd

In [2]:
avis = pd.read_csv("BDD/avis_sans_outliers.csv")

In [22]:
# Count the occurrences of each rating
rating_counts = avis['note'].value_counts()

# Calculate the total number of reviews
total_reviews = avis['note'].count()

# Calculate the percentage of reviews for each rating
rating_percentages = (rating_counts / total_reviews) * 100

# Sort the index to ensure the ratings are in order
rating_percentages = rating_percentages.sort_index()

rating_percentages


note
4      7.778722
5      4.241162
6     19.297247
7      8.025153
8     32.011387
9      8.086336
10    20.559993
Name: count, dtype: float64

In [8]:
from surprise import accuracy

# Function to calculate MAE for a specific rating
def calculate_mae_for_specific_rating(predictions, rating):
    # Filtering predictions for the specified rating
    filtered_predictions = [pred for pred in predictions if pred.r_ui == rating]
    
    # Calculating MAE for the filtered predictions
    if filtered_predictions:
        mae = accuracy.mae(filtered_predictions, verbose=False)
        return mae
    else:
        return None

In [19]:
from surprise import KNNWithMeans
from surprise import KNNWithZScore
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from surprise.accuracy import mae

# Assuming you've already loaded your data into a 'data' variable

# Example dataset loading, replace with your actual data loading code
reader = Reader(line_format='user item rating', rating_scale=(0, 10))
data = Dataset.load_from_df(avis[['author', 'title', 'note']], reader)

trainset, testset = train_test_split(data, test_size=0.2)

# List of algorithms to evaluate
algorithms = [
    #KNNBasic(sim_options={'name': 'cosine', 'user_based': True}),
    KNNWithMeans(k=20,sim_options={'name': 'cosine', 'user_based': True}),
    KNNWithZScore(k=20,sim_options={'name': 'cosine', 'user_based': True}),
    #KNNBaseline(sim_options={'name': 'cosine', 'user_based': True}, bsl_options={'method': 'sgd', 'learning_rate': .00005})
]

for algo in algorithms:
    print(f"\nEvaluating {algo.__class__.__name__}...")
    algo.fit(trainset)
    predictions = algo.test(testset)
    for i in range(11):
        mae = calculate_mae_for_specific_rating(predictions, i)
        if mae is not None:
            print(f'MAE for rating {i}: {mae}')
        else:
            print(f'No predictions found for rating {i}')
    print(f'MAE {accuracy.mae(predictions, verbose=False)}')


Evaluating KNNWithMeans...
Computing the cosine similarity matrix...
Done computing similarity matrix.
No predictions found for rating 0
No predictions found for rating 1
No predictions found for rating 2
No predictions found for rating 3
MAE for rating 4: 2.798961624344402
MAE for rating 5: 1.6856896230399718
MAE for rating 6: 1.193122636521836
MAE for rating 7: 0.7288198475961436
MAE for rating 8: 0.7724444472101837
MAE for rating 9: 0.9036739962724859
MAE for rating 10: 1.7278472244394836
MAE 1.2515788285191283

Evaluating KNNWithZScore...
Computing the cosine similarity matrix...
Done computing similarity matrix.
No predictions found for rating 0
No predictions found for rating 1
No predictions found for rating 2
No predictions found for rating 3
MAE for rating 4: 2.774176470525486
MAE for rating 5: 1.6686615216916418
MAE for rating 6: 1.1969732193931777
MAE for rating 7: 0.7107865495429324
MAE for rating 8: 0.7832487628292228
MAE for rating 9: 0.8873488439010084
MAE for rating 10

In [17]:
from surprise import Dataset, Reader, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline
from surprise.model_selection import train_test_split
from surprise.accuracy import mae

# Example dataset loading, replace with your actual data loading code
reader = Reader(line_format='user item rating', rating_scale=(0, 10))
# Replace 'avis[['author', 'title', 'note']]' with your DataFrame
data = Dataset.load_from_df(avis[['author', 'title', 'note']], reader)

trainset, testset = train_test_split(data, test_size=0.2)

# k values to test for each algorithm
k_values = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

best_mae = float('inf')
best_algo = None
best_k = None

for k in k_values:
    algorithms = [
        KNNWithMeans(k=k, sim_options={'name': 'cosine', 'user_based': True}),
        KNNWithZScore(k=k, sim_options={'name': 'cosine', 'user_based': True}),
    ]

    for algo in algorithms:
        print(f"\nEvaluating {algo.__class__.__name__} with k={k}...")
        algo.fit(trainset)
        predictions = algo.test(testset)
        current_mae = mae(predictions, verbose=False)
        
        print(f'MAE: {current_mae} for {algo.__class__.__name__} with k={k}')
        
        if current_mae < best_mae:
            best_mae = current_mae
            best_algo = algo.__class__.__name__
            best_k = k

print(f'\nBest algorithm: {best_algo} with k={best_k} having MAE: {best_mae}')


Evaluating KNNWithMeans with k=5...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.3122265277315108 for KNNWithMeans with k=5

Evaluating KNNWithZScore with k=5...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.307824134740411 for KNNWithZScore with k=5

Evaluating KNNWithMeans with k=10...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.2688612272021416 for KNNWithMeans with k=10

Evaluating KNNWithZScore with k=10...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.2647595070211073 for KNNWithZScore with k=10

Evaluating KNNWithMeans with k=15...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.2531718458717398 for KNNWithMeans with k=15

Evaluating KNNWithZScore with k=15...
Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE: 1.2494571519546283 for KNNWithZScore with k=15

Evaluati