# Evaluator Module
The Evaluator module creates evaluation reports.

Each report contains the evaluation metrics computed for every model specified in the evaluation config.

In [4]:
# reloads modules automatically before entering the execution of code
%load_ext autoreload
%autoreload 2

# third parties imports
import numpy as np 
import pandas as pd
# -- add new imports here --
from surprise import model_selection
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
from collections import defaultdict
from surprise.dataset import Trainset
import surprise
from surprise import Reader
from surprise import Dataset
# local imports
from configs import EvalConfig
from constants import Constant as C
from loaders import export_evaluation_report
from loaders import load_ratings
# -- add new imports here --
from models import get_top_n  # Importez la fonction get_top_n depuis votre fichier models.py

# 1. Model validation functions
Validation functions are a way to perform cross-validation on recommender system models.

In [2]:
def generate_split_predictions(algo, df_ratings, eval_config):
    """Generate predictions on a random test set specified in eval_config.

    Args:
        algo: Algorithm object exposing ``fit(trainset)`` and ``test(testset)``.
        df_ratings: Either a pandas DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns, or an already-built Surprise dataset. Anything
            that is not a DataFrame is handed to ``train_test_split`` as is.
        eval_config: Configuration providing ``test_size`` and, optionally,
            ``random_state``.

    Returns:
        Whatever ``algo.test`` returns for the held-out test set.
    """
    if isinstance(df_ratings, pd.DataFrame):
        # Convert the DataFrame into a Surprise Dataset.
        reader = Reader(rating_scale=(0.5, 5.0))
        ratings_dataset = Dataset.load_from_df(
            df_ratings[['userId', 'movieId', 'rating']], reader
        )
    else:
        # NOTE(review): the workflow in this file passes
        # load_ratings(surprise_format=True); presumably that is already a
        # Surprise Dataset -- confirm against loaders.py.
        ratings_dataset = df_ratings

    # Honour the configured seed, as the leave-one-out split does; fall back to
    # 42 (the value that used to be hard-coded) when the config defines none.
    random_state = getattr(eval_config, "random_state", 42)

    # Split the dataset into training and test sets.
    trainset, testset = train_test_split(
        ratings_dataset,
        test_size=eval_config.test_size,
        random_state=random_state,
    )

    # Train on the training set, then predict on the held-out test set.
    algo.fit(trainset)
    return algo.test(testset)


def generate_loo_top_n(algo, df_ratings, eval_config):
    """Generate top-n recommendations for each user on a random leave-one-out (LOO) split.

    Args:
        algo: Algorithm object exposing ``fit(trainset)`` and ``test(testset)``.
        df_ratings: Either a pandas DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns, or an already-built Surprise dataset. Anything
            that is not a DataFrame is handed to the splitter as is.
        eval_config: Configuration providing ``random_state`` and
            ``top_n_value``.

    Returns:
        A ``(top_n_recommendations, testset)`` tuple, where
        ``top_n_recommendations`` maps each user id to a list of
        ``(item id, estimated rating)`` pairs sorted by decreasing estimate and
        truncated to ``eval_config.top_n_value`` entries, and ``testset`` is
        the held-out set produced by the LOO split.
    """
    if isinstance(df_ratings, pd.DataFrame):
        # Convert the DataFrame into a Surprise Dataset.
        reader = Reader(rating_scale=(0.5, 5.0))
        ratings_dataset = Dataset.load_from_df(
            df_ratings[['userId', 'movieId', 'rating']], reader
        )
    else:
        # NOTE(review): the workflow in this file passes
        # load_ratings(surprise_format=True); presumably that is already a
        # Surprise Dataset -- confirm against loaders.py.
        ratings_dataset = df_ratings

    # Only one split is requested, so take it directly instead of looping;
    # this also guarantees trainset/testset are bound before they are used.
    loo = LeaveOneOut(n_splits=1, random_state=eval_config.random_state)
    trainset, testset = next(iter(loo.split(ratings_dataset)))

    # Train on the LOO trainset, then predict on its anti-testset.
    algo.fit(trainset)
    all_predictions = algo.test(trainset.build_anti_testset())

    # Group the (item id, estimate) pairs by user id.
    top_n_recommendations = defaultdict(list)
    for uid, iid, _, est, _ in all_predictions:
        top_n_recommendations[uid].append((iid, est))

    # Keep each user's best-estimated items only. Re-assigning an existing key
    # does not resize the dict, so this is safe while iterating over items().
    for uid, user_ratings in top_n_recommendations.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n_recommendations[uid] = user_ratings[:eval_config.top_n_value]

    return top_n_recommendations, testset



def generate_full_top_n(algo, df_ratings, eval_config):
    """Generate top-n recommendations for each user after training on the full trainset.

    Args:
        algo: Algorithm object exposing ``fit(trainset)`` and ``test(testset)``.
        df_ratings: Either a pandas DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns, or an already-built Surprise dataset. Anything
            that is not a DataFrame is used as is and must provide
            ``build_full_trainset()``.
        eval_config: Configuration providing ``top_n_value``.

    Returns:
        A mapping from user id to a list of ``(item id, estimated rating)``
        pairs sorted by decreasing estimate and truncated to
        ``eval_config.top_n_value`` entries.
    """
    if isinstance(df_ratings, pd.DataFrame):
        # Convert the DataFrame into a Surprise Dataset.
        reader = Reader(rating_scale=(0.5, 5.0))
        ratings_dataset = Dataset.load_from_df(
            df_ratings[['userId', 'movieId', 'rating']], reader
        )
    else:
        # NOTE(review): the workflow in this file passes
        # load_ratings(surprise_format=True); presumably that is already a
        # Surprise Dataset -- confirm against loaders.py.
        ratings_dataset = df_ratings

    # Train on every available rating.
    full_trainset = ratings_dataset.build_full_trainset()
    algo.fit(full_trainset)

    # Predict on the anti-testset built from the full trainset.
    all_predictions = algo.test(full_trainset.build_anti_testset())

    # Group the (item id, estimate) pairs by user id.
    top_n_recommendations = defaultdict(list)
    for uid, iid, _, est, _ in all_predictions:
        top_n_recommendations[uid].append((iid, est))

    # Keep each user's best-estimated items only. Re-assigning an existing key
    # does not resize the dict, so this is safe while iterating over items().
    for uid, user_ratings in top_n_recommendations.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n_recommendations[uid] = user_ratings[:eval_config.top_n_value]

    return top_n_recommendations

def precompute_information():
    """Build the dictionary of precomputed inputs used when evaluating in full mode.

    Dictionary keys:
    - "item_to_rank": intended to hold a dictionary mapping movie ids to
      rankings; it is currently a ``None`` placeholder
    - (-- for your project, add other relevant information here -- )
    """
    return {"item_to_rank": None}


def create_evaluation_report(eval_config, sp_ratings, precomputed_dict, available_metrics):
    """Evaluate every configured model and gather the scores in a DataFrame.

    Args:
        eval_config: Configuration exposing ``models`` (an iterable of
            ``(model_name, model, arguments)`` triples) and the metric name
            collections ``split_metrics``, ``loo_metrics`` and ``full_metrics``.
        sp_ratings: Ratings forwarded unchanged to the ``generate_*`` helpers.
        precomputed_dict: Extra keyword arguments passed to every full metric.
        available_metrics: Nested mapping
            ``{evaluation type: {metric name: (function, keyword arguments)}}``.

    Returns:
        A DataFrame with one row per model name and one column per metric.

    Raises:
        AssertionError: If a requested metric is not registered for its
            evaluation type.
    """
    report = {}
    for model_name, model, arguments in eval_config.models:
        print(f'Handling model {model_name}')
        algo = model(**arguments)
        scores = {}
        report[model_name] = scores

        # Type 1: metrics computed on rating predictions of a random split.
        if len(eval_config.split_metrics) > 0:
            print('Training split predictions')
            predictions = generate_split_predictions(algo, sp_ratings, eval_config)
            for metric in eval_config.split_metrics:
                print(f'- computing metric {metric}')
                assert metric in available_metrics['split']
                metric_fn, metric_kwargs = available_metrics['split'][metric]
                scores[metric] = metric_fn(predictions, **metric_kwargs)

        # Type 2: metrics computed on top-n lists of a leave-one-out split.
        if len(eval_config.loo_metrics) > 0:
            print('Training loo predictions')
            anti_testset_top_n, testset = generate_loo_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.loo_metrics:
                assert metric in available_metrics['loo']
                metric_fn, metric_kwargs = available_metrics['loo'][metric]
                scores[metric] = metric_fn(anti_testset_top_n, testset, **metric_kwargs)

        # Type 3: metrics computed on top-n lists after training on all ratings.
        if len(eval_config.full_metrics) > 0:
            print('Training full predictions')
            anti_testset_top_n = generate_full_top_n(algo, sp_ratings, eval_config)
            for metric in eval_config.full_metrics:
                assert metric in available_metrics['full']
                metric_fn, metric_kwargs = available_metrics['full'][metric]
                scores[metric] = metric_fn(
                    anti_testset_top_n, **precomputed_dict, **metric_kwargs
                )

    # from_dict puts models in columns; transpose to get one row per model.
    return pd.DataFrame.from_dict(report).transpose()


# Disabled manual smoke test, kept as a bare string literal so it is never
# executed. When enabled it loads the ratings with load_ratings(False), builds
# a surprise.SVD model, runs the three generate_* functions with EvalConfig and
# prints the split and LOO top-n results.
'''# Chargez l'ensemble de données de test
df_ratings = load_ratings(False)

# Créez une instance de l'algorithme que vous souhaitez tester
algo = surprise.SVD()

# Testez la fonction generate_split_predictions
predictions_split = generate_split_predictions(algo, df_ratings, EvalConfig)
top_n_split = get_top_n(predictions_split, n=EvalConfig.top_n_value)

# Testez la fonction generate_loo_top_n
top_n_loo, _ = generate_loo_top_n(algo, df_ratings, EvalConfig)

# Testez la fonction generate_full_top_n
top_n_full = generate_full_top_n(algo, df_ratings, EvalConfig)

# Vérifiez les résultats
print("Top-N Recommendations (Split Method):")
print(top_n_split)

print("Top-N Recommendations (LOO Method):")
print(top_n_loo)'''


TypeError: create_evaluation_report() missing 4 required positional arguments: 'eval_config', 'sp_ratings', 'precomputed_dict', and 'available_metrics'

# 2. Evaluation metrics
Implement evaluation metrics either for rating predictions (split metrics) or for top-n recommendations (loo metrics, full metrics).

In [9]:
def get_hit_rate(anti_testset_top_n, testset):
    """Compute the average hit over the users (loo metric).

    A hit (1) happens when the movie in the testset has been picked by the top-n recommender
    A fail (0) happens when the movie in the testset has not been picked by the top-n recommender

    Args:
        anti_testset_top_n: Mapping from user id to a list of
            ``(item id, estimated rating)`` pairs.
        testset: Sequence of held-out entries whose first two fields are the
            user id and the item id; any further fields are ignored.

    Returns:
        The fraction of testset entries whose item appears in that user's
        top-n list, as a float in [0, 1]. Every entry counts once, so this is
        an average over users when the testset holds one entry per user.
        Returns 0.0 for an empty testset.
    """
    if not testset:
        return 0.0

    hits = 0
    for uid, iid, *_ in testset:
        # .get avoids inserting empty lists when the mapping is a defaultdict;
        # users without any recommendation simply count as a fail.
        recommended_items = {rec_iid for rec_iid, _ in anti_testset_top_n.get(uid, ())}
        if iid in recommended_items:
            hits += 1

    hit_rate = hits / len(testset)
    return hit_rate


def get_novelty(anti_testset_top_n, item_to_rank):
    """Compute the average novelty of the top-n recommendation over the users (full metric).

    The novelty is defined as the average ranking of the movies recommended

    Args:
        anti_testset_top_n: Mapping from user id to a list of
            ``(item id, estimated rating)`` pairs.
        item_to_rank: Mapping from item id to its rank.

    Returns:
        The mean, over users with at least one recommendation, of the mean
        rank of that user's recommended items. Returns 0.0 when no user has
        any recommendation.

    Raises:
        ValueError: If ``item_to_rank`` is None.
        KeyError: If a recommended item has no entry in ``item_to_rank``.
    """
    if item_to_rank is None:
        # Fail with an explicit message rather than an opaque TypeError when
        # the rank mapping has not been computed.
        raise ValueError("item_to_rank must map item ids to ranks, got None")

    # Average per user first so that every user weighs the same, whatever the
    # length of their recommendation list; users with empty lists are skipped.
    user_mean_ranks = [
        sum(item_to_rank[iid] for iid, _ in recommendations) / len(recommendations)
        for recommendations in anti_testset_top_n.values()
        if recommendations
    ]
    if not user_mean_ranks:
        return 0.0

    average_rank_sum = sum(user_mean_ranks) / len(user_mean_ranks)
    return average_rank_sum

# 3. Evaluation workflow
Load the data, evaluate the models, and save the experimental outcomes.

In [12]:
# In the configs.py file

# Registry of the metrics that can be requested in the evaluation config:
# {evaluation type: {metric name: (function, keyword arguments)}}.
# create_evaluation_report indexes the "split", "loo" and "full" entries, so
# all three evaluation types are registered here.
AVAILABLE_METRICS = {
    "split": {
        "mae": (accuracy.mae, {'verbose': False}),
        "rmse": (accuracy.rmse, {'verbose': False}),
        # Add new split metrics here
    },
    "loo": {
        "hit_rate": (get_hit_rate, {}),
        # Add new loo metrics here
    },
    "full": {
        "novelty": (get_novelty, {}),
        # Add new full metrics here
    },
    # Add new metric types here
}

# Load the ratings, evaluate every configured model and export the report.
sp_ratings = load_ratings(surprise_format=True)
precomputed_dict = precompute_information()
evaluation_report = create_evaluation_report(EvalConfig, sp_ratings, precomputed_dict, AVAILABLE_METRICS)
export_evaluation_report(evaluation_report)

ValueError: too many values to unpack (expected 3)