# Model Evaluation
This notebook evaluates the performance of different feature and regressor combinations on both MovieLens and Hackathon datasets.
   

In [None]:
import os
import time
import pandas as pd
import numpy as np
from surprise import accuracy
from models import ContentBased
from loaders import load_ratings
import seaborn as sns
import matplotlib.pyplot as plt

## 1. Define Evaluation Function

In [None]:
def evaluate_model(features_method, regressor_method, dataset='normal'):
    """
    Train a ContentBased model and score its predictions.

    The model is fitted on the ratings returned by ``load_ratings()`` and is
    then asked to predict on that same frame, so the reported errors are
    computed on the data the model was trained on.

    Args:
        features_method (str): Method to extract features
        regressor_method (str): Regressor to use
        dataset (str): 'normal' or 'hackathon'.
            NOTE(review): this argument is not read anywhere in the body;
            ``load_ratings()`` is called without it. Confirm how the dataset
            is actually selected.

    Returns:
        dict: Dictionary with keys 'RMSE', 'MAE' and 'Time', where 'Time' is
        the number of seconds spent constructing and fitting the model.
    """
    # Load data
    df_ratings = load_ratings()

    # Create and train model.
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # duration cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    model = ContentBased(features_method=features_method, regressor_method=regressor_method)
    model.fit(df_ratings)
    training_time = time.perf_counter() - start_time

    # Make predictions
    predictions = model.predict(df_ratings)

    # Calculate metrics from a single residual computation
    errors = df_ratings['rating'] - predictions
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))

    return {
        'RMSE': rmse,
        'MAE': mae,
        'Time': training_time
    }

## 2. Evaluate Normal Dataset

In [None]:
# Feature/regressor pairs to evaluate: every regressor on 'all_features'
normal_combinations = [
    ('all_features', regressor_name)
    for regressor_name in (
        'linear_regression',
        'random_forest',
        'xgboost',
        'neural_network',
    )
]

# Run each pair and collect its metrics under a "<features>_<regressor>" key
normal_results = {}
for features, regressor in normal_combinations:
    print(f"Evaluating {features} with {regressor}...")
    results = evaluate_model(features, regressor, 'normal')
    normal_results.update({f"{features}_{regressor}": results})

# One row per combination, one column per metric
normal_df = pd.DataFrame(normal_results).transpose()
display(normal_df)

## 3. Evaluate Hackathon Dataset

In [None]:
# Define combinations to test
hackathon_combinations = [
    ('genome_tags', 'linear_regression'),
    ('all_features_with_genome', 'linear_regression'),
    ('all_features_with_genome', 'random_forest'),
    ('all_features_with_genome', 'xgboost'),
    # The trailing comma below is required: without it the next tuple literal
    # is parsed as a call on this one and raises TypeError.
    ('all_features_with_genome', 'neural_network'),
    # Methods with genome tags and visual features
    ("all_features_with_genome_and_visuals", "linear_regression"),
    ("all_features_with_genome_and_visuals", "random_forest"),
    ("all_features_with_genome_and_visuals", "xgboost"),
    ("all_features_with_genome_and_visuals", "neural_network"),
]

# Evaluate each combination, keyed as "<features>_<regressor>"
hackathon_results = {}
for features, regressor in hackathon_combinations:
    print(f"Evaluating {features} with {regressor}...")
    results = evaluate_model(features, regressor, 'hackathon')
    hackathon_results[f"{features}_{regressor}"] = results

# Create DataFrame: one row per combination, one column per metric
hackathon_df = pd.DataFrame(hackathon_results).T
display(hackathon_df)

## 4. Visualize Results

In [None]:
# Draw one figure per metric (RMSE, then training time), each with the
# normal dataset on the left and the hackathon dataset on the right.
for metric, label in (('RMSE', 'RMSE'), ('Time', 'Training Time')):
    plt.figure(figsize=(12, 6))
    panels = (('Normal', normal_df), ('Hackathon', hackathon_df))
    for position, (dataset_name, frame) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        sns.barplot(x=frame.index, y=metric, data=frame)
        plt.title(f'{dataset_name} Dataset {label}')
        plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

# Stack both result tables so combinations can be ranked together; the extra
# index level records which table each row came from.
df_results = pd.concat(
    [normal_df, hackathon_df],
    keys=['normal', 'hackathon'],
    names=['dataset', 'combination'],
)

# Column names match the keys returned by evaluate_model ('RMSE', 'Time')
print("\nBests combinaisons (RMSE):")
print(df_results.nsmallest(3, 'RMSE'))

print("\nFastest:")
print(df_results.nsmallest(3, 'Time'))

## 5. Save Results

In [None]:
# Save results to CSV.
# The default directory is an absolute path on the original author's machine;
# set the EVALUATION_RESULTS_DIR environment variable to write somewhere else
# without editing the notebook.
results_dir = os.environ.get(
    'EVALUATION_RESULTS_DIR',
    'C:/Users/nicol/Documents/GitHub/Majeur-BA/RECOMMENDER-SYSTEM/mlsmm2156/evaluation/results',
)
os.makedirs(results_dir, exist_ok=True)

# One CSV per result table
for frame, filename in (
    (normal_df, 'normal_results.csv'),
    (hackathon_df, 'hackathon_results.csv'),
):
    frame.to_csv(os.path.join(results_dir, filename))
