In [1]:
# ---------------------------
# MovieMood – Visualisation POC
# Ticket #6 – Viz / Tooling ML
# ---------------------------

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re

# ---------------------------
# Config graphique
# ---------------------------
sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (10,6)

# ---------------------------
# Dossier outputs à la racine
# ---------------------------
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
outputs_dir = os.path.join(project_root, 'outputs')

if not os.path.exists(outputs_dir):
    os.makedirs(outputs_dir)

# ---------------------------
# Fichiers historiques
# ---------------------------
ratings_history_file = os.path.join(project_root, 'ratings_small.csv')
movies_file = os.path.join(project_root, 'tmdb_5000_movies.csv')

# ---------------------------
# Chargement des données historiques
# ---------------------------
ratings = pd.read_csv(ratings_history_file)
movies = pd.read_csv(movies_file)

# ---------------------------
# Lecture de tous les CSV ALS dans outputs/
# ---------------------------
als_files = [f for f in os.listdir(outputs_dir) if re.match(r'recommended_movies_\d+\.csv', f)]
if not als_files:
    raise FileNotFoundError(f"Aucun fichier de recommandations trouvé dans '{outputs_dir}/'")

# Pour chaque utilisateur
for file_name in als_files:
    user_id = int(re.search(r'\d+', file_name).group())
    ratings_file = os.path.join(outputs_dir, file_name)

    # Chargement des recommandations ALS
    recommended_df = pd.read_csv(ratings_file)

    # Notes existantes de l'utilisateur
    user_ratings = ratings[ratings['userId'] == user_id].merge(
        movies, left_on='movieId', right_on='id', how='left'
    )[["title", "release_date", "rating"]].dropna()

    # --- Visualisation 1 : Notes historiques ---
    plt.figure()
    sns.barplot(
        data=user_ratings.sort_values('rating', ascending=False).head(10),
        x='rating', y='title', palette='coolwarm'
    )
    plt.title(f"Top 10 films notés par l'utilisateur {user_id}")
    plt.xlabel("Rating")
    plt.ylabel("Titre du film")
    plt.tight_layout()
    plt.savefig(os.path.join(outputs_dir, f"top_rated_{user_id}.png"), bbox_inches='tight')
    plt.close()

    # --- Visualisation 2 : Recommandations ALS ---
    plt.figure()
    sns.barplot(
        data=recommended_df.sort_values('score', ascending=False).head(10),
        x='score', y='title', palette='viridis'
    )
    plt.title(f"Top 10 recommandations ALS pour l'utilisateur {user_id}")
    plt.xlabel("Score de recommandation")
    plt.ylabel("Titre du film")
    plt.tight_layout()
    plt.savefig(os.path.join(outputs_dir, f"top_recommended_{user_id}.png"), bbox_inches='tight')
    plt.close()

    # --- Visualisation 3 : Comparaison notes vs recommandations ---
    comparison_df = pd.merge(
        user_ratings[['title', 'rating']],
        recommended_df[['title', 'score']],
        on='title',
        how='outer'
    )

    # Ajouter un type pour couleur
    comparison_df['type'] = comparison_df.apply(
        lambda row: 'Historique' if pd.notnull(row['rating']) else 'Recommandé', axis=1
    )

    # Remplacer NaN par 0 pour afficher tous les points
    comparison_df['rating'] = comparison_df['rating'].fillna(0)
    comparison_df['score'] = comparison_df['score'].fillna(0)

    plt.figure()
    sns.scatterplot(
        data=comparison_df,
        x='score',
        y='rating',
        hue='type',
        s=100
    )
    plt.title(f"Comparaison notes utilisateurs vs recommandations (user {user_id})")
    plt.xlabel("Score de recommandation ALS")
    plt.ylabel("Note utilisateur")
    plt.xlim(0, 1)
    plt.ylim(0, 5)
    plt.tight_layout()
    plt.savefig(os.path.join(outputs_dir, f"comparison_{user_id}.png"), bbox_inches='tight')
    plt.close()

    print(f"Graphiques générés pour l'utilisateur {user_id}")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(


Graphiques générés pour l'utilisateur 3
Graphiques générés pour l'utilisateur 1



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(
