# Analyse Comparative des Expériences JibJobRecSys
Ce notebook permet de charger et de comparer les résultats de plusieurs expériences (runs) du système de recommandation JibJobRecSys.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

## 1. Chargement des résultats expérimentaux

In [None]:
# Load the aggregated results table: one row per experiment run.
results_path = '../results/all_experiment_results.csv'
results_df = pd.read_csv(results_path)
# Order the runs chronologically when a timestamp was recorded. The column is
# checked first, like every other optional column in this notebook, so a
# results file without it still loads instead of raising KeyError.
if 'timestamp' in results_df.columns:
    results_df = results_df.sort_values(by='timestamp')
# Last expression of the cell: the notebook renders the first rows as a preview.
results_df.head()

## 2. Tableau des résultats trié par NDCG@10

In [None]:
# Rank the runs by NDCG@10 (best first) when that metric was recorded;
# otherwise show the table in its current order.
ranked_df = results_df
if 'NDCG@10' in results_df.columns:
    ranked_df = results_df.sort_values(by='NDCG@10', ascending=False)
display(ranked_df)

## 3. Impact du nombre de hidden channels

In [None]:
# Compare the ranking metrics across hidden-channel sizes.
# Only the metrics actually present in the results table are plotted, so a
# table lacking Precision@10 or Recall@10 no longer raises KeyError.
metrics = [m for m in ('NDCG@10', 'Precision@10', 'Recall@10') if m in results_df.columns]
if 'hidden_channels' in results_df.columns and metrics:
    # Average each metric over all runs sharing the same hidden_channels value.
    grouped = results_df.groupby('hidden_channels')[metrics].mean(numeric_only=True)
    if not grouped.empty:
        fig, ax = plt.subplots(figsize=(8, 6))
        # DataFrame.plot draws the metrics side by side within each group;
        # separate plt.bar calls at the same x positions would stack the bars
        # on top of each other and hide the smaller values.
        grouped.plot(kind='bar', ax=ax, alpha=0.8, rot=0)
        ax.set_xlabel('Hidden Channels')
        ax.set_ylabel('Score')
        ax.set_title('Impact of Hidden Channels on Performance')
        ax.legend()
        fig.tight_layout()
        # Make sure the output directory exists before saving the figure.
        os.makedirs('../visualizations', exist_ok=True)
        fig.savefig('../visualizations/performance_vs_hidden_channels.png')
        plt.show()

## 4. Impact du nombre de couches (num_layers)

In [None]:
# Compare the ranking metrics across GNN depths (num_layers).
# Only the metrics actually present in the results table are plotted, so a
# table lacking Precision@10 or Recall@10 no longer raises KeyError.
metrics = [m for m in ('NDCG@10', 'Precision@10', 'Recall@10') if m in results_df.columns]
if 'num_layers' in results_df.columns and metrics:
    # Average each metric over all runs sharing the same num_layers value.
    grouped = results_df.groupby('num_layers')[metrics].mean(numeric_only=True)
    if not grouped.empty:
        fig, ax = plt.subplots(figsize=(8, 6))
        # DataFrame.plot draws the metrics side by side within each group;
        # separate plt.bar calls at the same x positions would stack the bars
        # on top of each other and hide the smaller values.
        grouped.plot(kind='bar', ax=ax, alpha=0.8, rot=0)
        ax.set_xlabel('Number of GNN Layers')
        ax.set_ylabel('Score')
        ax.set_title('Impact of Number of GNN Layers on Performance')
        ax.legend()
        fig.tight_layout()
        # This cell must not depend on another cell having created the
        # output directory, so it is ensured here before saving.
        os.makedirs('../visualizations', exist_ok=True)
        fig.savefig('../visualizations/performance_vs_num_layers.png')
        plt.show()

## 5. Courbes de perte comparatives (optionnel)

In [None]:
import glob

# Load the training history of every run found in the results directory.
# glob returns paths in arbitrary order; sorting keeps the legend order (and
# the colour assigned to each run) reproducible between executions.
history_files = sorted(glob.glob('../results/training_history_*.csv'))
plt.figure(figsize=(10,6))
for file in history_files:
    hist = pd.read_csv(file)
    # Experiment name = file name without the fixed prefix and extension.
    exp_name = os.path.basename(file).replace('training_history_','').replace('.csv','')
    # Plot each curve only if the history file recorded it, so one file
    # without a validation loss does not abort the whole comparison.
    if 'train_loss' in hist.columns:
        plt.plot(hist['train_loss'], label=f'{exp_name} - train')
    if 'val_loss' in hist.columns:
        plt.plot(hist['val_loss'], label=f'{exp_name} - val', linestyle='--')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Comparative Training & Validation Loss Curves')
# Drawing a legend with no labelled curves only triggers a warning.
handles, _ = plt.gca().get_legend_handles_labels()
if handles:
    plt.legend()
plt.tight_layout()
# Make sure the output directory exists before saving the figure.
os.makedirs('../visualizations', exist_ok=True)
plt.savefig('../visualizations/comparative_loss_curves.png')
plt.show()

## 6. Comparaison RMSE et MAE

In [None]:
# Compare the error metrics (RMSE, MAE) across hidden-channel sizes.
# Only the metrics actually present in the results table are plotted, so a
# table that has RMSE but no MAE no longer raises KeyError.
metrics = [m for m in ('RMSE', 'MAE') if m in results_df.columns]
if 'hidden_channels' in results_df.columns and metrics:
    # Average each metric over all runs sharing the same hidden_channels value.
    grouped = results_df.groupby('hidden_channels')[metrics].mean(numeric_only=True)
    if not grouped.empty:
        fig, ax = plt.subplots(figsize=(8, 6))
        # DataFrame.plot draws the metrics side by side within each group;
        # separate plt.bar calls at the same x positions would stack the bars
        # on top of each other and hide the smaller values.
        grouped.plot(kind='bar', ax=ax, alpha=0.8, rot=0)
        ax.set_xlabel('Hidden Channels')
        ax.set_ylabel('Error')
        ax.set_title('RMSE and MAE vs Hidden Channels')
        ax.legend()
        fig.tight_layout()
        # This cell must not depend on another cell having created the
        # output directory, so it is ensured here before saving.
        os.makedirs('../visualizations', exist_ok=True)
        fig.savefig('../visualizations/rmse_mae_vs_hidden_channels.png')
        plt.show()