# 🎵 Classification des Genres Musicaux
## Notebook 4: Évaluation et Analyse Finale

**Objectif:** Analyse approfondie des résultats et préparation des visualisations pour le rapport.

---

## 1. Configuration et Imports

In [None]:
# Standard-library and third-party dependencies used by this notebook.
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from pathlib import Path

# Put the parent directory first on the import path so that the `src`
# package below can be imported (assumes the notebook is run from a
# subfolder of the project root -- TODO confirm).
sys.path.insert(0, '..')

# Project-local modules from the `src` package.
from src.config import Config
from src.models import ModelTrainer
from src.evaluation import Evaluator
from src.visualization import Visualizer

# Apply the seaborn "whitegrid" matplotlib style to all figures.
plt.style.use('seaborn-v0_8-whitegrid')
# IPython magic (not plain Python): render figures inline in the notebook.
%matplotlib inline

print("‚úÖ Imports r√©ussis!")

## 2. Charger les Données et Modèles

In [None]:
# Read the feature table from the processed-data location given by Config.
features_path = Config.DATA_PROCESSED / Config.FEATURES_FILE
features_df = pd.read_csv(features_path)
n_samples = len(features_df)
print(f"‚úÖ Features charg√©es: {n_samples} √©chantillons")

# Create the trainer and let it derive the six train/validation/test arrays.
trainer = ModelTrainer()
splits = trainer.prepare_data(features_df)
X_train, X_val, X_test, y_train, y_val, y_test = splits

In [None]:
# Load every saved model found in the models directory.
# The paths are sorted because Path.glob() yields entries in arbitrary,
# filesystem-dependent order, and the evaluation section of this notebook
# analyses the first entry of trainer.trained_models; without sorting, the
# analysed model could differ from one machine or run to the next.
model_files = sorted(Config.MODELS_DIR.glob("*.joblib"))
print(f"\nMod√®les sauvegard√©s trouv√©s: {len(model_files)}")

for model_file in model_files:
    # The return value is not needed here. Presumably load_model registers
    # the model in trainer.trained_models -- verify against ModelTrainer.
    trainer.load_model(model_file)

# Fall back to training from scratch when no model ended up registered.
if not trainer.trained_models:
    print("\n‚ö†Ô∏è Aucun mod√®le sauvegard√©, r√©-entra√Ænement...")
    trainer.train_all_models(X_train, y_train, X_val, y_val)

## 3. Évaluation Détaillée du Meilleur Modèle

In [None]:
# Instantiate the project's evaluation and visualization helpers.
evaluator = Evaluator()
visualizer = Visualizer()

# No ranking is performed here: the first registered model is simply
# treated as the "best" one for the rest of the notebook.
available_models = list(trainer.trained_models.keys())
best_model = available_models[0]
print(f"\nüèÜ Analyse du mod√®le: {best_model}")

# Predictions of the selected model on X_test.
y_pred = trainer.predict(best_model, X_test)

# Overall metrics, each printed with four decimal places.
metrics = evaluator.calculate_metrics(y_test, y_pred)
print("\nüìä M√©triques Globales:")
for metric_name, metric_value in metrics.items():
    print(f"   {metric_name}: {metric_value:.4f}")

In [None]:
# Per-class metrics table, rounded to three decimals for display only.
class_metrics = evaluator.calculate_per_class_metrics(y_test, y_pred)
rounded_table = class_metrics.round(3)
print("\nüìä M√©triques par Genre:")
print(rounded_table.to_string())

## 4. Visualisations pour le Rapport

In [None]:
# Figure 1: normalized confusion matrix for the selected model.
normalized_title = f"Matrice de Confusion Normalis√©e - {best_model}"
fig = evaluator.plot_confusion_matrix(
    y_test,
    y_pred,
    normalize=True,
    title=normalized_title,
    save_name="confusion_matrix_normalized.png",
)
plt.show()

In [None]:
# Figure 2: confusion matrix with absolute counts (normalization disabled).
absolute_title = f"Matrice de Confusion - {best_model}"
fig = evaluator.plot_confusion_matrix(
    y_test,
    y_pred,
    normalize=False,
    title=absolute_title,
    save_name="confusion_matrix_absolute.png",
)
plt.show()

In [None]:
# Figure 3: classification report plotted for the selected model.
report_title = f"Performance par Genre - {best_model}"
fig = evaluator.plot_classification_report(
    y_test,
    y_pred,
    title=report_title,
    save_name="classification_report.png",
)
plt.show()

In [None]:
# Figure 4: accuracy per genre (confusion-matrix diagonal over its row sums).
fig, ax = plt.subplots(figsize=(12, 6))

cm = evaluator.get_confusion_matrix(y_test, y_pred)
correct_per_genre = cm.diagonal()
# Row sums; assumes rows index the true genre (scikit-learn convention)
# -- TODO confirm against Evaluator.get_confusion_matrix.
samples_per_genre = cm.sum(axis=1)

# Guarded division: a genre whose row sums to zero would otherwise yield
# NaN (0/0) and a RuntimeWarning, and that NaN would propagate into the mean
# line and the bar labels. Such genres are reported as 0.0 instead.
accuracy_per_genre = np.divide(
    correct_per_genre,
    samples_per_genre,
    out=np.zeros(len(samples_per_genre), dtype=float),
    where=samples_per_genre > 0,
)
mean_accuracy = np.mean(accuracy_per_genre)

# Traffic-light colouring: > 70% green, > 50% orange, otherwise red.
colors = ['green' if acc > 0.7 else 'orange' if acc > 0.5 else 'red'
          for acc in accuracy_per_genre]

# NOTE(review): assumes Config.GENRES is in the same order as the rows of
# the confusion matrix -- confirm.
bars = ax.bar(Config.GENRES, accuracy_per_genre, color=colors)
ax.axhline(y=0.7, color='green', linestyle='--', alpha=0.5, label='Seuil 70%')
ax.axhline(y=mean_accuracy, color='blue', linestyle='-', alpha=0.7,
           label=f'Moyenne: {mean_accuracy:.1%}')

ax.set_xlabel('Genre')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy par Genre Musical', fontsize=14, fontweight='bold')
ax.set_ylim(0, 1)
ax.legend()

# Write the percentage just above each bar.
for bar, acc in zip(bars, accuracy_per_genre):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
            f'{acc:.1%}', ha='center', va='bottom', fontsize=9)

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig(Config.REPORTS_DIR / 'accuracy_per_genre.png', dpi=100, bbox_inches='tight')
plt.show()

## 5. Analyse des Erreurs

In [None]:
# Ask the evaluator for the ten most frequently confused genre pairs.
confused_pairs = evaluator.get_most_confused_pairs(y_test, y_pred, top_n=10)

print("\n‚ö†Ô∏è Paires de genres les plus confondues:")
print("-" * 40)
# Each entry unpacks as (true genre, predicted genre, error count).
for rank, pair in enumerate(confused_pairs, start=1):
    real, pred, count = pair
    print(f"   {rank}. {real} ‚Üí {pred}: {count} erreurs")

In [None]:
# Horizontal bar chart of the eight most confused genre pairs.
fig, ax = plt.subplots(figsize=(10, 6))

top_confusions = confused_pairs[:8]
pairs = [
    f"{true_genre} ‚Üí {predicted_genre}"
    for true_genre, predicted_genre, _ in top_confusions
]
counts = [n_errors for _, _, n_errors in top_confusions]

# One shade of red per bar, sampled evenly from the Reds colormap.
shade_positions = np.linspace(0.3, 0.8, len(pairs))
colors = plt.cm.Reds(shade_positions)
ax.barh(pairs, counts, color=colors)

ax.set_xlabel("Nombre d'erreurs")
ax.set_title('Paires de Genres les Plus Confondues', fontsize=14, fontweight='bold')

# Annotate every bar with its error count, slightly right of the bar end.
for row, n_errors in enumerate(counts):
    ax.text(n_errors + 0.2, row, str(n_errors), va='center', fontsize=10)

plt.tight_layout()
plt.savefig(Config.REPORTS_DIR / 'confused_pairs.png', dpi=100, bbox_inches='tight')
plt.show()

## 6. Importance des Features (si applicable)

In [None]:
# Feature column names: every column except the file identifier and label.
# Computed unconditionally because the summary table later in this notebook
# reads `feature_cols`; defining it only inside the branch below caused a
# NameError whenever the model exposes no feature importances.
feature_cols = [c for c in features_df.columns if c not in ['filename', 'genre']]

# get_feature_importance returns None when the model has no importances.
importance_df = trainer.get_feature_importance(best_model)

if importance_df is not None:
    # NOTE(review): labels are assigned positionally, and head(20) is only
    # the "top 20" if the frame is sorted by importance. Both are correct
    # only if get_feature_importance returns rows in an order compatible
    # with feature_cols -- confirm against ModelTrainer.
    importance_df['feature'] = feature_cols
    top_features = importance_df.head(20)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.barh(top_features['feature'], top_features['importance'], color='steelblue')
    ax.set_xlabel('Importance')
    ax.set_title('Top 20 Caract√©ristiques les Plus Importantes', fontsize=14, fontweight='bold')
    # Flip the y axis so the first row is drawn at the top.
    ax.invert_yaxis()

    plt.tight_layout()
    plt.savefig(Config.REPORTS_DIR / 'feature_importance.png', dpi=100, bbox_inches='tight')
    plt.show()
else:
    print(f"‚ö†Ô∏è {best_model} ne supporte pas l'importance des features")

## 7. Résumé pour le Rapport

In [None]:
# Generate the textual evaluation report and echo it in the notebook.
report = evaluator.generate_report(best_model, y_test, y_pred)
print(report)

# Save the report. The encoding is set explicitly: without it, open() uses
# the platform's locale encoding, which can raise UnicodeEncodeError (e.g.
# cp1252 on Windows) if the report contains accented characters or emoji.
report_path = Config.REPORTS_DIR / 'evaluation_report.txt'
with open(report_path, 'w', encoding='utf-8') as f:
    f.write(report)
print(f"\nüíæ Rapport sauvegard√©: {report_path}")

In [None]:
# Summary table for the report.
# The feature count is derived here from features_df instead of reading
# `feature_cols`: that name is only created inside the `if` branch of the
# feature-importance cell, so this cell raised a NameError for models that
# expose no feature importances (or when that cell was skipped).
n_features = sum(1 for c in features_df.columns if c not in ('filename', 'genre'))

summary = pd.DataFrame([
    ['Dataset', 'GTZAN', '1000 fichiers'],
    ['Genres', '10', 'Blues, Classical, Country, Disco, Hip-hop, Jazz, Metal, Pop, Reggae, Rock'],
    ['Features extraites', str(n_features), 'MFCC, Spectral, Chroma, Tempo, etc.'],
    ['Meilleur mod√®le', best_model, ''],
    ['Accuracy (Test)', f"{metrics['accuracy']:.1%}", ''],
    ['F1-Score (Test)', f"{metrics['f1_score']:.1%}", ''],
], columns=['M√©trique', 'Valeur', 'D√©tails'])

print("\nüìä TABLEAU R√âCAPITULATIF")
print("=" * 60)
print(summary.to_string(index=False))
print("=" * 60)

# Persist the table next to the other report artefacts.
summary.to_csv(Config.REPORTS_DIR / 'summary.csv', index=False)

## 8. Conclusions et Recommandations

### Points Forts:
- [Compléter selon vos résultats]
- Genres bien classifiés: ...

### Points Faibles:
- Genres confondus: ...
- Limitations: ...

### Recommandations:
1. Utiliser des réseaux de neurones profonds (CNN sur spectrogrammes)
2. Augmenter le dataset avec plus d'exemples
3. Tester des techniques d'augmentation de données audio
4. Explorer des features supplémentaires

In [None]:
# Print the name of every entry currently present in the reports directory.
print("\nüìÅ Fichiers g√©n√©r√©s dans reports/:")
report_entries = Config.REPORTS_DIR.glob('*')
for report_entry in report_entries:
    print(f"   - {report_entry.name}")

In [None]:
# Closing banner followed by the list of next steps.
separator = "=" * 60
print("\n" + separator)
print("‚úÖ √âVALUATION TERMIN√âE!")
print(separator)
# The text has no placeholders, so a plain string prints the same output.
next_steps = """
üìå Prochaines √©tapes:
   1. R√©diger le rapport final avec les visualisations g√©n√©r√©es
   2. Pr√©parer la pr√©sentation PowerPoint
   3. S'entra√Æner pour la soutenance

üìÖ Date de soutenance: Semaine du 23 f√©vrier 2026
"""
print(next_steps)