# In [None]:
# Model Evaluation - Modèle de Prédiction de Rues Risquées

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Librairies pour l'√©valuation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, validation_curve, learning_curve
from sklearn.inspection import permutation_importance
import joblib

# Librairies pour visualisation avanc√©e
try:
    import shap
    HAS_SHAP = True
except ImportError:
    HAS_SHAP = False
    print("‚ö†Ô∏è SHAP non install√© - analyse d'interpr√©tabilit√© limit√©e")

# Pour les cartes
import folium
from folium.plugins import HeatMap

print("üìä D√©but de l'√©valuation du mod√®le")
print("="*60)

# =====================================================================
# 1. CHARGEMENT DU MOD√àLE ET DES DONN√âES
# =====================================================================

print("\nüìÅ Chargement du mod√®le et des donn√©es...")

def load_model_and_data():
    """Load the trained model, its scaler, a test split and the model metadata.

    The metadata file ``../models/model_metadata.json`` is read first. Its
    ``model_type`` selects the loader: a Keras ``.h5`` file for
    ``'neural_network'``, a joblib file otherwise. The joblib feature scaler
    is loaded next. The test set is the last 20% of the stored feature
    matrix; when the feature files are missing, a small random dataset is
    generated from the metadata instead.

    Returns:
        A 5-tuple ``(model, scaler, X_test, y_test, model_metadata)``. On any
        loading failure all five elements are ``None``; the tuple always has
        five items so callers can unpack it unconditionally.
    """
    import json

    # One shared failure value so every error path returns the same arity.
    failure = (None, None, None, None, None)

    try:
        # The metadata decides which model loader is used below.
        with open('../models/model_metadata.json', 'r', encoding='utf-8') as f:
            model_metadata = json.load(f)

        print(f"‚úÖ M√©tadonn√©es du mod√®le charg√©es:")
        print(f"  - Mod√®le: {model_metadata['model_name']}")
        print(f"  - Type: {model_metadata['model_type']}")
        print(f"  - Features: {model_metadata['num_features']}")
        print(f"  - Date d'entra√Ænement: {model_metadata['model_training_date']}")

        # Model loading
        if model_metadata['model_type'] == 'neural_network':
            try:
                import keras
                model = keras.models.load_model('../models/risk_prediction_model.h5')
                print(f"‚úÖ Mod√®le Neural Network charg√©")
            except ImportError:
                print("‚ùå TensorFlow non disponible pour charger le Neural Network")
                # Previously a 4-tuple, which broke the caller's 5-way unpacking.
                return failure
        else:
            model = joblib.load('../models/risk_prediction_model.joblib')
            print(f"‚úÖ Mod√®le {model_metadata['model_name']} charg√©")

        # Scaler loading
        scaler = joblib.load('../models/feature_scaler.joblib')
        print(f"‚úÖ Scaler charg√©")

        # Feature data loading
        try:
            X_test = pd.read_parquet('../data/features/feature_matrix.parquet')
            y_test = pd.read_parquet('../data/features/target_variable.parquet')['risk_score']

            # The exact train/test split was not saved, so the last 20% of the
            # rows are used as a stand-in test set.
            # NOTE: with fewer than 5 rows test_size is 0 and the ``-0:`` slice
            # selects the whole frame.
            test_size = int(0.2 * len(X_test))
            X_test_subset = X_test.iloc[-test_size:]
            y_test_subset = y_test.iloc[-test_size:]

            print(f"‚úÖ Donn√©es de test charg√©es: {X_test_subset.shape[0]} observations")

        except FileNotFoundError:
            print("‚ö†Ô∏è Fichiers de features non trouv√©s - utilisation de donn√©es simul√©es")
            # Simulated data for demo purposes (seeded for repeatability).
            np.random.seed(42)
            n_samples = 20
            n_features = model_metadata['num_features']
            X_test_subset = pd.DataFrame(
                np.random.randn(n_samples, n_features),
                columns=model_metadata['feature_names'][:n_features]
            )
            y_test_subset = pd.Series(np.random.uniform(0, 10, n_samples))
            print(f"‚ö†Ô∏è Donn√©es simul√©es cr√©√©es: {n_samples} observations")

        return model, scaler, X_test_subset, y_test_subset, model_metadata

    except FileNotFoundError as e:
        print(f"‚ùå Fichier non trouv√©: {e}")
        print("   V√©rifiez que le model development a √©t√© ex√©cut√©")
        return failure
    except Exception as e:
        # Top-level boundary of the loader: report the problem and signal failure.
        print(f"‚ùå Erreur de chargement: {e}")
        return failure

# Load the trained model, scaler, test data and metadata
# (None values signal that loading failed).
model, scaler, X_test, y_test, model_metadata = load_model_and_data()

# =====================================================================
# 2. OVERALL PERFORMANCE EVALUATION
# =====================================================================

print("\n" + "="*60)
print("üìà √âVALUATION DES PERFORMANCES G√âN√âRALES")
print("="*60)

def comprehensive_evaluation(model, X_test, y_test, scaler, model_metadata):
    """Compute and print regression metrics for the model on the test set.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Unscaled test features; passed through ``scaler.transform``.
        y_test: True target values.
        scaler: Fitted scaler applied to ``X_test`` before prediction.
        model_metadata: Mapping whose ``'model_type'`` entry selects the
            prediction call (``'neural_network'`` output is flattened).

    Returns:
        ``(metrics, y_pred)`` where ``metrics`` maps metric names to values
        and ``y_pred`` holds the model predictions.

    Percentage-based metrics (MAPE, median and 95th-percentile relative
    error) are computed only over observations whose true value is non-zero,
    because a relative error is undefined for a zero target. They are NaN
    when every target is zero.
    """

    print(f"\nüî¨ √âvaluation en cours...")

    # Feature scaling
    X_test_scaled = scaler.transform(X_test)

    # Predictions
    if model_metadata['model_type'] == 'neural_network':
        y_pred = model.predict(X_test_scaled, verbose=0).flatten()
    else:
        y_pred = model.predict(X_test_scaled)

    # Absolute errors as a plain array, shared by all error-based metrics.
    y_true_arr = np.asarray(y_test, dtype=float)
    abs_errors = np.abs(y_true_arr - np.asarray(y_pred, dtype=float))

    # Relative errors are restricted to non-zero targets to avoid inf/NaN.
    nonzero = y_true_arr != 0
    relative_errors = abs_errors[nonzero] / np.abs(y_true_arr[nonzero])
    if relative_errors.size:
        mape = np.mean(relative_errors) * 100
        median_relative = np.median(relative_errors) * 100
        p95_relative = np.percentile(relative_errors, 95) * 100
    else:
        mape = median_relative = p95_relative = float('nan')

    # Metric computation
    metrics = {
        'RMSE': np.sqrt(mean_squared_error(y_test, y_pred)),
        'MAE': mean_absolute_error(y_test, y_pred),
        'R¬≤': r2_score(y_test, y_pred),
        'MAPE': mape,
        'Max Error': np.max(abs_errors)
    }
    metrics['Median Relative Error'] = median_relative
    metrics['95th Percentile Error'] = p95_relative

    print(f"\nüìä M√âTRIQUES DE PERFORMANCE:")
    print(f"  ‚Ä¢ RMSE (Root Mean Square Error): {metrics['RMSE']:.3f}")
    print(f"  ‚Ä¢ MAE (Mean Absolute Error): {metrics['MAE']:.3f}")
    print(f"  ‚Ä¢ R¬≤ (Coefficient de d√©termination): {metrics['R¬≤']:.3f}")
    print(f"  ‚Ä¢ MAPE (Mean Absolute Percentage Error): {metrics['MAPE']:.1f}%")
    print(f"  ‚Ä¢ Erreur maximale: {metrics['Max Error']:.3f}")
    print(f"  ‚Ä¢ Erreur relative m√©diane: {metrics['Median Relative Error']:.1f}%")
    print(f"  ‚Ä¢ 95e percentile d'erreur: {metrics['95th Percentile Error']:.1f}%")

    # Interpretation of the results
    print(f"\nüéØ INTERPR√âTATION:")
    if metrics['R¬≤'] > 0.8:
        print("  ‚úÖ Excellent: R¬≤ > 0.8 - Mod√®le tr√®s pr√©dictif")
    elif metrics['R¬≤'] > 0.6:
        print("  ‚úÖ Bon: R¬≤ > 0.6 - Mod√®le assez pr√©dictif")
    elif metrics['R¬≤'] > 0.4:
        print("  ‚ö†Ô∏è Moyen: R¬≤ > 0.4 - Mod√®le mod√©r√©ment pr√©dictif")
    else:
        print("  ‚ùå Faible: R¬≤ < 0.4 - Mod√®le peu pr√©dictif")

    if metrics['MAPE'] < 15:
        print("  ‚úÖ Erreur acceptable: MAPE < 15%")
    elif metrics['MAPE'] < 25:
        print("  ‚ö†Ô∏è Erreur mod√©r√©e: MAPE < 25%")
    else:
        print("  ‚ùå Erreur importante: MAPE > 25%")

    return metrics, y_pred

# Run the evaluation only when a model was loaded.
if model is not None:
    performance_metrics, predictions = comprehensive_evaluation(model, X_test, y_test, scaler, model_metadata)

# =====================================================================
# 3. RESIDUAL AND ERROR ANALYSIS
# =====================================================================

print("\n" + "="*60)
print("üîç ANALYSE DES R√âSIDUS ET ERREURS")
print("="*60)

def residual_analysis(y_true, y_pred):
    """Print descriptive statistics of the residuals and return them.

    Args:
        y_true: True target values as a pandas Series (Series methods such
            as ``skew`` and ``quantile`` are called on the residuals).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The residuals ``y_true - y_pred``.

    The report covers mean, spread, skewness, kurtosis, a Jarque-Bera
    normality test (skipped with a message when it cannot be run) and an
    IQR-based outlier count.
    """

    print(f"\nüìä Analyse des r√©sidus...")

    # Residual computation
    residuals = y_true - y_pred

    # Residual statistics
    print(f"\nüìà STATISTIQUES DES R√âSIDUS:")
    print(f"  ‚Ä¢ Moyenne: {residuals.mean():.3f}")
    print(f"  ‚Ä¢ √âcart-type: {residuals.std():.3f}")
    print(f"  ‚Ä¢ Minimum: {residuals.min():.3f}")
    print(f"  ‚Ä¢ Maximum: {residuals.max():.3f}")
    print(f"  ‚Ä¢ Skewness: {residuals.skew():.3f}")
    print(f"  ‚Ä¢ Kurtosis: {residuals.kurtosis():.3f}")

    # Normality test. The import lives inside the guarded region so a missing
    # scipy yields the "not available" message instead of an exception.
    try:
        from scipy.stats import jarque_bera
        jb_stat, jb_p = jarque_bera(residuals)
        print(f"  ‚Ä¢ Test de Jarque-Bera: p-value = {jb_p:.3f}")
        if jb_p > 0.05:
            print("    ‚úÖ R√©sidus probablement normaux")
        else:
            print("    ‚ö†Ô∏è R√©sidus non-normaux d√©tect√©s")
    except Exception:
        # Best-effort diagnostic: any failure here must not stop the analysis,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        print("  ‚Ä¢ Test de normalit√© non disponible")

    # Outlier detection with the 1.5 * IQR rule
    Q1 = residuals.quantile(0.25)
    Q3 = residuals.quantile(0.75)
    IQR = Q3 - Q1
    outlier_threshold = 1.5 * IQR
    outliers = residuals[(residuals < Q1 - outlier_threshold) | (residuals > Q3 + outlier_threshold)]

    # Guard the percentage against an empty residual vector.
    outlier_pct = len(outliers) / len(residuals) * 100 if len(residuals) else 0.0

    print(f"\nüéØ D√âTECTION D'OUTLIERS:")
    print(f"  ‚Ä¢ Nombre d'outliers: {len(outliers)} ({outlier_pct:.1f}%)")
    if len(outliers) > 0:
        print(f"  ‚Ä¢ Indices des outliers: {list(outliers.index)}")

    return residuals

def create_residual_visualizations(y_true, y_pred, residuals):
    """Draw a 2x3 diagnostic figure for the model's residuals.

    Panels: predictions vs. truth, residual histogram, residuals vs.
    predictions, normal Q-Q plot, absolute errors vs. truth, and
    standardized residuals with +/-2 reference lines.

    Args:
        y_true: True target values.
        y_pred: Predicted values.
        residuals: Residuals to plot (``mean``/``std`` are called on them).
    """

    print(f"\nüìä Cr√©ation des visualisations...")

    figure, grid = plt.subplots(2, 3, figsize=(18, 12))
    figure.suptitle('Analyse des R√©sidus et Performances du Mod√®le', fontsize=16, fontweight='bold')

    # Panel 1: predictions against true values, with the identity line.
    fit_ax = grid[0, 0]
    fit_ax.scatter(y_true, y_pred, alpha=0.6, color='blue')
    lower = min(y_true.min(), y_pred.min())
    upper = max(y_true.max(), y_pred.max())
    fit_ax.plot([lower, upper], [lower, upper], 'r--', lw=2, label='Parfait')
    fit_ax.set_xlabel('Valeurs R√©elles')
    fit_ax.set_ylabel('Pr√©dictions')
    fit_ax.set_title('Pr√©dictions vs R√©alit√©')
    fit_ax.legend()
    fit_ax.grid(alpha=0.3)

    # Panel 2: histogram of the residuals with their mean marked.
    hist_ax = grid[0, 1]
    hist_ax.hist(residuals, bins=15, color='lightblue', edgecolor='black', alpha=0.7)
    hist_ax.axvline(
        residuals.mean(),
        color='red',
        linestyle='--',
        label=f'Moyenne: {residuals.mean():.3f}',
    )
    hist_ax.set_xlabel('R√©sidus')
    hist_ax.set_ylabel('Fr√©quence')
    hist_ax.set_title('Distribution des R√©sidus')
    hist_ax.legend()
    hist_ax.grid(axis='y', alpha=0.3)

    # Panel 3: residuals against predictions.
    spread_ax = grid[0, 2]
    spread_ax.scatter(y_pred, residuals, alpha=0.6, color='green')
    spread_ax.axhline(0, color='red', linestyle='--')
    spread_ax.set_xlabel('Pr√©dictions')
    spread_ax.set_ylabel('R√©sidus')
    spread_ax.set_title('R√©sidus vs Pr√©dictions')
    spread_ax.grid(alpha=0.3)

    # Panel 4: normal Q-Q plot of the residuals.
    qq_ax = grid[1, 0]
    from scipy.stats import probplot
    probplot(residuals, dist="norm", plot=qq_ax)
    qq_ax.set_title('Q-Q Plot (Normalit√© des R√©sidus)')
    qq_ax.grid(alpha=0.3)

    # Panel 5: absolute errors against true values.
    abs_ax = grid[1, 1]
    abs_ax.scatter(y_true, np.abs(residuals), alpha=0.6, color='orange')
    abs_ax.set_xlabel('Valeurs R√©elles')
    abs_ax.set_ylabel('Erreur Absolue')
    abs_ax.set_title('Erreurs Absolues vs Valeurs R√©elles')
    abs_ax.grid(alpha=0.3)

    # Panel 6: residuals divided by their standard deviation.
    z_ax = grid[1, 2]
    z_scores = residuals / residuals.std()
    z_ax.scatter(range(len(z_scores)), z_scores, alpha=0.6, color='purple')
    z_ax.axhline(0, color='red', linestyle='-', alpha=0.5)
    z_ax.axhline(2, color='red', linestyle='--', alpha=0.5, label='¬±2œÉ')
    z_ax.axhline(-2, color='red', linestyle='--', alpha=0.5)
    z_ax.set_xlabel('Index')
    z_ax.set_ylabel('R√©sidus Standardis√©s')
    z_ax.set_title('R√©sidus Standardis√©s')
    z_ax.legend()
    z_ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"  ‚úÖ Visualisations cr√©√©es")

# Residual analysis, only when predictions were produced by the evaluation step.
if 'predictions' in locals():
    residuals = residual_analysis(y_test, predictions)
    create_residual_visualizations(y_test, predictions, residuals)

# =====================================================================
# 4. CROSS-VALIDATION AND ROBUSTNESS
# =====================================================================

print("\n" + "="*60)
print("üîÑ VALIDATION CROIS√âE ET TESTS DE ROBUSTESSE")
print("="*60)

def cross_validation_analysis(model, X, y, scaler, cv_folds=5):
    """Cross-validate the model and print R², RMSE and MAE per fold.

    Args:
        model: Scikit-learn compatible estimator. Objects without
            ``get_params`` (which scikit-learn needs to clone an estimator)
            are reported as unsupported.
        X: Unscaled features; passed through ``scaler.transform``.
        y: Target values.
        scaler: Fitted scaler applied to ``X``.
        cv_folds: Number of cross-validation folds.

    Returns:
        A dict keyed by metric name (``'R¬≤'``, ``'RMSE'``, ``'MAE'``), each
        holding ``'mean'``, ``'std'`` and the per-fold ``'scores'``; or
        ``None`` when the model type is not supported.
    """
    from sklearn.model_selection import cross_validate

    print(f"\nüîÑ Validation crois√©e avec {cv_folds} plis...")

    # Cross-validation clones the estimator, which requires get_params.
    if not hasattr(model, 'get_params'):
        print("‚ö†Ô∏è Validation crois√©e non disponible pour ce type de mod√®le")
        return None

    # Scaled data for cross-validation
    X_scaled = scaler.transform(X)

    # One pass over the folds for all three metrics, instead of refitting the
    # model once per metric.
    scoring = {
        'r2': 'r2',
        'rmse': 'neg_root_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
    }
    fold_results = cross_validate(model, X_scaled, y, cv=cv_folds, scoring=scoring)

    r2_scores = fold_results['test_r2']
    # scikit-learn reports error metrics negated; flip the sign back.
    rmse_scores = -fold_results['test_rmse']
    mae_scores = -fold_results['test_mae']

    cv_scores = {
        'R¬≤': {'mean': r2_scores.mean(), 'std': r2_scores.std(), 'scores': r2_scores},
        'RMSE': {'mean': rmse_scores.mean(), 'std': rmse_scores.std(), 'scores': rmse_scores},
        'MAE': {'mean': mae_scores.mean(), 'std': mae_scores.std(), 'scores': mae_scores},
    }

    print(f"\nüìä R√âSULTATS DE LA VALIDATION CROIS√âE:")
    for metric, results in cv_scores.items():
        print(f"  ‚Ä¢ {metric}:")
        print(f"    - Moyenne: {results['mean']:.3f}")
        print(f"    - √âcart-type: {results['std']:.3f}")
        print(f"    - Scores individuels: {[f'{score:.3f}' for score in results['scores']]}")

    # Stability is judged from the spread of the per-fold R² scores.
    r2_std = cv_scores['R¬≤']['std']
    if r2_std < 0.05:
        print(f"\n‚úÖ STABILIT√â EXCELLENTE: √âcart-type R¬≤ = {r2_std:.3f} < 0.05")
    elif r2_std < 0.1:
        print(f"\n‚úÖ STABILIT√â BONNE: √âcart-type R¬≤ = {r2_std:.3f} < 0.1")
    else:
        print(f"\n‚ö†Ô∏è STABILIT√â VARIABLE: √âcart-type R¬≤ = {r2_std:.3f} > 0.1")

    return cv_scores

def learning_curve_analysis(model, X, y, scaler):
    """Plot R² learning curves and report the final train/validation gap.

    Args:
        model: Estimator passed to ``learning_curve``.
        X: Unscaled features; passed through ``scaler.transform``.
        y: Target values.
        scaler: Fitted scaler applied to ``X``.

    Returns:
        ``(train_sizes_abs, train_mean, val_mean)``, or ``(None, None, None)``
        when the model has no ``predict`` attribute.
    """

    print(f"\nüìà Analyse des courbes d'apprentissage...")

    # Guard clause: nothing to do for objects that cannot predict.
    if not hasattr(model, 'predict'):
        print("‚ö†Ô∏è Courbes d'apprentissage non disponibles pour ce type de mod√®le")
        return None, None, None

    features = scaler.transform(X)

    # Ten training-set fractions from 10% to 100%, 3-fold CV, scored with R².
    fractions = np.linspace(0.1, 1.0, 10)
    sizes, fit_scores, holdout_scores = learning_curve(
        model, features, y, train_sizes=fractions, cv=3, scoring='r2'
    )

    # Per-size mean and spread across the folds.
    fit_avg, fit_dev = np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1)
    holdout_avg, holdout_dev = np.mean(holdout_scores, axis=1), np.std(holdout_scores, axis=1)

    # Plot both curves with a +/- one standard deviation band.
    plt.figure(figsize=(10, 6))
    plt.plot(sizes, fit_avg, 'o-', color='blue', label='Score d\'entra√Ænement')
    plt.fill_between(sizes, fit_avg - fit_dev, fit_avg + fit_dev, alpha=0.1, color='blue')
    plt.plot(sizes, holdout_avg, 'o-', color='red', label='Score de validation')
    plt.fill_between(sizes, holdout_avg - holdout_dev, holdout_avg + holdout_dev, alpha=0.1, color='red')

    plt.xlabel('Taille de l\'√©chantillon d\'entra√Ænement')
    plt.ylabel('Score R¬≤')
    plt.title('Courbes d\'Apprentissage')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()

    # Overfitting verdict from the gap at the largest training size.
    gap = fit_avg[-1] - holdout_avg[-1]
    if gap < 0.05:
        verdict = "‚úÖ PAS D'OVERFITTING"
    elif gap < 0.1:
        verdict = "‚ö†Ô∏è OVERFITTING L√âGER"
    else:
        verdict = "‚ùå OVERFITTING IMPORTANT"
    print(f"{verdict}: √âcart final = {gap:.3f}")

    return sizes, fit_avg, holdout_avg

# Cross-validation and learning curves, run on the loaded test data.
if model is not None and X_test is not None:
    cv_results = cross_validation_analysis(model, X_test, y_test, scaler)
    learning_results = learning_curve_analysis(model, X_test, y_test, scaler)

# =====================================================================
# 5. FEATURE IMPORTANCE AND INTERPRETABILITY ANALYSIS
# =====================================================================

print("\n" + "="*60)
print("üîç ANALYSE D'IMPORTANCE ET INTERPR√âTABILIT√â")
print("="*60)

def feature_importance_analysis(model, X, y, scaler, feature_names):
    """Rank features by permutation importance and plot the top ones.

    Args:
        model: Fitted estimator.
        X: Unscaled features; passed through ``scaler.transform``.
        y: Target values.
        scaler: Fitted scaler applied to ``X``.
        feature_names: Feature names, one per column of ``X``.

    Returns:
        A DataFrame sorted by decreasing permutation importance with columns
        ``feature``, ``perm_importance``, ``perm_std`` and, when the model
        exposes ``feature_importances_`` or ``coef_``, ``model_importance``.
        ``None`` when the computation fails.
    """

    print(f"\nüî¨ Analyse d'importance des features...")

    X_scaled = scaler.transform(X)

    # 1. Built-in model importance (when available)
    if hasattr(model, 'feature_importances_'):
        model_importance = np.asarray(model.feature_importances_)
        importance_type = "Importance du mod√®le (bas√©e sur l'arbre)"
    elif hasattr(model, 'coef_'):
        # ravel() so a (1, n_features) coefficient matrix still fits one column.
        model_importance = np.abs(np.ravel(model.coef_))
        importance_type = "Coefficients absolus"
    else:
        model_importance = None
        importance_type = None

    # Drop the built-in importance when it cannot be aligned with the features
    # (for example multi-output coefficients).
    if model_importance is not None and len(model_importance) != len(feature_names):
        model_importance = None
        importance_type = None

    # 2. Permutation importance
    try:
        perm_importance = permutation_importance(model, X_scaled, y, n_repeats=5, random_state=42)
        perm_importance_mean = perm_importance.importances_mean
        perm_importance_std = perm_importance.importances_std

        print(f"‚úÖ Permutation importance calcul√©e")

        # DataFrame to simplify display
        importance_df = pd.DataFrame({
            'feature': feature_names,
            'perm_importance': perm_importance_mean,
            'perm_std': perm_importance_std
        })

        if model_importance is not None:
            importance_df['model_importance'] = model_importance

        importance_df = importance_df.sort_values('perm_importance', ascending=False)

        print(f"\nüèÜ TOP 10 FEATURES PAR PERMUTATION IMPORTANCE:")
        # Number rows by rank; the DataFrame index still holds the pre-sort position.
        for rank, (_, row) in enumerate(importance_df.head(10).iterrows(), start=1):
            print(f"  {rank:2d}. {row['feature']:25} | {row['perm_importance']:.4f} ¬± {row['perm_std']:.4f}")

        # Visualization
        top_features = importance_df.head(15)
        positions = range(len(top_features))

        if model_importance is not None:
            # Two side-by-side panels; no separate figure is created first so
            # no empty figure is left behind.
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

            # Permutation importance
            ax1.barh(positions, top_features['perm_importance'],
                     xerr=top_features['perm_std'], color='skyblue', edgecolor='navy')
            ax1.set_yticks(positions)
            ax1.set_yticklabels(top_features['feature'])
            ax1.set_xlabel('Permutation Importance')
            ax1.set_title('Importance par Permutation')
            ax1.grid(axis='x', alpha=0.3)

            # Model importance
            ax2.barh(positions, top_features['model_importance'],
                     color='lightgreen', edgecolor='darkgreen')
            ax2.set_yticks(positions)
            ax2.set_yticklabels(top_features['feature'])
            ax2.set_xlabel(importance_type)
            ax2.set_title(importance_type)
            ax2.grid(axis='x', alpha=0.3)

            # Invert both panels so the most important feature is on top in each.
            ax1.invert_yaxis()
            ax2.invert_yaxis()

            plt.tight_layout()
        else:
            plt.figure(figsize=(12, 8))
            plt.barh(positions, top_features['perm_importance'],
                     xerr=top_features['perm_std'], color='skyblue', edgecolor='navy')
            plt.yticks(positions, top_features['feature'])
            plt.xlabel('Permutation Importance')
            plt.title('Importance des Features par Permutation')
            plt.grid(axis='x', alpha=0.3)
            plt.gca().invert_yaxis()

        plt.show()

        return importance_df

    except Exception as e:
        # Best-effort analysis: report the failure and let the caller continue.
        print(f"‚ö†Ô∏è Erreur calcul permutation importance: {e}")
        return None

def shap_analysis(model, X_sample, scaler):
    """Compute SHAP values on a sample and rank features by mean |SHAP|.

    Args:
        model: Fitted estimator.
        X_sample: DataFrame of unscaled features; passed through
            ``scaler.transform``. At most the first 50 rows are explained.
        scaler: Fitted scaler applied to ``X_sample``.

    Returns:
        ``(shap_values, shap_importance_df)``. Both are ``None`` when SHAP is
        not installed or the analysis fails, so the result can always be
        unpacked into two names.
    """

    if not HAS_SHAP:
        print("‚ö†Ô∏è SHAP non disponible - analyse d'interpr√©tabilit√© limit√©e")
        # Previously a bare None, which broke the caller's two-way unpacking.
        return None, None

    print(f"\nüî¨ Analyse SHAP...")

    try:
        X_scaled = scaler.transform(X_sample)

        # SHAP explainer creation
        if hasattr(model, 'predict_proba'):
            explainer = shap.Explainer(model, X_scaled)
        else:
            explainer = shap.Explainer(model.predict, X_scaled)

        # SHAP values for at most 50 rows
        n_explained = min(50, len(X_scaled))
        shap_values = explainer(X_scaled[:n_explained])

        # Summary plot
        shap.summary_plot(shap_values, X_sample.iloc[:min(50, len(X_sample))], show=False)
        plt.title('SHAP Summary Plot - Importance et Impact des Features')
        plt.show()

        # Global feature importance: mean absolute SHAP value per feature
        feature_importance = np.abs(shap_values.values).mean(0)
        shap_importance_df = pd.DataFrame({
            'feature': X_sample.columns,
            'shap_importance': feature_importance
        }).sort_values('shap_importance', ascending=False)

        print(f"\nüèÜ TOP 10 FEATURES PAR SHAP IMPORTANCE:")
        # Number rows by rank; the DataFrame index still holds the pre-sort position.
        for rank, (_, row) in enumerate(shap_importance_df.head(10).iterrows(), start=1):
            print(f"  {rank:2d}. {row['feature']:25} | {row['shap_importance']:.4f}")

        return shap_values, shap_importance_df

    except Exception as e:
        # Best-effort analysis: report the failure and let the caller continue.
        print(f"‚ö†Ô∏è Erreur analyse SHAP: {e}")
        return None, None

# Feature importance analyses
if model is not None and X_test is not None:
    importance_df = feature_importance_analysis(model, X_test, y_test, scaler, X_test.columns.tolist())
    
    # SHAP analysis on the first 50 rows only
    shap_values, shap_importance = shap_analysis(model, X_test.head(50), scaler)

# =====================================================================
# 6. SEGMENT ANALYSIS
# =====================================================================

print("\n" + "="*60)
print("üìä ANALYSE PAR SEGMENTS")
print("="*60)

def segment_analysis(y_true, y_pred, X_test):
    """Report and plot model performance per risk segment.

    True values are binned into three segments: [0, 3], (3, 6] and (6, 10].
    The lowest bin includes 0 so a risk score of exactly 0 is not dropped.

    Args:
        y_true: True risk scores as a pandas Series.
        y_pred: Predicted risk scores, positionally aligned with ``y_true``.
        X_test: Unused; kept for interface compatibility.

    Returns:
        A dict mapping each non-empty segment label to its ``count``,
        ``rmse``, ``mae``, ``r2`` and ``mape``. MAPE is computed over
        non-zero targets only and is NaN when a segment has none.
    """

    print(f"\nüìä Analyse par segments...")

    # Risk segments; include_lowest keeps y == 0 in the first bin.
    risk_segments = pd.cut(
        y_true,
        bins=[0, 3, 6, 10],
        labels=['Faible', 'Moyen', '√âlev√©'],
        include_lowest=True,
    )

    # Plain arrays so boolean masks are applied positionally to both vectors.
    y_true_arr = np.asarray(y_true, dtype=float)
    y_pred_arr = np.asarray(y_pred, dtype=float)

    # Per-segment performance
    segment_results = {}

    for segment in risk_segments.cat.categories:
        mask = np.asarray(risk_segments == segment)
        if not mask.any():
            continue

        y_true_seg = y_true_arr[mask]
        y_pred_seg = y_pred_arr[mask]

        # Relative error is undefined for zero targets; skip them.
        nonzero = y_true_seg != 0
        if nonzero.any():
            mape = np.mean(np.abs((y_true_seg[nonzero] - y_pred_seg[nonzero]) / y_true_seg[nonzero])) * 100
        else:
            mape = float('nan')

        segment_results[segment] = {
            'count': int(mask.sum()),
            'rmse': np.sqrt(mean_squared_error(y_true_seg, y_pred_seg)),
            'mae': mean_absolute_error(y_true_seg, y_pred_seg),
            'r2': r2_score(y_true_seg, y_pred_seg),
            'mape': mape
        }

    print(f"\nüìà PERFORMANCES PAR SEGMENT DE RISQUE:")
    for segment, metrics in segment_results.items():
        print(f"\n  {segment} Risque ({metrics['count']} observations):")
        print(f"    - RMSE: {metrics['rmse']:.3f}")
        print(f"    - MAE: {metrics['mae']:.3f}")
        print(f"    - R¬≤: {metrics['r2']:.3f}")
        print(f"    - MAPE: {metrics['mape']:.1f}%")

    # Per-segment bar charts
    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    segments = list(segment_results.keys())

    # RMSE per segment
    rmse_values = [segment_results[seg]['rmse'] for seg in segments]
    axes[0].bar(segments, rmse_values, color='lightblue', edgecolor='navy')
    axes[0].set_title('RMSE par Segment de Risque')
    axes[0].set_ylabel('RMSE')
    axes[0].grid(axis='y', alpha=0.3)

    # R² per segment
    r2_values = [segment_results[seg]['r2'] for seg in segments]
    axes[1].bar(segments, r2_values, color='lightgreen', edgecolor='darkgreen')
    axes[1].set_title('R¬≤ par Segment de Risque')
    axes[1].set_ylabel('R¬≤')
    axes[1].grid(axis='y', alpha=0.3)

    # MAPE per segment
    mape_values = [segment_results[seg]['mape'] for seg in segments]
    axes[2].bar(segments, mape_values, color='lightcoral', edgecolor='darkred')
    axes[2].set_title('MAPE par Segment de Risque')
    axes[2].set_ylabel('MAPE (%)')
    axes[2].grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

    return segment_results

# Segment analysis, only when predictions were produced by the evaluation step.
if 'predictions' in locals():
    segment_results = segment_analysis(y_test, predictions, X_test)

# =====================================================================
# 7. ROBUSTNESS TESTS
# =====================================================================

print("\n" + "="*60)
print("üõ°Ô∏è TESTS DE ROBUSTESSE")
print("="*60)

def robustness_tests(model, X_test, y_test, scaler, random_state=None):
    """Measure how R² and RMSE degrade under noisy or imputed inputs.

    Two perturbations are applied to the scaled features:
    Gaussian noise with standard deviation 0.01, 0.05, 0.1 and 0.2, and
    simulated missing values where 5%, 10% and 20% of the rows of every
    column are replaced by that column's median.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Unscaled test features; passed through ``scaler.transform``.
        y_test: True target values.
        scaler: Fitted scaler applied to ``X_test``.
        random_state: Optional seed for reproducible perturbations. When
            ``None`` the global NumPy random state is used.

    Returns:
        ``(noise_results, missing_results)``: dicts keyed by noise level and
        missing fraction respectively, each value holding ``'r2'`` and
        ``'rmse'``.
    """

    print(f"\nüõ°Ô∏è Tests de robustesse...")

    # The global np.random module and RandomState share normal()/choice().
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    X_scaled = np.asarray(scaler.transform(X_test))

    def _score(features):
        """Predict on *features* and score against y_test."""
        # ravel() flattens column-vector outputs such as (n, 1) predictions.
        y_hat = np.ravel(model.predict(features))
        return {
            'r2': r2_score(y_test, y_hat),
            'rmse': np.sqrt(mean_squared_error(y_test, y_hat))
        }

    # 1. Added Gaussian noise
    noise_levels = [0.01, 0.05, 0.1, 0.2]
    noise_results = {}

    for noise_level in noise_levels:
        X_noisy = X_scaled + rng.normal(0, noise_level, X_scaled.shape)
        noise_results[noise_level] = _score(X_noisy)

    print(f"\nüîä ROBUSTESSE AU BRUIT:")
    for noise_level, metrics in noise_results.items():
        print(f"  Bruit {noise_level*100:3.0f}%: R¬≤ = {metrics['r2']:.3f}, RMSE = {metrics['rmse']:.3f}")

    # 2. Simulated missing values, imputed with the column median
    missing_percentages = [0.05, 0.1, 0.2]
    missing_results = {}

    n_rows, n_cols = X_scaled.shape
    # Medians of the unperturbed data, computed once for all scenarios.
    column_medians = np.median(X_scaled, axis=0)

    for missing_pct in missing_percentages:
        X_missing = X_scaled.copy()
        n_rows_missing = int(missing_pct * n_rows)

        for col in range(n_cols):
            rows = rng.choice(n_rows, n_rows_missing, replace=False)
            X_missing[rows, col] = column_medians[col]

        missing_results[missing_pct] = _score(X_missing)

    print(f"\n‚ùì ROBUSTESSE AUX VALEURS MANQUANTES:")
    for missing_pct, metrics in missing_results.items():
        print(f"  {missing_pct*100:3.0f}% manquant: R¬≤ = {metrics['r2']:.3f}, RMSE = {metrics['rmse']:.3f}")

    # Robustness plots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Robustness to noise
    noise_levels_pct = [n*100 for n in noise_levels]
    noise_r2 = [noise_results[n]['r2'] for n in noise_levels]
    ax1.plot(noise_levels_pct, noise_r2, 'o-', color='blue', linewidth=2, markersize=8)
    ax1.set_xlabel('Niveau de Bruit (%)')
    ax1.set_ylabel('R¬≤')
    ax1.set_title('Robustesse au Bruit')
    ax1.grid(alpha=0.3)

    # Robustness to missing values
    missing_pct_list = [m*100 for m in missing_percentages]
    missing_r2 = [missing_results[m]['r2'] for m in missing_percentages]
    ax2.plot(missing_pct_list, missing_r2, 'o-', color='red', linewidth=2, markersize=8)
    ax2.set_xlabel('Valeurs Manquantes (%)')
    ax2.set_ylabel('R¬≤')
    ax2.set_title('Robustesse aux Valeurs Manquantes')
    ax2.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    return noise_results, missing_results

# Robustness tests, only when a model was loaded.
if model is not None:
    noise_results, missing_results = robustness_tests(model, X_test, y_test, scaler)

# =====================================================================
# 8. EVALUATION REPORT GENERATION
# =====================================================================

print("\n" + "="*60)
print("üìã G√âN√âRATION DU RAPPORT D'√âVALUATION")
print("="*60)

def generate_evaluation_report(model_metadata, performance_metrics, cv_results, importance_df,
                               segment_results, noise_results=None, missing_results=None):
    """Assemble the evaluation report, save it as JSON and print a summary.

    Args:
        model_metadata: Mapping with ``model_name``, ``model_type``,
            ``model_training_date`` and ``num_features``.
        performance_metrics: Mapping with at least ``'R¬≤'``, ``'RMSE'`` and
            ``'MAPE'``.
        cv_results: Cross-validation results, or ``None``.
        importance_df: Feature-importance DataFrame, or ``None``.
        segment_results: Per-segment metrics (each with an ``'r2'`` entry),
            or ``None``.
        noise_results: Optional noise-robustness results keyed by noise
            level. When omitted, a module-level ``noise_results`` is used if
            one exists.
        missing_results: Optional missing-value robustness results. When
            omitted, a module-level ``missing_results`` is used if one exists.

    Returns:
        The report dict, which is also written to
        ``../models/evaluation_report.json``.
    """
    import json
    import os

    print(f"\nüìã G√©n√©ration du rapport...")

    # The robustness results are module-level names in the calling script and
    # are never locals of this function, so fall back to the module globals
    # when they are not passed explicitly.
    if noise_results is None:
        noise_results = globals().get('noise_results')
    if missing_results is None:
        missing_results = globals().get('missing_results')

    report = {
        'evaluation_date': datetime.now().isoformat(),
        'model_info': {
            'name': model_metadata['model_name'],
            'type': model_metadata['model_type'],
            'training_date': model_metadata['model_training_date'],
            'features_count': model_metadata['num_features']
        },
        'performance_metrics': performance_metrics,
        'cross_validation': cv_results if cv_results else {},
        'feature_importance': importance_df.head(10).to_dict('records') if importance_df is not None else [],
        'segment_analysis': segment_results if segment_results else {},
        'robustness': {
            'noise_test': noise_results if noise_results else {},
            'missing_values_test': missing_results if missing_results else {}
        }
    }

    # Interpretations and recommendations
    recommendations = []
    baseline_r2 = performance_metrics['R¬≤']

    # Based on overall performance
    if baseline_r2 > 0.8:
        recommendations.append("‚úÖ Excellent mod√®le - D√©ploiement recommand√©")
    elif baseline_r2 > 0.6:
        recommendations.append("‚úÖ Bon mod√®le - D√©ploiement possible avec monitoring")
    else:
        recommendations.append("‚ö†Ô∏è Performances limit√©es - Am√©lioration n√©cessaire")

    # Based on robustness. The relative degradation is only meaningful for a
    # positive baseline R² and requires the 0.1 noise level to have been run.
    if noise_results and 0.1 in noise_results and baseline_r2 > 0:
        noise_degradation = (baseline_r2 - noise_results[0.1]['r2']) / baseline_r2
        if noise_degradation < 0.1:
            recommendations.append("‚úÖ Mod√®le robuste au bruit")
        else:
            recommendations.append("‚ö†Ô∏è Sensibilit√© au bruit d√©tect√©e")

    # Based on segments (skipped when no segment results are available)
    if segment_results:
        segment_r2_values = [seg['r2'] for seg in segment_results.values()]
        if min(segment_r2_values) > 0.5:
            recommendations.append("‚úÖ Performances coh√©rentes entre segments")
        else:
            recommendations.append("‚ö†Ô∏è Performances variables selon les segments de risque")

    report['recommendations'] = recommendations

    # Save the report
    output_dir = '../models'
    os.makedirs(output_dir, exist_ok=True)

    report_filename = f"{output_dir}/evaluation_report.json"
    with open(report_filename, 'w') as f:
        # default=str stringifies values json cannot encode natively
        # (for example NumPy arrays).
        json.dump(report, f, indent=2, default=str)

    print(f"‚úÖ Rapport sauvegard√©: {report_filename}")

    # Summary display
    print(f"\nüìä R√âSUM√â DE L'√âVALUATION:")
    print(f"  ‚Ä¢ Mod√®le √©valu√©: {report['model_info']['name']}")
    print(f"  ‚Ä¢ Score R¬≤: {performance_metrics['R¬≤']:.3f}")
    print(f"  ‚Ä¢ RMSE: {performance_metrics['RMSE']:.3f}")
    print(f"  ‚Ä¢ MAPE: {performance_metrics['MAPE']:.1f}%")

    print(f"\nüéØ RECOMMANDATIONS:")
    for rec in recommendations:
        print(f"  {rec}")

    return report

# Final report generation.
# The check uses globals() directly: locals() inside a generator expression
# refers to the generator's own scope, so the previous
# all(var in locals() for ...) test was always False and the report was
# never produced.
if globals().get('model_metadata') is not None and 'performance_metrics' in globals():
    final_report = generate_evaluation_report(
        model_metadata,
        performance_metrics,
        globals().get('cv_results'),
        globals().get('importance_df'),
        globals().get('segment_results')
    )

# =====================================================================
# 9. GEOSPATIAL VISUALIZATION (BONUS)
# =====================================================================

print("\n" + "="*60)
print("üó∫Ô∏è VISUALISATION G√âOSPATIALE (OPTIONNEL)")
print("="*60)

def create_risk_map(X_test, y_test, predictions):
    """Build and save a folium map of the test points coloured by predicted risk.

    Every row of ``X_test`` becomes a circle marker placed at its
    ``latitude``/``longitude``. The fill colour depends on the predicted risk
    (>= 7 red, >= 5 orange, >= 3 yellow, otherwise green) and the popup shows
    the actual and the predicted value. The map is written to
    ``../models/risk_map.html``.

    Rows are matched to ``y_test`` and ``predictions`` by *position*, not by
    index label: the index of a test split is usually a shuffled subset of the
    original labels, so using the labels as offsets pairs rows with the wrong
    values (or with the fallback value).

    Args:
        X_test: DataFrame that must contain ``latitude`` and ``longitude``
            columns.
        y_test: Actual risk values, one per row of ``X_test`` (Series, array
            or list; flattened to 1-D).
        predictions: Predicted risk values, one per row of ``X_test``
            (flattened to 1-D, so a column-shaped ``(n, 1)`` array works too).

    Returns:
        The ``folium.Map`` instance, or ``None`` when the coordinate columns
        are missing or any error occurs while building/saving the map.
    """
    # Local import: `os` is only imported inside other blocks of this file.
    import os

    print(f"\nüó∫Ô∏è Cr√©ation de la carte de risque...")

    try:
        # Guard clause: nothing can be drawn without coordinates.
        if 'latitude' not in X_test.columns or 'longitude' not in X_test.columns:
            print("‚ö†Ô∏è Colonnes g√©ographiques non trouv√©es")
            return None

        # Centre the map on the mean coordinate of the test points
        # (the original note says the data covers Boston).
        center_lat = X_test['latitude'].mean()
        center_lon = X_test['longitude'].mean()

        m = folium.Map(location=[center_lat, center_lon], zoom_start=12)

        # Flatten both value containers so they can be read by position and
        # each element is a scalar that supports the ':.2f' format below.
        actual_values = np.asarray(y_test).ravel()
        predicted_values = np.asarray(predictions).ravel()

        coordinates = zip(X_test['latitude'], X_test['longitude'])
        for position, (lat, lon) in enumerate(coordinates):
            # Fall back to the mid-scale value 5 when a container is shorter
            # than X_test (same fallback as before).
            real_risk = actual_values[position] if position < len(actual_values) else 5
            pred_risk = predicted_values[position] if position < len(predicted_values) else 5

            # Marker colour is driven by the predicted risk.
            if pred_risk >= 7:
                color = 'red'
            elif pred_risk >= 5:
                color = 'orange'
            elif pred_risk >= 3:
                color = 'yellow'
            else:
                color = 'green'

            folium.CircleMarker(
                location=[lat, lon],
                radius=8,
                popup=f"Risque r√©el: {real_risk:.2f}<br>Risque pr√©dit: {pred_risk:.2f}",
                fillColor=color,
                color='black',
                weight=1,
                fillOpacity=0.7
            ).add_to(m)

        # Save the map; create the target directory first, as the report
        # writer in this file does.
        map_filename = '../models/risk_map.html'
        os.makedirs(os.path.dirname(map_filename), exist_ok=True)
        m.save(map_filename)
        print(f"‚úÖ Carte sauvegard√©e: {map_filename}")

        return m

    except Exception as e:
        # Best-effort feature: report the problem and let the script continue.
        print(f"‚ö†Ô∏è Erreur cr√©ation carte: {e}")
        return None

# Create the map when the evaluation inputs exist.
#
# NOTE: each name is tested with a direct `in locals()` expression on purpose.
# `all(name in locals() for name in [...])` evaluates `locals()` inside the
# generator expression's own scope, so that test is always False and the map
# would never be created.
if 'X_test' in locals() and 'y_test' in locals() and 'predictions' in locals():
    risk_map = create_risk_map(X_test, y_test, predictions)

# =====================================================================
# 10. CONCLUSIONS AND NEXT STEPS
# =====================================================================

_separator = "=" * 60

print("\n" + _separator)
print("üéØ CONCLUSIONS ET PROCHAINES √âTAPES")
print(_separator)

print("\n‚úÖ √âVALUATION COMPL√âT√âE:")

if 'performance_metrics' in locals():
    _r2 = performance_metrics['R¬≤']
    print(f"  ‚Ä¢ Performance globale: R¬≤ = {_r2:.3f}")

    # Qualitative rating: the first threshold strictly exceeded wins;
    # the `else` branch runs only when no threshold was exceeded.
    for _threshold, quality, emoji in (
        (0.8, "EXCELLENTE", "üèÜ"),
        (0.6, "BONNE", "‚úÖ"),
        (0.4, "MOYENNE", "‚ö†Ô∏è"),
    ):
        if _r2 > _threshold:
            break
    else:
        quality, emoji = "FAIBLE", "‚ùå"

    print(f"  ‚Ä¢ Qualit√© du mod√®le: {emoji} {quality}")

# Report which of the expected output files actually exist on disk.
print("\nüìÅ FICHIERS G√âN√âR√âS:")
generated_files = [
    '../models/evaluation_report.json',
    '../models/risk_map.html'
]

import os

for filepath in generated_files:
    if os.path.exists(filepath):
        print(f"  ‚úÖ {filepath}")
        continue
    print(f"  ‚ö†Ô∏è {filepath} (non g√©n√©r√©)")

# Replay the recommendations stored in the final report, numbered from 1.
print("\nüöÄ PROCHAINES √âTAPES RECOMMAND√âES:")
if 'final_report' in locals() and 'recommendations' in final_report:
    for i, rec in enumerate(final_report['recommendations'], start=1):
        print(f"  {i}. {rec}")

# Static list of generic improvement ideas.
print("\nüîß AM√âLIORATIONS POSSIBLES:")
for _line in (
    "  ‚Ä¢ Collecter plus de donn√©es d'entra√Ænement",
    "  ‚Ä¢ Enrichir avec des features externes (m√©t√©o, √©v√©nements)",
    "  ‚Ä¢ Tester des architectures de mod√®les plus complexes",
    "  ‚Ä¢ Impl√©menter un syst√®me de re-entra√Ænement automatique",
    "  ‚Ä¢ D√©velopper des alertes en temps r√©el",
):
    print(_line)

print("\n" + _separator)
print("‚ú® √âVALUATION DU MOD√àLE TERMIN√âE AVEC SUCC√àS")
print(_separator)

⚠️ SHAP non installé - analyse d'interprétabilité limitée
📊 Début de l'évaluation du modèle

📁 Chargement du modèle et des données...
❌ Fichier non trouvé: [Errno 2] No such file or directory: '../models/model_metadata.json'
   Vérifiez que le model development a été exécuté

📈 ÉVALUATION DES PERFORMANCES GÉNÉRALES

🔍 ANALYSE DES RÉSIDUS ET ERREURS

🔄 VALIDATION CROISÉE ET TESTS DE ROBUSTESSE

🔍 ANALYSE D'IMPORTANCE ET INTERPRÉTABILITÉ

📊 ANALYSE PAR SEGMENTS

🛡️ TESTS DE ROBUSTESSE

📋 GÉNÉRATION DU RAPPORT D'ÉVALUATION

🗺️ VISUALISATION GÉOSPATIALE (OPTIONNEL)

🎯 CONCLUSIONS ET PROCHAINES ÉTAPES

✅ ÉVALUATION COMPLÉTÉE:

📁 FICHIERS GÉNÉRÉS:
  ⚠️ ../models/evaluation_report.json (non généré)
  ⚠️ ../models/risk_map.html (non généré)

🚀 PROCHAINES ÉTAPES RECOMMANDÉES:

🔧 AMÉLIORATIONS POSSIBLES:
  • Collecter plus de données d'entraînement
  • Enrichir avec des features externes