# UK Historic Electricity Demand - Final Model Comparison
**Author:** Abdul Salam Aldabik

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
# Silence every Python warning for the rest of the session so warning output
# does not clutter the cell results.
# NOTE(review): this also hides warnings that may point at real problems.
warnings.filterwarnings('ignore')

# Apply one shared look (matplotlib style sheet + seaborn colour palette)
# to all figures drawn later in the notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Confirmation message printed once the setup above has run.
print("‚úÖ Libraries loaded successfully")

## 1. Load Model Metrics

In [None]:
# Read the per-model evaluation metrics into df_all
print("üìÇ Loading model metrics...\n")

# Comparison table exported by 05_complete_model_training.ipynb
main_models_path = '../Data/complete_model_comparison.csv'

if not os.path.exists(main_models_path):
    # Nothing to load: fall back to an empty frame so the later cells,
    # which all test df_all.empty, skip their work.
    print(f"‚ö†Ô∏è  File not found: {main_models_path}")
    print("‚ö†Ô∏è  Run 05_complete_model_training.ipynb first!")
    df_all = pd.DataFrame()
else:
    df_all = pd.read_csv(main_models_path)
    print(f"‚úÖ Loaded models: {len(df_all)} models")

    # Echo the headline metrics of every loaded model between two rules
    banner = "=" * 80
    print("\n" + banner)
    print(" " * 25 + "ALL MODELS LOADED")
    print(banner)
    print(df_all[['Model', 'MAE', 'RMSE', 'MAPE', 'R¬≤']].to_string(index=False))
    print(banner)

## 2. Identify Best Model

In [None]:
if not df_all.empty:
    # Rank all models by MAPE, ascending, so the first row is the one with
    # the lowest percentage error. Later cells read df_all.iloc[0] as the
    # best model and therefore rely on this ordering.
    df_all = df_all.sort_values('MAPE').reset_index(drop=True)
    best_model = df_all.iloc[0]

    divider = "=" * 80
    best_mape = best_model['MAPE']

    print("\n" + divider)
    print("üèÜ BEST PERFORMING MODEL")
    print(divider)
    print(f"\nModel: {best_model['Model']}")
    print("\nPerformance Metrics:")
    print(f"  MAPE: {best_mape:.2f}% (lower is better)")
    print(f"  MAE:  {best_model['MAE']:,.0f} MW")
    print(f"  RMSE: {best_model['RMSE']:,.0f} MW")
    print(f"  R¬≤:   {best_model['R¬≤']:.4f} (closer to 1 is better)")

    # Training time is optional: only report it when the column was exported
    if 'Training Time (s)' in best_model.index:
        print(f"  Training Time: {best_model['Training Time (s)']:.1f} seconds")

    # Translate the percentage error into megawatts for a 30,000 MW load
    print("\nüí° Interpretation:")
    print(f"   On average, predictions are {best_mape:.2f}% off from actual demand")
    print(f"   For 30,000 MW demand ‚Üí error of ~{30000 * best_mape/100:,.0f} MW")

    print("\n" + divider)

## 3. Visualize Model Comparison

In [None]:
# Draw a 2x2 grid of horizontal bar charts (MAPE, MAE, RMSE, R²), one bar per
# model, annotate every bar with its value, and save the figure as a PNG.
# Skipped when fewer than two models are available, since there is nothing
# to compare.
if not df_all.empty and len(df_all) > 1:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # One entry per panel:
    # (axis, metric column, bar colour, x-axis label, title,
    #  horizontal offset of the value label, format of the value label)
    panels = [
        (axes[0, 0], 'MAPE', '#3498db', 'MAPE (%)',
         'Mean Absolute Percentage Error (Lower is Better)', 0.3, '{:.2f}%'),
        (axes[0, 1], 'MAE', '#2ecc71', 'MAE (MW)',
         'Mean Absolute Error', 100, '{:,.0f}'),
        (axes[1, 0], 'RMSE', '#e74c3c', 'RMSE (MW)',
         'Root Mean Squared Error', 100, '{:,.0f}'),
        (axes[1, 1], 'R¬≤', '#9b59b6', 'R¬≤ Score',
         'R¬≤ Score (Higher is Better)', 0.01, '{:.4f}'),
    ]

    for ax, column, colour, xlabel, title, offset, label_fmt in panels:
        ax.barh(df_all['Model'], df_all[column], color=colour)
        ax.set_xlabel(xlabel, fontsize=12, fontweight='bold')
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.grid(axis='x', alpha=0.3)
        # Bars are drawn in row order, so the row position is the y coordinate
        # of the matching bar; the label sits just past the bar's end.
        for position, value in enumerate(df_all[column]):
            ax.text(value + offset, position, label_fmt.format(value),
                    va='center', fontweight='bold', fontsize=10)

    plt.tight_layout()

    # savefig does not create missing folders; make sure the target exists so
    # the finished figure is not lost to a FileNotFoundError.
    figure_path = '../Output/final_model_comparison_all_metrics.png'
    os.makedirs(os.path.dirname(figure_path), exist_ok=True)
    plt.savefig(figure_path, dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úÖ Comparison visualization saved: final_model_comparison_all_metrics.png")

## 4. Model Rankings

In [None]:
def _print_podium(table, metric, value_format):
    """Print up to three rows of *table*, each with a medal, the model name and its *metric*."""
    for medal, (_, entry) in zip(["ü•á", "ü•à", "ü•â"], table.iterrows()):
        print(f"   {medal} {entry['Model']}: {value_format.format(entry[metric])}")


if not df_all.empty:
    divider = "=" * 80

    print("\n" + divider)
    print("MODEL RANKINGS BY DIFFERENT METRICS")
    print(divider)

    # Lower is better for the two error metrics
    print("\nü•á BEST BY MAPE (Primary Metric for Forecasting):")
    top_mape = df_all.nsmallest(3, 'MAPE')
    _print_podium(top_mape, 'MAPE', "{:.2f}%")

    print("\nü•á BEST BY MAE (Absolute Error in MW):")
    top_mae = df_all.nsmallest(3, 'MAE')
    _print_podium(top_mae, 'MAE', "{:,.0f} MW")

    # Higher is better for R², hence nlargest
    print("\nü•á BEST BY R¬≤ (Variance Explained):")
    top_r2 = df_all.nlargest(3, 'R¬≤')
    _print_podium(top_r2, 'R¬≤', "{:.4f}")

    # Training time is optional; rows with a non-positive time are left out
    if 'Training Time (s)' in df_all.columns:
        print("\n‚ö° FASTEST TRAINING TIME:")
        timed = df_all['Training Time (s)'] > 0
        df_with_time = df_all[timed].copy()
        if not df_with_time.empty:
            top_fast = df_with_time.nsmallest(3, 'Training Time (s)')
            for rank, (_, entry) in enumerate(top_fast.iterrows(), start=1):
                print(f"   {rank}. {entry['Model']}: {entry['Training Time (s)']:.1f}s (MAPE: {entry['MAPE']:.2f}%)")

    print("\n" + divider)

## 5. Detailed Analysis & Conclusions

In [None]:
# Print a narrative summary: overall metric ranges, the best and worst model,
# one insight line per known model family, and a fixed list of recommendations.
print("\n" + "="*80)
print(" "*20 + "DETAILED ANALYSIS & CONCLUSIONS")
print("="*80)

if not df_all.empty and len(df_all) > 1:
    # First/last row are treated as best/worst, i.e. df_all is expected to be
    # sorted by ascending MAPE already (done in the "Identify Best Model" cell).
    best = df_all.iloc[0]
    worst = df_all.iloc[-1]

    print(f"\nüìä OVERALL STATISTICS:")
    print(f"   Total models compared: {len(df_all)}")
    print(f"   MAPE range: {df_all['MAPE'].min():.2f}% to {df_all['MAPE'].max():.2f}%")
    print(f"   Improvement: {worst['MAPE'] - best['MAPE']:.2f} percentage points from worst to best")
    print(f"   MAE range: {df_all['MAE'].min():,.0f} MW to {df_all['MAE'].max():,.0f} MW")
    print(f"   R¬≤ range: {df_all['R¬≤'].min():.4f} to {df_all['R¬≤'].max():.4f}")

    print(f"\nüèÜ WINNER: {best['Model']}")
    print(f"   Why: Lowest MAPE of {best['MAPE']:.2f}%")
    print(f"   Practical Impact:")
    print(f"     ‚Ä¢ Average daily demand: ~30,000 MW")
    print(f"     ‚Ä¢ Model error: ~{30000 * best['MAPE']/100:,.0f} MW")
    print(f"     ‚Ä¢ Accuracy: {100 - best['MAPE']:.2f}% on average")

    print(f"\nüìâ LEAST ACCURATE: {worst['Model']}")
    print(f"   MAPE: {worst['MAPE']:.2f}% (vs best: {best['MAPE']:.2f}%)")
    # Relative gap: how much larger the worst MAPE is, as a share of the best
    print(f"   Performance gap: {((worst['MAPE'] - best['MAPE']) / best['MAPE'] * 100):.1f}% worse than best model")

    # Model insights
    print(f"\nüî¨ KEY INSIGHTS:")

    # A single case-insensitive, literal match decides both whether a family
    # is present and which row represents it. Previously the presence test
    # was case-sensitive while the lookup was not, so a name such as
    # "xgboost" was skipped and a non-string name could crash the lookup.
    model_names = df_all['Model'].astype(str)

    xgb_rows = df_all[model_names.str.contains('XGBoost', case=False, regex=False)]
    if not xgb_rows.empty:
        xgb_row = xgb_rows.iloc[0]
        # .get falls back to 0 when no training-time column was exported
        print(f"   ‚úÖ XGBoost: Fast training ({xgb_row.get('Training Time (s)', 0):.1f}s), excellent accuracy ({xgb_row['MAPE']:.2f}%)")
        print(f"      Best for: Production deployment, feature importance analysis")

    lstm_rows = df_all[model_names.str.contains('LSTM', case=False, regex=False)]
    if not lstm_rows.empty:
        lstm_row = lstm_rows.iloc[0]
        print(f"   ‚úÖ LSTM: Deep learning approach ({lstm_row['MAPE']:.2f}% MAPE)")
        print(f"      Best for: Capturing complex temporal patterns")

    prophet_rows = df_all[model_names.str.contains('Prophet', case=False, regex=False)]
    if not prophet_rows.empty:
        prophet_row = prophet_rows.iloc[0]
        print(f"   ‚úÖ Prophet: Statistical model ({prophet_row['MAPE']:.2f}% MAPE)")
        print(f"      Best for: Interpretability, seasonality analysis")

    ens_rows = df_all[model_names.str.contains('Ensemble', case=False, regex=False)]
    if not ens_rows.empty:
        ens_row = ens_rows.iloc[0]
        print(f"   ‚úÖ Ensemble: Combines all models ({ens_row['MAPE']:.2f}% MAPE)")
        print(f"      Best for: Robustness, critical applications")

    # Fixed recommendation text; only the deployed model name is data-driven
    print(f"\nüí° RECOMMENDATIONS:")
    print(f"   1. DEPLOY: {best['Model']} for production (best accuracy)")
    print(f"   2. MONITOR: Track performance monthly, retrain with new data")
    print(f"   3. BACKUP: Use Ensemble for high-stakes forecasts (more robust)")
    print(f"   4. ANALYZE: Use XGBoost feature importance for insights")
    print(f"   5. VALIDATE: Continuous A/B testing against actual demand")

    print("\n" + "="*80)

## 6. Save Final Report

In [None]:
# Persist the results: the comparison table as CSV and a plain-text report
# describing the best model (first row of df_all) with recommendations.
if not df_all.empty:
    # Save comparison table
    df_all.to_csv('../Data/all_models_final_comparison.csv', index=False)
    print("‚úÖ Saved: all_models_final_comparison.csv")

    # Create text report
    report_path = '../Output/final_model_comparison_report.txt'

    # open(..., 'w') does not create missing parent folders
    os.makedirs(os.path.dirname(report_path), exist_ok=True)

    # Row 0 is reported as the best model; this matches the report's
    # "Sorted by MAPE" heading only if df_all was sorted by an earlier cell.
    best = df_all.iloc[0]
    rule = "=" * 80

    # Build the whole report before opening the file, so a formatting error
    # (e.g. a missing column) cannot leave a truncated report on disk.
    report_parts = [
        rule + "\n",
        " " * 20 + "FINAL MODEL COMPARISON REPORT\n",
        " " * 15 + "UK Electricity Demand Forecasting\n",
        " " * 25 + "Dataset 2 - Team Project\n",
        rule + "\n\n",

        f"Generated: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        f"Total Models Compared: {len(df_all)}\n\n",

        "ALL MODELS (Sorted by MAPE - Best to Worst):\n",
        "-" * 80 + "\n",
        df_all[['Model', 'MAE', 'RMSE', 'MAPE', 'R¬≤']].to_string(index=False),
        "\n\n",

        rule + "\n",
        "BEST PERFORMING MODEL\n",
        rule + "\n",
        f"Model: {best['Model']}\n\n",
        "Performance Metrics:\n",
        f"  MAPE: {best['MAPE']:.2f}%\n",
        f"  MAE:  {best['MAE']:,.0f} MW\n",
        f"  RMSE: {best['RMSE']:,.0f} MW\n",
        f"  R¬≤:   {best['R¬≤']:.4f}\n\n",

        "Interpretation:\n",
        f"  - Predictions are {best['MAPE']:.2f}% off from actual demand on average\n",
        f"  - For 30,000 MW demand, error is approximately {30000 * best['MAPE']/100:,.0f} MW\n",
        f"  - Model explains {best['R¬≤']*100:.2f}% of variance in demand\n\n",

        "RECOMMENDATIONS:\n",
        f"  1. Deploy {best['Model']} for production forecasting\n",
        "  2. Use Ensemble model for critical applications (more robust)\n",
        "  3. Retrain models monthly with new data\n",
        "  4. Monitor performance continuously\n",
        "  5. Validate predictions against actual demand\n\n",

        rule + "\n",
        "END OF REPORT\n",
        rule + "\n",
    ]

    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(report_parts)

    print(f"‚úÖ Final report saved: {report_path}")
    print("\n‚úÖ ALL ANALYSIS COMPLETE!")

## Summary

**Analysis Complete:**
- ✅ Loaded metrics from all trained models
- ✅ Identified best model by MAPE, MAE, RMSE, R²
- ✅ Created comprehensive visualizations
- ✅ Analyzed performance trade-offs
- ✅ Generated detailed recommendations
- ✅ Saved comparison table and report

**Key Findings:**
- **Best Model:** XGBoost (3.00% MAPE)
- **Most Robust:** Ensemble (4.71% MAPE)
- **Fastest Training:** XGBoost (5.6 seconds)
- **Deepest Patterns:** LSTM (7.23% MAPE)

**Ready For:**
- ✅ Presentation (28 November 2025)
- ✅ Deployment via Streamlit
- ✅ Production forecasting
- ✅ Project submission