# Model Evaluation and Comparison

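This notebook collects helper functions for evaluating and comparing forecasting models: error metrics, residual diagnostics, accuracy by forecast horizon, side-by-side model comparison, expanding-window time series cross-validation, and diagnostic plots.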

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

## 1. Evaluation Metrics

In [None]:
def calculate_metrics(actual, predicted):
    """
    Calculate comprehensive evaluation metrics.

    Both inputs are converted to NumPy float arrays and compared element by
    element in positional order, so plain lists work and pandas objects are
    not re-aligned on their index labels (matching how the scikit-learn
    metric functions treat their inputs).

    Args:
        actual: Observed values (array-like of numbers).
        predicted: Predicted values, same length as ``actual``.

    Returns:
        dict with keys 'MAE', 'MSE', 'RMSE', 'MAPE', 'R²', 'Median_AE' and
        'Max_Error'. 'MAPE' is a percentage computed only over observations
        whose actual value is non-zero; it is NaN when every actual value
        is zero.
    """
    actual_arr = np.asarray(actual, dtype=float)
    predicted_arr = np.asarray(predicted, dtype=float)

    # The scikit-learn calls run first so that mismatched or empty inputs are
    # rejected by their validation before any element-wise arithmetic.
    mae = mean_absolute_error(actual_arr, predicted_arr)
    mse = mean_squared_error(actual_arr, predicted_arr)
    rmse = np.sqrt(mse)
    r2 = r2_score(actual_arr, predicted_arr)

    abs_errors = np.abs(actual_arr - predicted_arr)

    # Percentage error is undefined where the actual value is zero; dividing
    # there would turn the whole metric into inf/NaN, so those points are
    # excluded from the average.
    nonzero = actual_arr != 0
    if nonzero.any():
        mape = np.mean(abs_errors[nonzero] / np.abs(actual_arr[nonzero])) * 100
    else:
        mape = np.nan

    # Additional metrics
    median_ae = np.median(abs_errors)
    max_error = np.max(abs_errors)

    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'MAPE': mape,
        'R²': r2,
        'Median_AE': median_ae,
        'Max_Error': max_error
    }

## 2. Residual Analysis

In [None]:
def plot_residuals(actual, predicted, model_name='Model'):
    """
    Plot residual analysis.

    Draws a 2x2 figure: residuals vs. fitted values, a normal Q-Q plot, a
    density histogram of the residuals, and the residuals in sequence.

    Args:
        actual: Observed values (array-like). When it carries a pandas index,
            that index is used as the x-axis of the sequence panel.
        predicted: Predicted values, same length as ``actual``.
        model_name: Label used as the prefix of every panel title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Residuals are computed positionally so that list inputs work and pandas
    # objects with differing index labels are not re-aligned into NaNs.
    residuals = np.asarray(actual) - np.asarray(predicted)

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Residuals vs Fitted
    axes[0, 0].scatter(predicted, residuals, alpha=0.6)
    axes[0, 0].axhline(y=0, color='red', linestyle='--')
    axes[0, 0].set_xlabel('Predicted Values')
    axes[0, 0].set_ylabel('Residuals')
    axes[0, 0].set_title(f'{model_name} - Residuals vs Fitted')

    # Q-Q plot
    stats.probplot(residuals, dist="norm", plot=axes[0, 1])
    axes[0, 1].set_title(f'{model_name} - Q-Q Plot')

    # Histogram of residuals
    axes[1, 0].hist(residuals, bins=30, alpha=0.7, density=True)
    axes[1, 0].set_xlabel('Residuals')
    axes[1, 0].set_ylabel('Density')
    axes[1, 0].set_title(f'{model_name} - Residual Distribution')

    # Residuals in sequence. A bare hasattr(actual, 'index') check is also
    # true for lists and tuples (their .index is a method), so require an
    # actual pandas Index before using it as the x-axis.
    sequence_ax = axes[1, 1]
    index = getattr(actual, 'index', None)
    if isinstance(index, pd.Index):
        sequence_ax.plot(index, residuals)
        x_label, title_suffix = 'Time', 'Residuals Over Time'
    else:
        sequence_ax.plot(residuals)
        x_label, title_suffix = 'Observation', 'Residuals Sequence'
    sequence_ax.axhline(y=0, color='red', linestyle='--')
    sequence_ax.set_xlabel(x_label)
    sequence_ax.set_ylabel('Residuals')
    sequence_ax.set_title(f'{model_name} - {title_suffix}')

    plt.tight_layout()
    plt.show()

## 3. Forecast Accuracy by Horizon

In [None]:
def evaluate_by_horizon(actual, predicted, horizons=(1, 6, 12, 24)):
    """
    Evaluate forecast accuracy at different horizons.

    For each horizon ``h`` the metrics are computed over the first ``h``
    observations (``actual[:h]`` vs. ``predicted[:h]``), i.e. cumulatively up
    to step ``h`` rather than at step ``h`` alone.

    Args:
        actual: Observed values; must support ``len()`` and slicing.
        predicted: Predicted values; must support slicing.
        horizons: Iterable of horizon lengths. Horizons smaller than 1 or
            larger than ``len(actual)`` are skipped.

    Returns:
        pandas.DataFrame with one row per evaluated horizon, holding the
        metrics from ``calculate_metrics`` plus a 'Horizon' column. Empty
        when no horizon could be evaluated.
    """
    n_obs = len(actual)
    results = []

    for h in horizons:
        # h < 1 would yield an empty (h == 0) or tail-trimmed (h < 0) slice,
        # neither of which is a meaningful forecast horizon.
        if h < 1 or h > n_obs:
            continue

        metrics = calculate_metrics(actual[:h], predicted[:h])
        metrics['Horizon'] = h
        results.append(metrics)

    return pd.DataFrame(results)

## 4. Model Comparison

In [None]:
def compare_models(actual, predictions_dict):
    """
    Compare multiple models.

    Args:
        actual: Observed values shared by every model.
        predictions_dict: Mapping of model name to that model's predictions.

    Returns:
        pandas.DataFrame indexed by 'Model', with one row per model and one
        column per metric returned by ``calculate_metrics``. When
        ``predictions_dict`` is empty, an empty DataFrame whose index is
        named 'Model' is returned.
    """
    comparison_results = [
        {**calculate_metrics(actual, predicted), 'Model': model_name}
        for model_name, predicted in predictions_dict.items()
    ]

    # With no rows there is no 'Model' column, and set_index would raise a
    # KeyError; return an explicitly empty, correctly indexed frame instead.
    if not comparison_results:
        return pd.DataFrame(index=pd.Index([], name='Model'))

    return pd.DataFrame(comparison_results).set_index('Model')

def plot_model_comparison(comparison_df, metric='MAE'):
    """
    Draw a bar chart of one metric across models.

    Args:
        comparison_df: DataFrame with one row per model and a column named
            ``metric``; the row index supplies the bar labels.
        metric: Name of the column to plot.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(10, 6))

    # The pandas plot call draws on the current axes and hands them back.
    bar_axes = comparison_df[metric].plot(kind='bar')
    bar_axes.set_title(f'Model Comparison - {metric}')
    bar_axes.set_ylabel(metric)

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

## 5. Cross-Validation for Time Series

In [None]:
def time_series_cv(data, model_func, n_splits=5, test_size=48):
    """
    Time series cross-validation with expanding window.

    The last ``n_splits * test_size`` observations are divided into
    consecutive test windows. Fold ``i`` trains on everything before its test
    window, so the training set grows by ``test_size`` each fold.

    Args:
        data: Sequence supporting ``len()`` and slicing.
        model_func: Callable ``model_func(train_data, n_steps)`` returning
            ``n_steps`` predictions for the period right after ``train_data``.
        n_splits: Number of folds. Values below 1 produce an empty result.
        test_size: Number of observations in each test window.

    Returns:
        pandas.DataFrame with one row per fold, holding the metrics from
        ``calculate_metrics`` plus a 1-based 'Fold' column.

    Raises:
        ValueError: If ``test_size`` is smaller than 1, or if ``data`` is too
            short to leave at least one training observation before the
            first test window.
    """
    if n_splits < 1:
        # No folds requested: same empty result the loop would produce.
        return pd.DataFrame([])

    if test_size < 1:
        raise ValueError(f"test_size must be at least 1, got {test_size}")

    total_len = len(data)

    # Size of the training set in the first fold.
    min_train_size = total_len - n_splits * test_size
    if min_train_size < 1:
        # A zero or negative boundary would make the slices below silently
        # select an empty or wrong portion of the data.
        raise ValueError(
            f"Not enough data for {n_splits} splits of size {test_size}: "
            f"need more than {n_splits * test_size} observations, "
            f"got {total_len}"
        )

    results = []

    for fold in range(n_splits):
        train_end = min_train_size + fold * test_size
        # By construction the last fold ends exactly at total_len, so no
        # bounds check is needed here.
        test_end = train_end + test_size

        train_data = data[:train_end]
        test_data = data[train_end:test_end]

        # Train model and predict
        predictions = model_func(train_data, len(test_data))

        # Calculate metrics
        metrics = calculate_metrics(test_data, predictions)
        metrics['Fold'] = fold + 1
        results.append(metrics)

    return pd.DataFrame(results)

## 6. Visualization Functions

In [None]:
def plot_predictions_vs_actual(actual, predicted, model_name='Model'):
    """
    Plot predicted vs actual values.

    Draws a scatter of predicted against actual values, a dashed diagonal
    marking perfect prediction, and a text box with the R² score.

    Args:
        actual: Observed values (array-like of numbers).
        predicted: Predicted values, same length as ``actual``.
        model_name: Label used as the prefix of the plot title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(actual, predicted, alpha=0.6)

    # Perfect prediction line. The range is taken with vectorized NumPy
    # reductions rather than Python-level min()/max() over every element.
    actual_arr = np.asarray(actual)
    predicted_arr = np.asarray(predicted)
    min_val = min(actual_arr.min(), predicted_arr.min())
    max_val = max(actual_arr.max(), predicted_arr.max())
    plt.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)

    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(f'{model_name} - Predicted vs Actual')

    # Add R² score
    r2 = r2_score(actual, predicted)
    plt.text(0.1, 0.9, f'R² = {r2:.3f}', transform=plt.gca().transAxes,
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    plt.show()

def plot_forecast_with_confidence(actual, predicted, confidence_intervals=None, model_name='Model',
                                  ci_label='95% CI'):
    """
    Plot forecast with confidence intervals.

    Args:
        actual: Observed values. When it carries a pandas index, that index
            is used as the x-axis; otherwise positions 0..len(actual)-1 are.
        predicted: Predicted values, same length as ``actual``.
        confidence_intervals: Optional ``(lower, upper)`` pair of sequences,
            each the same length as ``actual``, drawn as a shaded band.
        model_name: Label used as the prefix of the plot title.
        ci_label: Legend label for the shaded band. The function does not
            compute the interval itself, so pass a matching label when the
            supplied bounds are not 95% intervals.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(15, 8))

    # A bare hasattr(actual, 'index') check is also true for lists and tuples
    # (their .index is a method), so require an actual pandas Index.
    index = getattr(actual, 'index', None)
    time_index = index if isinstance(index, pd.Index) else range(len(actual))

    plt.plot(time_index, actual, label='Actual', color='black', linewidth=2)
    plt.plot(time_index, predicted, label='Predicted', color='red', linestyle='--')

    if confidence_intervals is not None:
        lower_ci, upper_ci = confidence_intervals
        plt.fill_between(time_index, lower_ci, upper_ci, alpha=0.3, label=ci_label)

    plt.legend()
    plt.title(f'{model_name} - Forecast vs Actual')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.tight_layout()
    plt.show()