# 05 - Model Evaluation

Comprehensive model evaluation and comparison

In [None]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

import sys
import os

# `from src.evaluate import ...` resolves the top-level package `src`, so the
# directory that CONTAINS src/ must be importable -- putting '../src' itself on
# sys.path is not enough for that import form.  The original '../src' entry is
# kept (some code may import modules from inside src/ by bare name) and the
# parent directory is added next to it.
# NOTE(review): assumes the notebook's working directory is a sibling of src/
# (e.g. notebooks/) -- confirm against the repository layout.
for _extra_path in ('../src', '..'):
    _extra_path = os.path.abspath(_extra_path)
    # Skip entries that are already present so re-running the cell does not
    # keep growing sys.path.
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)
from src.evaluate import calculate_metrics, print_metrics

## Load Data and Model

In [None]:
# Read the feature-engineered dataset that the rest of the notebook evaluates on.
df = pd.read_csv('../data/processed/featured_dataset.csv')

# Restore the persisted estimator, then the scaler that is later applied to
# the test features.
# NOTE(review): pickle.load executes arbitrary code embedded in the file --
# only load artifacts produced by a trusted training run.
with open('../models/best_model.pkl', 'rb') as model_file:
    best_model = pickle.load(model_file)

with open('../models/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Quick sanity output: which estimator class was loaded and how big the data is.
print(f"✓ Model loaded: {type(best_model).__name__}")
print(f"✓ Dataset shape: {df.shape}")

## Prepare Test Data

In [None]:
# Split the dataset into features (every column except the target) and target.
target_col = 'quantity'
X = df.drop(columns=[target_col])
y = df[target_col]

# Use same split as training: identical test_size / random_state reproduce the
# same partition, provided training used these exact arguments.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Apply the already-fitted scaler; it is NOT refitted on the test data.
X_test_scaled = scaler.transform(X_test)

print(f"Test set size: {X_test.shape[0]} samples")
# train_test_split shuffles rows by default, so the first/last positions of the
# test set are arbitrary rows rather than the bounds of a contiguous period.
# Report the smallest and largest row index actually present instead.
print(f"Test index range: {X_test.index.min()} to {X_test.index.max()}")

## Make Predictions

In [None]:
# Score the scaled test features with the loaded model.
y_pred = best_model.predict(X_test_scaled)

# Assemble the whole report first and emit it in one call; the blank line
# between the prediction and actual statistics comes from the leading "\n".
prediction_report = [
    "✓ Predictions generated",
    f"  Min prediction: {y_pred.min():.2f}",
    f"  Max prediction: {y_pred.max():.2f}",
    f"  Mean prediction: {y_pred.mean():.2f}",
    f"\n  Min actual: {y_test.min():.2f}",
    f"  Max actual: {y_test.max():.2f}",
    f"  Mean actual: {y_test.mean():.2f}",
]
print("\n".join(prediction_report))

## Calculate Metrics

In [None]:
# Compute the evaluation metrics for the test-set predictions with the project
# helper imported from src.evaluate.  The Performance Summary cell later reads
# the 'R2', 'RMSE', 'MAE' and 'MAPE' entries of this result.
metrics = calculate_metrics(y_test, y_pred)
# Display the metrics; the output format is defined by the project helper.
print_metrics(metrics)

## Residual Analysis

In [None]:
from scipy import stats

# Residual = actual - predicted, one value per test sample.
residuals = y_test.values - y_pred

# train_test_split shuffles rows by default, so the positional order of the
# test set is arbitrary.  For the sequence plot, order samples by their
# original row index so neighbouring points are neighbouring rows.
# NOTE(review): assumes the dataset's row index is chronological -- confirm.
row_order = np.argsort(y_test.index.values, kind='stable')

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. Residuals over time (ordered by original row index)
axes[0, 0].plot(y_test.index.values[row_order], residuals[row_order])
axes[0, 0].axhline(y=0, color='r', linestyle='--')
axes[0, 0].set_title('Residuals Over Time')
axes[0, 0].set_ylabel('Residual')

# 2. Histogram of residuals
axes[0, 1].hist(residuals, bins=50, edgecolor='black')
axes[0, 1].set_title('Distribution of Residuals')
axes[0, 1].set_xlabel('Residual')

# 3. Q-Q plot against a normal distribution
stats.probplot(residuals, dist="norm", plot=axes[1, 0])
axes[1, 0].set_title('Q-Q Plot')

# 4. Predicted vs Actual, with the y = x reference line for a perfect fit
axes[1, 1].scatter(y_test, y_pred, alpha=0.5)
axes[1, 1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
axes[1, 1].set_xlabel('Actual')
axes[1, 1].set_ylabel('Predicted')
axes[1, 1].set_title('Predicted vs Actual')

plt.tight_layout()
# savefig raises if the target directory is missing, so create it first.
figure_path = '../reports/model_evaluation.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, dpi=100)
plt.show()

## Error Distribution

In [None]:
# Calculate absolute percentage error per test sample.
# Rows whose actual value is 0 have no defined percentage error (division by
# zero would yield inf/NaN and distort every aggregate below), so they are
# left as NaN.  The array keeps one entry per test sample so it still lines up
# with y_test / y_pred when reused later in the notebook.
actual_values = y_test.values
predicted_values = np.asarray(y_pred)
nonzero_actual = actual_values != 0

mape_values = np.full(actual_values.shape, np.nan, dtype=float)
mape_values[nonzero_actual] = np.abs(
    (actual_values[nonzero_actual] - predicted_values[nonzero_actual])
    / actual_values[nonzero_actual]
) * 100

print("Error Metrics:")
# NaN-aware aggregates ignore the undefined entries.
print(f"  Mean Absolute Percentage Error (MAPE): {np.nanmean(mape_values):.2f}%")
print(f"  Median Absolute Percentage Error: {np.nanmedian(mape_values):.2f}%")
print(f"  95th Percentile Error: {np.nanpercentile(mape_values, 95):.2f}%")
print(f"  Max Absolute Error: {np.max(np.abs(residuals)):.2f}")

zero_actual_count = int((~nonzero_actual).sum())
if zero_actual_count:
    print(f"  ({zero_actual_count} samples with actual == 0 excluded from percentage errors)")

# Plot error distribution (finite values only, so undefined entries do not
# reach the histogram).
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=mape_values[np.isfinite(mape_values)],
    nbinsx=50,
    name='MAPE Distribution'
))
fig.update_layout(
    title='Distribution of Absolute Percentage Error (MAPE)',
    xaxis_title='MAPE (%)',
    yaxis_title='Frequency'
)
fig.show()

## Forecast Results Visualization

In [None]:
# Create interactive plot of predictions
import plotly.graph_objects as go

# A 'lines' trace connects points in the order they are supplied.  The test
# set comes out of train_test_split in shuffled order, so plotting it as-is
# draws a line that jumps back and forth across the x axis.  Apply one shared
# permutation that sorts by row index so both traces read left to right and
# stay aligned with each other.
plot_order = np.argsort(y_test.index.values, kind='stable')
sorted_index = y_test.index.values[plot_order]
sorted_actual = y_test.values[plot_order]
sorted_predicted = np.asarray(y_pred)[plot_order]

fig = go.Figure()

# Add actual values
fig.add_trace(go.Scatter(
    x=sorted_index,
    y=sorted_actual,
    mode='lines',
    name='Actual',
    line=dict(color='blue')
))

# Add predictions
fig.add_trace(go.Scatter(
    x=sorted_index,
    y=sorted_predicted,
    mode='lines',
    name='Predicted',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Sales',
    xaxis_title='Sample Index',
    yaxis_title='Sales',
    hovermode='x unified'
)
fig.show()

## Performance Summary

In [None]:
# One (label, formatted value) pair per reported figure: four entries come
# from the metrics computed earlier, two from the residuals.
summary_rows = [
    ('R² Score', f"{metrics['R2']:.4f}"),
    ('RMSE', f"{metrics['RMSE']:.4f}"),
    ('MAE', f"{metrics['MAE']:.4f}"),
    ('MAPE', f"{metrics['MAPE']:.2f}%"),
    ('Mean Residual', f"{residuals.mean():.4f}"),
    ('Std Residual', f"{residuals.std():.4f}"),
]

# Column-oriented view of the same rows, as expected by the DataFrame below.
summary = {
    'Metric': [label for label, _ in summary_rows],
    'Value': [value for _, value in summary_rows],
}
summary_df = pd.DataFrame(summary)

# Framed table: a 50-character rule above the title, below it, and after the rows.
banner = "=" * 50
print("\n" + banner)
print("FINAL MODEL PERFORMANCE SUMMARY")
print(banner)
print(summary_df.to_string(index=False))
print(banner)

## Save Predictions

In [None]:
# Save predictions to file: one row per test sample with its original row
# index, the actual and predicted values, and the per-sample error measures.
results_df = pd.DataFrame({
    'index': y_test.index,
    'actual': y_test.values,
    'predicted': y_pred,
    'residual': residuals,
    'abs_error': np.abs(residuals),
    'mape': mape_values
})

# to_csv fails when the target directory is missing, so create it first.  The
# path is held in one variable so the write and the message cannot drift apart.
results_path = '../reports/prediction_results.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_df.to_csv(results_path, index=False)
print(f"✓ Predictions saved to: {results_path}")
print("\nSample results:")
print(results_df.head(10))