# Notebook 06: Model Comparison

## Mục Tiêu
- So sánh performance của các models: SARIMA, LightGBM, Prophet
- Benchmark trên các granularities khác nhau
- Chọn model tốt nhất cho autoscaling

---

In [None]:
# Notebook setup: third-party/stdlib imports, project imports, plot defaults.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import warnings

# Suppress all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Add the parent directory to sys.path so the `src` imports below resolve.
# This must run before the project imports.
sys.path.insert(0, os.path.abspath('..'))

from src.data.preprocessor import load_timeseries, split_train_test
from src.features.feature_engineering import TimeSeriesFeatureEngineer
from src.models.sarima import SARIMAForecaster
from src.models.lightgbm_forecaster import LightGBMForecaster
from src.models.prophet_forecaster import ProphetForecaster, PROPHET_AVAILABLE
from src.models.evaluation import calculate_metrics, compare_models, print_metrics_table

# Plot settings: white-grid style and a wide default figure size.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

print("Libraries loaded!")
# PROPHET_AVAILABLE gates every Prophet cell further down in the notebook.
print(f"Prophet available: {PROPHET_AVAILABLE}")

## 1. Load Data

In [None]:
# Read the 15-minute series and keep only rows not flagged as a storm period.
df = load_timeseries('../data/processed/timeseries_15min.parquet')
not_storm = df['is_storm_period'] == 0
df_clean = df.loc[not_storm].copy()

# Partition the cleaned frame into train and test at the 1995-08-23 boundary.
# NOTE(review): dropping storm rows may leave gaps in the time index; confirm
# that the multi-step forecasts still line up with the test timestamps.
train, test = split_train_test(df_clean, test_start='1995-08-23')

for split_label, split_frame in (("Train", train), ("Test", test)):
    print(f"{split_label}: {len(split_frame)} samples")

In [None]:
# Raw target series: fitted by SARIMA and used as ground truth in evaluation.
train_series, test_series = train['request_count'], test['request_count']

# LightGBM needs a supervised feature matrix built from the cleaned frame.
fe = TimeSeriesFeatureEngineer(df_clean)
df_features = fe.create_all_features(target_col='request_count', granularity='15min')
feature_cols = fe.get_feature_columns(df_features)
X, y = fe.prepare_supervised(
    df_features,
    'request_count',
    feature_cols,
    forecast_horizon=1,
)

# Rows indexed before the cut-off are training rows; the rest form the test set.
test_start = '1995-08-23'
train_mask = X.index < test_start
X_train, y_train = X[train_mask], y[train_mask]
X_test, y_test = X[~train_mask], y[~train_mask]

print(f"\nLightGBM features: {len(feature_cols)}")
print(f"X_train: {X_train.shape}, X_test: {X_test.shape}")

## 2. Train All Models

In [None]:
# Two independent containers keyed by model name:
# metric dictionaries and the raw test-period predictions.
model_results, model_predictions = {}, {}

### 2.1 SARIMA

In [None]:
print("Training SARIMA...")
print("="*50)

# Seasonal period 96 matches one day of 15-minute intervals.
sarima = SARIMAForecaster(order=(2, 1, 2), seasonal_order=(1, 1, 1, 96))
sarima.fit(train_series, verbose=True)

# Forecast one step for every observation in the test series.
sarima_preds = sarima.predict(steps=len(test_series))
sarima_forecast = sarima_preds['forecast'].values
model_predictions['SARIMA'] = sarima_forecast

# Score the forecast against the equally long prefix of the test series.
sarima_actual = test_series.values[:len(sarima_preds)]
model_results['SARIMA'] = calculate_metrics(sarima_actual, sarima_forecast)
print(f"\nSARIMA RMSE: {model_results['SARIMA']['RMSE']:.4f}")

### 2.2 LightGBM

In [None]:
print("\nTraining LightGBM...")
print("="*50)

# The last fifth of the training rows is passed to fit() as the validation set.
val_size = len(X_train) // 5
holdout_rows = slice(-val_size, None)
fit_rows = slice(None, -val_size)
X_val, y_val = X_train.iloc[holdout_rows], y_train.iloc[holdout_rows]
X_train_lgb, y_train_lgb = X_train.iloc[fit_rows], y_train.iloc[fit_rows]

lgbm = LightGBMForecaster(n_estimators=1000, early_stopping_rounds=50)
lgbm.fit(X_train_lgb, y_train_lgb, X_val, y_val, verbose=100)

# Predict on the held-out feature rows.
# NOTE(review): features were built with forecast_horizon=1, so this looks like
# a one-step-ahead forecast while SARIMA/Prophet forecast the whole horizon;
# confirm the comparison is intended to be like-for-like.
lgbm_preds = lgbm.predict(X_test)
model_predictions['LightGBM'] = lgbm_preds

# Score against the supervised target that is aligned with X_test.
model_results['LightGBM'] = calculate_metrics(y_test.values, lgbm_preds)
print(f"\nLightGBM RMSE: {model_results['LightGBM']['RMSE']:.4f}")

### 2.3 Prophet

In [None]:
if not PROPHET_AVAILABLE:
    print("Prophet not available, skipping...")
else:
    print("\nTraining Prophet...")
    print("="*50)

    prophet = ProphetForecaster(
        seasonality_mode='multiplicative',
        weekly_seasonality=True,
        daily_seasonality=True,
        add_hourly_seasonality=True,
    )
    prophet.fit(train, target_col='request_count', verbose=True)

    # Forecast as many 15-minute periods as there are test observations.
    prophet_preds = prophet.predict(periods=len(test_series), freq='15min')
    # NOTE(review): taking the first len(test_series) rows assumes predict()
    # returns only future rows (no fitted history) - confirm.
    prophet_yhat = prophet_preds['yhat'].values[:len(test_series)]
    model_predictions['Prophet'] = prophet_yhat

    # Score against the full test series.
    model_results['Prophet'] = calculate_metrics(test_series.values, prophet_yhat)
    print(f"\nProphet RMSE: {model_results['Prophet']['RMSE']:.4f}")

## 3. Model Comparison

In [None]:
# Print the collected per-model metrics as a titled comparison table.
# Only models whose training cell ran appear in model_results.
print_metrics_table(model_results, "Model Comparison - 15min Granularity")

In [None]:
# Build the comparison DataFrame from the per-model metrics; it is written to
# CSV later in the notebook. The bare name on the last line makes the notebook
# render it as the cell output.
comparison_df = compare_models(model_results)
comparison_df

In [None]:
# One bar chart per metric, laid out side by side.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))

metrics_to_plot = ['MSE', 'RMSE', 'MAE', 'MAPE']
colors = ['steelblue', 'coral', 'seagreen']
model_names = list(model_results)

for panel, metric in zip(axes, metrics_to_plot):
    values = [model_results[name][metric] for name in model_names]
    bars = panel.bar(model_names, values, color=colors[:len(values)])
    panel.set_title(metric)
    panel.set_ylabel('Value')

    # Write each value centred on top of its bar.
    for bar, val in zip(bars, values):
        label_x = bar.get_x() + bar.get_width()/2
        panel.text(label_x, bar.get_height(), f'{val:.2f}',
                   ha='center', va='bottom', fontsize=9)

plt.suptitle('Model Comparison - Metrics', fontsize=14)
plt.tight_layout()
plt.savefig('../reports/figures/model_comparison_metrics.png', dpi=150, bbox_inches='tight')
plt.show()

## 4. Prediction Visualization

In [None]:
# Overlay every model's forecast on the observed test series.
fig, ax = plt.subplots(figsize=(16, 6))

# Ground truth, truncated to the length of the SARIMA forecast.
n_actual = len(model_predictions['SARIMA'])
ax.plot(test_series.index[:n_actual],
        test_series.values[:n_actual],
        label='Actual', alpha=0.8, linewidth=1.5)

# One dashed line per model, drawn over the leading test timestamps.
# NOTE(review): LightGBM predictions come from X_test rows; confirm they are
# aligned with test_series.index before reading this plot closely.
for model_name in model_predictions:
    preds = model_predictions[model_name]
    ax.plot(test_series.index[:len(preds)], preds,
            label=model_name, alpha=0.7, linestyle='--')

ax.set_title('All Models: Predictions vs Actual')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Request Count')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('../reports/figures/model_comparison_predictions.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Zoom in on the first day of the test period.
first_day = 96  # 96 intervals = 24 hours

fig, ax = plt.subplots(figsize=(14, 6))

# Never ask for more points than the test series holds.
actual_len = min(first_day, len(test_series))
ax.plot(test_series.index[:actual_len], test_series.values[:actual_len],
        label='Actual', alpha=0.8, linewidth=2, color='black')

colors = {'SARIMA': 'steelblue', 'LightGBM': 'coral', 'Prophet': 'seagreen'}
for model_name, preds in model_predictions.items():
    # A model may return fewer predictions than the window; clip x and y to the
    # same length so matplotlib does not raise an x/y shape mismatch.
    n_points = min(actual_len, len(preds))
    ax.plot(test_series.index[:n_points], preds[:n_points],
            label=model_name, alpha=0.7, linestyle='--',
            color=colors.get(model_name, 'gray'))

ax.set_xlabel('Timestamp')
ax.set_ylabel('Request Count')
ax.set_title('First 24 Hours - Predictions vs Actual')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('../reports/figures/model_comparison_day1.png', dpi=150, bbox_inches='tight')
plt.show()

## 5. Error Analysis by Time

In [None]:
# Use the shortest prediction length so every model column has the same number
# of rows.
min_len = min(len(preds) for preds in model_predictions.values())

# Positional (index-free) copies of the ground truth avoid pandas index
# alignment when the columns are assembled.
actual_values = np.asarray(test_series.values[:min_len])
error_df = pd.DataFrame({
    'timestamp': list(test_series.index[:min_len]),
    'actual': actual_values,
})

# NOTE(review): every model is compared against test_series positionally;
# confirm the LightGBM predictions (from X_test) share that alignment.
for model_name, preds in model_predictions.items():
    # np.asarray drops any pandas index, so the arithmetic is element by element.
    pred_values = np.asarray(preds[:min_len]).ravel()
    error_values = pred_values - actual_values
    error_df[f'{model_name}_pred'] = pred_values
    error_df[f'{model_name}_error'] = error_values
    error_df[f'{model_name}_abs_error'] = np.abs(error_values)

# Parse the timestamps once and derive both calendar features from them.
timestamps = pd.to_datetime(error_df['timestamp'])
error_df['hour'] = timestamps.dt.hour
error_df['day_of_week'] = timestamps.dt.dayofweek

print(f"Error analysis using {min_len} samples (minimum across all models)")
print(f"Models included: {list(model_predictions.keys())}")

In [None]:
# Mean absolute error per hour of day, one line per model.
fig, ax = plt.subplots(figsize=(12, 5))

by_hour = error_df.groupby('hour')
for model_name in model_predictions:
    hourly_error = by_hour[f'{model_name}_abs_error'].mean()
    ax.plot(hourly_error.index, hourly_error.values, marker='o', label=model_name)

ax.set_title('Prediction Error by Hour of Day')
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Mean Absolute Error')
ax.set_xticks(range(24))
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('../reports/figures/model_error_by_hour.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Error by day of week - handle partial data (test set may not have all 7 days)
fig, ax = plt.subplots(figsize=(10, 5))

days_map = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
available_days = sorted(error_df['day_of_week'].unique())
day_labels = [days_map[d] for d in available_days]
x = np.arange(len(available_days))

model_names = list(model_predictions)
n_models = len(model_names)
# 0.25 is kept for up to three models; with more, shrink the bars so one group
# never spills into the next day's slot.
width = min(0.25, 0.8 / max(n_models, 1))

for i, model_name in enumerate(model_names):
    daily_error = error_df.groupby('day_of_week')[f'{model_name}_abs_error'].mean()
    # Only plot days that exist in the data
    values = [daily_error.get(d, 0) for d in available_days]
    ax.bar(x + i*width, values, width, label=model_name)

ax.set_xlabel('Day of Week')
ax.set_ylabel('Mean Absolute Error')
ax.set_title('Prediction Error by Day of Week')
# Centre each tick under its group of bars for any number of models
# (this equals x + width when there are exactly three).
ax.set_xticks(x + width * max(n_models - 1, 0) / 2)
ax.set_xticklabels(day_labels)
ax.legend()
plt.tight_layout()
plt.savefig('../reports/figures/model_error_by_day.png', dpi=150, bbox_inches='tight')
plt.show()

## 6. Model Selection for Autoscaling

In [None]:
# Rank the models separately by each error metric (ascending: lower is better).
print("Model Rankings:")
print("="*60)

# (metric key, number format, unit suffix) for each ranking that is printed.
ranking_specs = (
    ('RMSE', '.4f', ''),
    ('MAE', '.4f', ''),
    ('MAPE', '.2f', '%'),
)

rankings = {}
for metric_key, number_format, unit in ranking_specs:
    ranked = sorted(model_results.items(), key=lambda item: item[1][metric_key])
    rankings[metric_key] = ranked
    print(f"\nBy {metric_key} (lower is better):")
    for position, (model, metrics) in enumerate(ranked, start=1):
        print(f"  {position}. {model}: {metrics[metric_key]:{number_format}}{unit}")

# Keep the individual rankings available under their own names for later cells.
rmse_ranking = rankings['RMSE']
mae_ranking = rankings['MAE']
mape_ranking = rankings['MAPE']

In [None]:
# The recommendation is the model at the top of the RMSE ranking.
best_model, best_metrics = rmse_ranking[0]

recommendation_lines = [
    f"\nRecommended Model for Autoscaling: {best_model}",
    f"RMSE: {best_metrics['RMSE']:.2f} requests/interval",
    f"MAE: {best_metrics['MAE']:.2f} requests/interval",
    f"MAPE: {best_metrics['MAPE']:.2f}%",
]
for line in recommendation_lines:
    print(line)

## 7. Save Models

In [None]:
# Persist the request_count forecasters; Prophet is included only when available.
trained_models = [
    (sarima, '../models/sarima_15min.pkl'),
    (lgbm, '../models/lightgbm_15min.pkl'),
]
if PROPHET_AVAILABLE:
    trained_models.append((prophet, '../models/prophet_15min.pkl'))

for forecaster, model_path in trained_models:
    forecaster.save(model_path)

print("All models saved!")

In [None]:
# Write the request_count comparison table to CSV.
# NOTE(review): index=False omits the DataFrame index - confirm that
# compare_models() keeps the model names in a regular column.
comparison_csv_path = '../reports/model_comparison_15min.csv'
comparison_df.to_csv(comparison_csv_path, index=False)
print(f"Comparison results saved to: {comparison_csv_path}")

## 8. Summary

In [None]:
# Text summary of the request_count comparison: test period, metrics ranked by
# RMSE, the recommended model, and the saved model paths.
heavy_rule = "=" * 70
light_rule = "-" * 70

print(heavy_rule)
print("                    MODEL COMPARISON SUMMARY")
print(heavy_rule)
print("\nGranularity: 15 minutes")
print(f"Test Period: {test.index.min()} to {test.index.max()}")
print(f"Test Samples: {len(test)}")
print("\n" + light_rule)
print("PERFORMANCE METRICS:")
print(light_rule)
print(f"{'Model':<15} {'RMSE':<12} {'MAE':<12} {'MAPE':<12} {'Rank':<6}")
print(light_rule)

for i, (model, metrics) in enumerate(rmse_ranking):
    # Attach '%' before padding so the MAPE cell is exactly 12 wide and the
    # Rank column lines up with its header.
    mape_str = f"{metrics['MAPE']:.2f}%"
    print(f"{model:<15} {metrics['RMSE']:<12.2f} {metrics['MAE']:<12.2f} {mape_str:<12} {i+1:<6}")

print(light_rule)
print(f"\nBEST MODEL: {best_model}")
print(f"  - Lowest RMSE: {best_metrics['RMSE']:.2f} requests/interval")
print("  - Recommended for predictive autoscaling")
print("\nMODELS SAVED:")
print("  - ../models/sarima_15min.pkl")
print("  - ../models/lightgbm_15min.pkl")
if PROPHET_AVAILABLE:
    print("  - ../models/prophet_15min.pkl")
print(heavy_rule)

## 9. bytes_total Model Comparison

Now we train all models on bytes_total (total bytes transferred) as a second target variable.

In [None]:
# Second target: the raw bytes_total series for fitting and evaluation.
train_series_bytes, test_series_bytes = train['bytes_total'], test['bytes_total']

# Rebuild the supervised feature matrix with bytes_total as the target.
fe_bytes = TimeSeriesFeatureEngineer(df_clean)
df_features_bytes = fe_bytes.create_all_features(target_col='bytes_total', granularity='15min')
feature_cols_bytes = fe_bytes.get_feature_columns(df_features_bytes)
X_bytes, y_bytes = fe_bytes.prepare_supervised(
    df_features_bytes,
    'bytes_total',
    feature_cols_bytes,
    forecast_horizon=1,
)

# Same cut-off as the request_count split (test_start is set earlier in the notebook).
train_mask_bytes = X_bytes.index < test_start
X_train_bytes, y_train_bytes = X_bytes[train_mask_bytes], y_bytes[train_mask_bytes]
X_test_bytes, y_test_bytes = X_bytes[~train_mask_bytes], y_bytes[~train_mask_bytes]

print(f"bytes_total - Train: {len(train_series_bytes)}, Test: {len(test_series_bytes)}")
print(f"LightGBM features: {len(feature_cols_bytes)}")

In [None]:
# Containers for the bytes_total experiments, keyed by model name.
bytes_results, bytes_predictions = {}, {}

# 1. SARIMA for bytes_total
print("Training SARIMA on bytes_total...")
print("="*50)
# Seasonal order (1, 1, 0, 96) is used here to avoid memory issues.
sarima_bytes = SARIMAForecaster(order=(2, 1, 2), seasonal_order=(1, 1, 0, 96))
sarima_bytes.fit(train_series_bytes, verbose=True)

# Forecast one step per test observation and keep the raw forecast values.
sarima_bytes_preds = sarima_bytes.predict(steps=len(test_series_bytes))
sarima_bytes_forecast = sarima_bytes_preds['forecast'].values
bytes_predictions['SARIMA'] = sarima_bytes_forecast

# Score against the equally long prefix of the test series.
sarima_bytes_actual = test_series_bytes.values[:len(sarima_bytes_preds)]
bytes_results['SARIMA'] = calculate_metrics(sarima_bytes_actual, sarima_bytes_forecast)
print(f"SARIMA bytes_total RMSE: {bytes_results['SARIMA']['RMSE']:.2f}")

In [None]:
# 2. LightGBM for bytes_total
print("\nTraining LightGBM on bytes_total...")
print("="*50)

# The last fifth of the training rows is passed to fit() as the validation set.
val_size_bytes = len(X_train_bytes) // 5
holdout_rows_bytes = slice(-val_size_bytes, None)
fit_rows_bytes = slice(None, -val_size_bytes)
X_val_bytes = X_train_bytes.iloc[holdout_rows_bytes]
y_val_bytes = y_train_bytes.iloc[holdout_rows_bytes]
X_train_lgb_bytes = X_train_bytes.iloc[fit_rows_bytes]
y_train_lgb_bytes = y_train_bytes.iloc[fit_rows_bytes]

lgbm_bytes = LightGBMForecaster(n_estimators=1000, early_stopping_rounds=50)
lgbm_bytes.fit(X_train_lgb_bytes, y_train_lgb_bytes, X_val_bytes, y_val_bytes, verbose=100)

# Predict on the held-out feature rows and score against their aligned target.
lgbm_bytes_preds = lgbm_bytes.predict(X_test_bytes)
bytes_predictions['LightGBM'] = lgbm_bytes_preds

bytes_results['LightGBM'] = calculate_metrics(y_test_bytes.values, lgbm_bytes_preds)
print(f"LightGBM bytes_total RMSE: {bytes_results['LightGBM']['RMSE']:.2f}")

In [None]:
# 3. Prophet for bytes_total
if not PROPHET_AVAILABLE:
    print("Prophet not available, skipping...")
else:
    print("\nTraining Prophet on bytes_total...")
    print("="*50)

    prophet_bytes = ProphetForecaster(
        seasonality_mode='multiplicative',
        weekly_seasonality=True,
        daily_seasonality=True,
        add_hourly_seasonality=True,
    )
    prophet_bytes.fit(train, target_col='bytes_total', verbose=True)

    # Forecast as many 15-minute periods as there are test observations.
    prophet_bytes_preds = prophet_bytes.predict(periods=len(test_series_bytes), freq='15min')
    # NOTE(review): taking the first len(test_series_bytes) rows assumes
    # predict() returns only future rows (no fitted history) - confirm.
    prophet_bytes_yhat = prophet_bytes_preds['yhat'].values[:len(test_series_bytes)]
    bytes_predictions['Prophet'] = prophet_bytes_yhat

    bytes_results['Prophet'] = calculate_metrics(test_series_bytes.values, prophet_bytes_yhat)
    print(f"Prophet bytes_total RMSE: {bytes_results['Prophet']['RMSE']:.2f}")

In [None]:
# Print the bytes_total metrics as a titled comparison table.
print_metrics_table(bytes_results, "bytes_total Model Comparison - 15min Granularity")

# Build the bytes_total comparison DataFrame (written to CSV later in the
# notebook); the bare name on the last line makes the notebook render it.
bytes_comparison_df = compare_models(bytes_results)
bytes_comparison_df

In [None]:
# Overlay every model's bytes_total forecast on the observed test series.
fig, ax = plt.subplots(figsize=(16, 6))

# Truncate everything to the shortest forecast so x and y always match.
min_len_bytes = min(len(preds) for preds in bytes_predictions.values())
bytes_timestamps = test_series_bytes.index[:min_len_bytes]

ax.plot(bytes_timestamps,
        test_series_bytes.values[:min_len_bytes],
        label='Actual', alpha=0.8, linewidth=1.5)

for model_name in bytes_predictions:
    preds = bytes_predictions[model_name]
    ax.plot(bytes_timestamps, preds[:min_len_bytes],
            label=model_name, alpha=0.7, linestyle='--')

ax.set_title('bytes_total: All Models Predictions vs Actual')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Bytes Total')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('../reports/figures/bytes_total_predictions.png', dpi=150, bbox_inches='tight')
plt.show()

## 10. Complete Benchmark Table

Comprehensive comparison of all models across both target variables.

In [None]:
# Flatten the per-target, per-model metrics into one row per (target, model).
all_results = {'request_count': model_results, 'bytes_total': bytes_results}

benchmark_metric_names = ('MSE', 'RMSE', 'MAE', 'MAPE')
benchmark_data = [
    {
        'Target': target_name,
        'Model': model_name,
        **{metric_name: model_metrics[metric_name] for metric_name in benchmark_metric_names},
    }
    for target_name, target_results in all_results.items()
    for model_name, model_metrics in target_results.items()
]

benchmark_df = pd.DataFrame(benchmark_data)

table_rule = "="*80
print(table_rule)
print("                    COMPLETE BENCHMARK TABLE")
print(table_rule)
print(benchmark_df.to_string(index=False))
print(table_rule)

In [None]:
# Grid of bar charts: one row per target, one column per metric.
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

metrics_to_plot = ['RMSE', 'MAE', 'MAPE']
targets = ['request_count', 'bytes_total']
colors = {'SARIMA': 'steelblue', 'LightGBM': 'coral', 'Prophet': 'seagreen'}

for target, axis_row in zip(targets, axes):
    results = all_results[target]
    models = list(results)
    # Models without a configured colour fall back to gray.
    bar_colors = [colors.get(m, 'gray') for m in models]

    for metric, ax in zip(metrics_to_plot, axis_row):
        values = [results[m][metric] for m in models]
        bars = ax.bar(models, values, color=bar_colors)

        ax.set_title(f'{target} - {metric}')
        ax.set_ylabel(metric)

        # Label each bar; MAPE is shown as a percentage with one decimal.
        for bar, val in zip(bars, values):
            if metric == 'MAPE':
                label = f'{val:.1f}%'
            else:
                label = f'{val:.2f}'
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
                    label, ha='center', va='bottom', fontsize=9)

plt.suptitle('Complete Model Comparison - Both Targets', fontsize=14)
plt.tight_layout()
plt.savefig('../reports/figures/complete_benchmark.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Persist the bytes_total forecasters; Prophet is included only when available.
trained_bytes_models = [
    (sarima_bytes, '../models/sarima_bytes_15min.pkl'),
    (lgbm_bytes, '../models/lightgbm_bytes_15min.pkl'),
]
if PROPHET_AVAILABLE:
    trained_bytes_models.append((prophet_bytes, '../models/prophet_bytes_15min.pkl'))

for forecaster, model_path in trained_bytes_models:
    forecaster.save(model_path)

print("All bytes_total models saved!")

In [None]:
# Write the combined benchmark first.
benchmark_df.to_csv('../reports/model_benchmark_complete.csv', index=False)
print("Complete benchmark saved to: ../reports/model_benchmark_complete.csv")

# Then write one comparison CSV per target variable.
per_target_reports = (
    (comparison_df, '../reports/model_comparison_request_count.csv'),
    (bytes_comparison_df, '../reports/model_comparison_bytes_total.csv'),
)
for report_df, report_path in per_target_reports:
    report_df.to_csv(report_path, index=False)
print("Individual comparison CSVs saved!")

## 11. Final Summary

In [None]:
# Final report: per-target ranking by RMSE, then the saved models and reports.
print("="*80)
print("                    FINAL MODEL COMPARISON SUMMARY")
print("="*80)

# Medal markers for the top three places (gold, silver, bronze), written as
# escapes so the source survives a wrong file encoding. Lower places get none.
medals = ("\U0001F947", "\U0001F948", "\U0001F949")

for target in ['request_count', 'bytes_total']:
    print(f"\n{'='*40}")
    print(f"  Target: {target}")
    print(f"{'='*40}")

    results = all_results[target]
    ranking = sorted(results.items(), key=lambda x: x[1]['RMSE'])

    print(f"{'Model':<15} {'RMSE':<15} {'MAE':<15} {'MAPE':<15}")
    print("-"*60)
    for i, (model, metrics) in enumerate(ranking):
        mape_str = f"{metrics['MAPE']:.2f}%"
        rank = medals[i] if i < len(medals) else ""
        print(f"{model:<15} {metrics['RMSE']:<15.2f} {metrics['MAE']:<15.2f} {mape_str:<15} {rank}")

    # This rebinds best_model, so after the loop it names the best model for
    # the last target processed.
    best_model = ranking[0][0]
    # \u2192 is a rightwards arrow.
    print(f"\n  \u2192 Best model for {target}: {best_model}")

print("\n" + "="*80)
print("MODELS SAVED:")
print("-"*80)
print("request_count models:")
print("  - models/sarima_15min.pkl")
print("  - models/lightgbm_15min.pkl")
if PROPHET_AVAILABLE:
    print("  - models/prophet_15min.pkl")
print("\nbytes_total models:")
print("  - models/sarima_bytes_15min.pkl")
print("  - models/lightgbm_bytes_15min.pkl")
if PROPHET_AVAILABLE:
    print("  - models/prophet_bytes_15min.pkl")
print("\nBenchmark reports:")
print("  - reports/model_benchmark_complete.csv")
print("  - reports/model_comparison_request_count.csv")
print("  - reports/model_comparison_bytes_total.csv")
print("="*80)