# Volatility Model Comparison

This notebook compares different volatility forecasting models using out-of-sample evaluation.

In [None]:
# Put the parent directory on the module search path so the project packages
# imported below (data, models, evaluation, config) can be resolved.
# NOTE(review): '..' is relative to the kernel's working directory — this assumes
# the notebook is launched from a direct subdirectory of the project root; confirm.
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppresses every warning category for the rest of the session.
# NOTE(review): this presumably also hides any optimizer/convergence warnings
# emitted while the models are fitted — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Project-local modules; these must come after the sys.path tweak above.
# NOTE(review): calculate_all_metrics is imported but not referenced in the
# cells visible in this notebook.
from data.fetcher import fetch_stock_data
from data.preprocessing import calculate_returns
from models.garch import GARCHModel, EGARCHModel, GJRGARCHModel
from models.ml_models import RandomForestVolatility, XGBoostVolatility
from models.stochastic_vol import HestonModel
from evaluation.backtesting import RollingWindowBacktest
from evaluation.metrics import calculate_all_metrics, diebold_mariano_test
import config

# Apply one matplotlib style sheet to every figure produced below.
# NOTE: the 'seaborn-v0_8-*' style names require matplotlib >= 3.6.
plt.style.use('seaborn-v0_8-whitegrid')
# Render figures inline in the notebook output.
%matplotlib inline

## 1. Load Data

In [None]:
# Download the price history used throughout the comparison; the date window
# comes from the project config rather than being hard-coded here.
ticker = 'SPY'
data = fetch_stock_data(ticker=ticker, start_date=config.START_DATE, end_date=config.END_DATE)

# Log returns of the closing price. calculate_returns is handed a one-column
# frame and the result is squeezed back down (presumably to a Series — confirm
# against data.preprocessing).
prices = data['close']
close_frame = prices.to_frame()
returns = calculate_returns(close_frame, method='log').squeeze()

# Quick sanity report on the sample that every later cell works with.
print(f"Data loaded: {len(returns)} observations")
first_date, last_date = returns.index[0], returns.index[-1]
print(f"Date range: {first_date} to {last_date}")

## 2. Initialize Models

In [None]:
# Registry of candidate forecasters, keyed by the display label that is reused
# as the column / row name in the backtest results and metric tables below.
# NOTE(review): no random seed is passed to the tree-based models here; if they
# are stochastic, results may differ between runs — confirm in models.ml_models.
garch_family = {
    'GARCH(1,1)': GARCHModel(p=1, q=1),
    'EGARCH(1,1)': EGARCHModel(p=1, q=1),
    'GJR-GARCH(1,1)': GJRGARCHModel(p=1, o=1, q=1),
}
ml_family = {
    'Random Forest': RandomForestVolatility(n_estimators=50, lookback=20),
    'XGBoost': XGBoostVolatility(n_estimators=50, lookback=20),
}
# Merge in the original order so iteration order (and therefore every printed
# table) is unchanged.
models = {**garch_family, **ml_family, 'Heston': HestonModel()}

print("Models initialized:")
for label in models.keys():
    print(f"  - {label}")

## 3. Single Model Training Example

In [None]:
# Standalone illustration: fit one GARCH(1,1) on the complete return series
# (no train/test split here — the out-of-sample comparison happens in the
# rolling backtest section).
garch = GARCHModel(p=1, q=1)
garch.fit(returns)

print("GARCH(1,1) Parameters:")
print(garch.params)

# Ask the fitted model for a forecast with horizon argument 5.
# NOTE(review): whether this is a single value or a 5-step path depends on
# GARCHModel.forecast_variance — confirm.
forecast_horizon = 5
variance_forecast = garch.forecast_variance(forecast_horizon)
print(f"\n5-day ahead variance forecast: {variance_forecast}")

In [None]:
# Two stacked panels on a shared date axis: raw returns on top, the fitted
# GARCH volatility underneath.
cond_var = garch.get_conditional_variance()

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
returns_ax, vol_ax = axes

returns_ax.plot(returns.index, returns, alpha=0.7, color='steelblue')
returns_ax.set(title=f'{ticker} Returns', ylabel='Return')

# Variance -> volatility via sqrt, then scaled by sqrt(252)
# (252 is presumably the number of trading days per year).
annualized_vol = np.sqrt(cond_var) * np.sqrt(252)
vol_ax.plot(returns.index, annualized_vol, color='darkred', linewidth=1.5)
vol_ax.set(title='GARCH(1,1) Conditional Volatility (Annualized)', ylabel='Volatility')

plt.tight_layout()
plt.show()

## 4. Rolling Window Backtest

In [None]:
# Backtest configuration, all taken from the project config.
# The step size is set equal to the test window (presumably so consecutive
# test windows tile the sample without overlapping), and expanding=False
# presumably selects a fixed-length rather than a growing training window —
# confirm both against evaluation.backtesting.
backtest_settings = dict(
    train_window=config.TRAIN_WINDOW,
    test_window=config.TEST_WINDOW,
    step_size=config.TEST_WINDOW,
    expanding=False,
)
backtest = RollingWindowBacktest(**backtest_settings)

# Every model in the registry is evaluated on the same return series;
# this is the slow cell of the notebook.
print("Running backtest...")
results = backtest.run(returns, models, verbose=True)
print(f"\nBacktest completed. Results shape: {results.shape}")

In [None]:
# Preview results: show the first 10 rows of the backtest output as a sanity
# check before evaluating. Later cells read a 'realized' column and one column
# per model label from this object.
results.head(10)

## 5. Model Evaluation

In [None]:
# Calculate evaluation metrics for every model in the backtest output.
# The later cells index `metrics` by model label (rows) and read the columns
# 'mse', 'rmse', 'mae', 'qlike', 'r_squared' and the 'mz_*' regression fields.
metrics = backtest.evaluate(results)
# Rounded to 6 decimals for display only; `metrics` itself is not reassigned.
metrics.round(6)

In [None]:
# Print one leaderboard per loss metric, smallest value first
# (all three are losses, so position 1 is the best model on that metric).
print("Model Rankings:")
print("=" * 50)
for loss_name in ('mse', 'mae', 'qlike'):
    ordered_scores = metrics[loss_name].sort_values()
    print(f"\n{loss_name.upper()}:")
    for position, (model_label, score) in enumerate(ordered_scores.items(), start=1):
        print(f"  {position}. {model_label}: {score:.6f}")

## 6. Visualization

In [None]:
# Plot forecasts vs realized.
# plot_forecasts is a helper on the backtest object; its return value is
# unpacked here as (fig, ax). Presumably it overlays each model's forecast on
# the 'realized' series from `results` — confirm against evaluation.backtesting.
fig, ax = backtest.plot_forecasts(results, figsize=(14, 6))
plt.show()

In [None]:
# Plot cumulative loss per model, using QLIKE — the same loss that the
# Diebold-Mariano test and the final ranking in this notebook are based on.
# NOTE(review): presumably a lower curve means a better model over time —
# confirm how plot_cumulative_loss accumulates the loss.
fig, ax = backtest.plot_cumulative_loss(results, loss_func='qlike', figsize=(14, 6))
plt.show()

In [None]:
# One horizontal bar chart per metric, side by side. Within each panel the
# models are sorted by value and coloured along the reversed RdYlGn colormap
# (sampled between 0.2 and 0.8 to avoid its darkest extremes).
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

panel_metrics = ['rmse', 'mae', 'qlike']
for panel, metric_name in zip(axes, panel_metrics):
    sorted_scores = metrics[metric_name].sort_values()
    colour_positions = np.linspace(0.2, 0.8, len(sorted_scores))
    bar_colours = plt.cm.RdYlGn_r(colour_positions)
    sorted_scores.plot.barh(ax=panel, color=bar_colours)
    panel.set_title(metric_name.upper())
    panel.set_xlabel('Value')

plt.tight_layout()
plt.show()

## 7. Statistical Tests

In [None]:
# Diebold-Mariano test comparing all models to GARCH benchmark
#
# For every challenger model, test whether its QLIKE loss differs significantly
# from the benchmark's, using only the observations where the realized value
# and both forecasts are available. Prints one table row per challenger.
#
# Requires `results` (backtest output) and `metrics` (from the evaluation
# section) to be defined by the earlier cells.
benchmark = 'GARCH(1,1)'
significance_level = 0.05
realized = results['realized'].values
# Benchmark forecasts do not change inside the loop, so fetch them once.
pred1 = results[benchmark].values

print(f"Diebold-Mariano Test vs {benchmark}:")
print("="*60)
print(f"{'Model':<20} {'DM Stat':>12} {'p-value':>12} {'Winner':>15}")
print("-"*60)

for model in models:
    if model == benchmark:
        continue

    pred2 = results[model].values

    # Keep only rows where realized and both forecasts are present
    valid = ~(np.isnan(pred1) | np.isnan(pred2) | np.isnan(realized))

    # The test needs at least two paired observations to estimate a variance;
    # report the row as unavailable instead of failing the whole table.
    if valid.sum() < 2:
        print(f"{model:<20} {'n/a':>12} {'n/a':>12} {'Too few obs':>15}")
        continue

    dm_stat, p_value = diebold_mariano_test(
        realized[valid], pred1[valid], pred2[valid], loss_func='qlike'
    )

    if p_value < significance_level:
        # Decide the winner from the average QLIKE loss (lower is better)
        # instead of the sign of dm_stat. With the usual loss differential
        # d = L(benchmark) - L(model), a positive statistic means the
        # benchmark is worse, so labelling the benchmark as winner when
        # dm_stat > 0 would be inverted. Comparing the project's own QLIKE
        # values is correct regardless of which sign convention
        # diebold_mariano_test uses.
        model_is_better = metrics.loc[model, 'qlike'] < metrics.loc[benchmark, 'qlike']
        winner = model if model_is_better else benchmark
    else:
        winner = "No diff"

    print(f"{model:<20} {dm_stat:>12.4f} {p_value:>12.4f} {winner:>15}")

In [None]:
# Mincer-Zarnowitz regression results
#
# Prints, for every model, the regression fields stored in `metrics` by the
# evaluation step: intercept (mz_alpha), slope (mz_beta), R² (mz_r_squared)
# and the p-value of the joint test (mz_joint_pvalue).
# NOTE(review): presumably the regression is realized-on-forecast and the joint
# null is alpha = 0, beta = 1 (an unbiased, efficient forecast) — confirm
# against evaluation.metrics.
print("\nMincer-Zarnowitz Regression Results:")
print("="*70)
# Header previously contained a mis-decoded superscript two ('RÂ²').
print(f"{'Model':<20} {'Alpha':>10} {'Beta':>10} {'R²':>10} {'Joint p-val':>15}")
print("-"*70)

for model in models:
    alpha = metrics.loc[model, 'mz_alpha']
    beta = metrics.loc[model, 'mz_beta']
    r2 = metrics.loc[model, 'mz_r_squared']
    pval = metrics.loc[model, 'mz_joint_pvalue']
    print(f"{model:<20} {alpha:>10.6f} {beta:>10.4f} {r2:>10.4f} {pval:>15.4f}")

## 8. Summary and Conclusions

Key findings from the model comparison:

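1. **Best performing model**: the model with the lowest QLIKE, i.e. the first row of the final ranking table below (cross-check against the MSE and MAE rankings in Section 5)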
2. **GARCH vs ML**: Compare traditional econometric vs machine learning approaches
3. **Asymmetric models**: EGARCH and GJR-GARCH capture leverage effects
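4. **Forecast efficiency**: the Mincer-Zarnowitz regressions (Section 7) assess forecast bias and efficiency; an unbiased, efficient forecast has an intercept (alpha) close to 0 and a slope (beta) close to 1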

In [None]:
# Final summary table: the headline metrics for each model, with a QLIKE rank
# column added and the rows ordered by that rank (best first).
headline_columns = ['rmse', 'mae', 'qlike', 'r_squared', 'mz_beta']
summary = (
    metrics[headline_columns]
    .assign(rank_qlike=lambda frame: frame['qlike'].rank())
    .sort_values('rank_qlike')
)

print("\nFinal Model Ranking (by QLIKE):")
summary