In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Download S&P 500 data
df = yf.download("^GSPC", start="2014-01-01", end="2024-12-31", auto_adjust=False)
df = df[['Adj Close']].copy()
df['Log_Returns'] = np.log(df['Adj Close'] / df['Adj Close'].shift(1))
df.dropna(inplace=True)

# Fit ARIMA model
model_arima = ARIMA(df['Log_Returns'], order=(1, 0, 1))
result_arima = model_arima.fit()
df['ARIMA_Residuals'] = result_arima.resid
df['ARIMA_Volatility'] = df['ARIMA_Residuals'] ** 2

# Fit GARCH(1,1)
returns_pct = df['Log_Returns'] * 100
garch = arch_model(returns_pct, vol='GARCH', p=1, q=1)
garch_fit = garch.fit(disp='off')
df['GARCH_Volatility'] = garch_fit.conditional_volatility

# Compute actual volatility
df['Actual_Volatility'] = df['Log_Returns'].rolling(window=21).std() * 100

# Drop rows with NaNs in key columns
vol_cols = ['Actual_Volatility', 'ARIMA_Volatility', 'GARCH_Volatility']
df = df.dropna(subset=vol_cols)

# Evaluation
pred_arima = np.sqrt(df['ARIMA_Volatility']) * 100
pred_garch = df['GARCH_Volatility']
actual = df['Actual_Volatility']

rmse_arima = np.sqrt(mean_squared_error(actual, pred_arima))
mae_arima = mean_absolute_error(actual, pred_arima)
rmse_garch = np.sqrt(mean_squared_error(actual, pred_garch))
mae_garch = mean_absolute_error(actual, pred_garch)

print(f"ARIMA  -> RMSE: {rmse_arima:.4f}, MAE: {mae_arima:.4f}")
print(f"GARCH  -> RMSE: {rmse_garch:.4f}, MAE: {mae_garch:.4f}")

# Plot actual vs predicted volatility
plt.figure(figsize=(15, 6))
plt.plot(df['Actual_Volatility'], label='Actual Volatility (21-day Rolling)', alpha=0.8)
plt.plot(df['GARCH_Volatility'], label='GARCH Predicted Volatility', alpha=0.8)
plt.plot(pred_arima, label='ARIMA Predicted Volatility', alpha=0.6)
plt.title('Actual vs Predicted Volatility (ARIMA vs GARCH)')
plt.legend()
plt.show()

# Bonus: Plot disagreement
df['Volatility_Diff'] = abs(pred_arima - pred_garch)
df['Volatility_Diff'].plot(figsize=(12, 4), title='Volatility Model Disagreement (ARIMA vs GARCH)')
plt.ylabel('Absolute Difference in Predicted Volatility')
plt.show()


[*********************100%***********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


KeyError: ['Actual_Volatility', 'ARIMA_Volatility', 'GARCH_Volatility']