[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EMQA_arima_forecast/EMQA_arima_forecast.ipynb)

# EMQA_arima_forecast

**Rolling 1-Step-Ahead ARIMA Forecast with Confidence Intervals**

Perform an expanding-window rolling 1-step-ahead ARIMA(1,1,1) forecast on Brent crude oil prices.
Evaluate out-of-sample accuracy using **R²_OOS** (vs. the naive "tomorrow = today" benchmark), RMSE, MAE, Direction Accuracy, and 95% confidence-interval coverage.

**Key Finding:** ARIMA does NOT beat the naive forecast for daily oil prices.

**Output:** `arima_forecast.pdf`

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Shared figure style, grouped by purpose and applied in a single update.
_transparent = dict.fromkeys(
    ('figure.facecolor', 'axes.facecolor', 'savefig.facecolor'), 'none'
)
_chrome = {
    'savefig.transparent': True,
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
}
_sizing = {'font.size': 11, 'figure.figsize': (12, 6)}

plt.rcParams.update({**_transparent, **_chrome, **_sizing})

# Named hex colours used by the plots in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Write ``fig`` to ``name`` and print a confirmation line.

    The figure is saved via ``fig.savefig`` with a tight bounding box,
    a transparent background and 300 dpi.

    Parameters
    ----------
    fig : object exposing ``savefig`` (e.g. a matplotlib figure)
    name : output file name/path passed straight to ``savefig``
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 300}
    fig.savefig(name, **export_options)
    print("Saved: {}".format(name))

In [None]:
import yfinance as yf

def fetch(ticker, start='2020-01-01', end='2025-12-31'):
    """Download the 'Close' prices for ``ticker`` via ``yf.download``.

    Parameters
    ----------
    ticker : symbol passed to ``yf.download`` (e.g. ``'BZ=F'``)
    start, end : date bounds forwarded unchanged to ``yf.download``

    Returns
    -------
    The 'Close' data with missing values dropped. When the download has
    MultiIndex columns and a single 'Close' column, that column is
    returned as a Series indexed like the download.
    """
    d = yf.download(ticker, start=start, end=end, progress=False)
    if isinstance(d.columns, pd.MultiIndex):
        # Squeeze only the column axis: a bare squeeze() would also collapse
        # the row axis, turning a one-row download into a scalar that has no
        # .dropna() method.
        return d['Close'].squeeze(axis=1).dropna()
    return d['Close'].dropna()

# Download Brent crude ('BZ=F') closing prices and report the sample span.
brent = fetch('BZ=F')
first_day, last_day = brent.index[0].date(), brent.index[-1].date()
print(f"Brent crude: {len(brent)} observations, {first_day} to {last_day}")

# Chronological split: first 80% of observations for training, rest for testing.
split = int(0.8 * len(brent))
train = brent.iloc[:split]
test = brent.iloc[split:]
print(f"Train: {len(train)} obs | Test: {len(test)} obs")

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Expanding-window rolling forecast: refit ARIMA(1,1,1) before every test
# observation and keep the 1-step-ahead mean together with its 95% interval.
n_steps = len(test)
forecast_rows = []  # one (mean, lower, upper) tuple per test observation

for step in range(n_steps):
    # The sample grows by one observation per step and stops just before
    # the point being forecast.
    history = brent.iloc[:split + step]
    result = ARIMA(history, order=(1, 1, 1)).fit()

    one_step = result.get_forecast(steps=1)
    point = one_step.predicted_mean.iloc[0]
    band = one_step.conf_int(alpha=0.05)  # alpha=0.05 -> 95% interval
    forecast_rows.append((point, band.iloc[0, 0], band.iloc[0, 1]))

    done = step + 1
    if step == 0 or done % 50 == 0:
        print(f"  Step {done}/{n_steps} completed")

# Unpack the collected tuples into the arrays used by the evaluation and plot.
predictions = np.array([row[0] for row in forecast_rows])
ci_lower = np.array([row[1] for row in forecast_rows])
ci_upper = np.array([row[2] for row in forecast_rows])
actual = test.values

print(f"\nRolling forecast complete: {len(predictions)} predictions")

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Naive benchmark: each test-day forecast is the previous observed price,
# so the slice starts one step before the split and drops the final point.
naive_pred = brent.iloc[split-1:-1].values

# Squared-error metrics for both forecasts.
mse_arima = mean_squared_error(actual, predictions)
mse_naive = mean_squared_error(actual, naive_pred)
rmse_arima = np.sqrt(mse_arima)
rmse_naive = np.sqrt(mse_naive)

# Out-of-sample R²: share of the naive MSE removed by the model
# (zero or negative means the model is no better than naive).
r2_oos = 1 - (mse_arima / mse_naive)

# Absolute-error metrics for both forecasts.
mae_arima = mean_absolute_error(actual, predictions)
mae_naive = mean_absolute_error(actual, naive_pred)

# Direction accuracy: compare the sign of the realised move with the sign
# of the predicted move, both measured relative to the previous price.
actual_returns = (actual - naive_pred) / naive_pred
pred_returns = (predictions - naive_pred) / naive_pred
actual_dir = np.sign(actual_returns)
pred_dir = np.sign(pred_returns)
dir_acc_arima = 100 * np.mean(actual_dir == pred_dir)
# Fixed reference value, not computed from the data: the naive forecast
# predicts no change and therefore has no direction of its own.
dir_acc_naive = 50.0

# Share of test observations falling inside the 95% forecast interval.
inside_ci = (ci_lower <= actual) & (actual <= ci_upper)
ci_coverage = 100 * np.mean(inside_ci)

# Assemble the comparison table, then emit it line by line.
heavy_rule = "=" * 60
light_rule = "-" * 60
report_lines = [
    heavy_rule,
    "  ARIMA(1,1,1) vs Naive Forecast Comparison",
    heavy_rule,
    f"{'Metric':<25} {'ARIMA':>12} {'Naive':>12}",
    light_rule,
    f"{'RMSE':<25} ${rmse_arima:>11.2f} ${rmse_naive:>11.2f}",
    f"{'MAE':<25} ${mae_arima:>11.2f} ${mae_naive:>11.2f}",
    f"{'Direction Accuracy':<25} {dir_acc_arima:>11.1f}% {dir_acc_naive:>11.1f}%",
    light_rule,
    f"{'R²_OOS (vs naive)':<25} {r2_oos*100:>11.2f}%",
    f"{'95% CI Coverage':<25} {ci_coverage:>11.1f}%",
    heavy_rule,
    "",
]
for line in report_lines:
    print(line)

# Verdict follows the sign of R²_OOS.
verdict = (
    ">>> VERDICT: ARIMA does NOT beat the naive forecast."
    if r2_oos <= 0
    else f">>> VERDICT: ARIMA beats naive by {r2_oos*100:.1f}% R²_OOS."
)
print(verdict)

In [None]:
# Plot actual test-period prices against the rolling ARIMA forecasts,
# shading the 95% forecast interval, and save the figure as a PDF.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(test.index, actual, color=COLORS['blue'], linewidth=1.2, label='Actual')
ax.plot(test.index, predictions, color=COLORS['red'], linewidth=1.2, label='ARIMA(1,1,1)')
ax.fill_between(test.index, ci_lower, ci_upper,
                color=COLORS['red'], alpha=0.15, label='95% CI')

# Add metrics annotation. The "Beats Naive" label is derived from r2_oos
# (non-positive R²_OOS means the model does not beat naive) instead of being
# hard-coded, so the figure cannot contradict the computed metrics.
beats_naive = 'No' if r2_oos <= 0 else 'Yes'
textstr = (f'R²_OOS = {r2_oos*100:.1f}%\n'
           f'Direction = {dir_acc_arima:.0f}%\n'
           f'Beats Naive: {beats_naive}')
ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

ax.set_title('Rolling 1-Step-Ahead ARIMA(1,1,1) Forecast — Brent Crude Oil',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
# Legend sits below the axes so it does not cover the series.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10), frameon=False, ncol=3)

fig.tight_layout()
save_fig(fig, 'arima_forecast.pdf')
plt.show()