[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EMQA_arima_electricity/EMQA_arima_electricity.ipynb)

# EMQA_arima_electricity

**Rolling 1-Step-Ahead ARIMA Forecast with Confidence Intervals**

Perform an expanding-window rolling 1-step-ahead ARIMA(1,1,1) forecast on Romanian electricity prices.
Evaluate out-of-sample accuracy using **R²_OOS** (vs naive benchmark), RMSE, and Direction Accuracy.

**Key Finding:** ARIMA beats the naive forecast for electricity prices (R²_OOS = +14%, Direction = 57%).

**Output:** `arima_electricity.pdf`

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Suppress all Python warnings from this point on.
warnings.filterwarnings('ignore')

# Global matplotlib style: transparent backgrounds, no grid, and the top and
# right axes spines hidden.
plt.rcParams.update({
    **dict.fromkeys(
        ('figure.facecolor', 'axes.facecolor', 'savefig.facecolor'), 'none'),
    'savefig.transparent': True,
    **dict.fromkeys(
        ('axes.grid', 'axes.spines.top', 'axes.spines.right'), False),
    'font.size': 11,
    'figure.figsize': (12, 6),
})

# Named hex colours used by the plotting cells.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save ``fig`` under ``name`` and print a confirmation line.

    The figure is written through ``fig.savefig`` with a tight bounding box,
    a transparent background and a resolution of 300 dpi.

    Parameters
    ----------
    fig : object exposing ``savefig`` (e.g. a matplotlib Figure)
    name : target passed straight through to ``fig.savefig``
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 300}
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
# Price CSV hosted in the QuantLet/EMQA GitHub repository; the 'date' column
# is parsed and used as the index.
url = 'https://raw.githubusercontent.com/QuantLet/EMQA/main/EMQA_arima_electricity/ro_de_prices_extended.csv'
df = pd.read_csv(url, index_col='date', parse_dates=['date'])

# Work with the 'ro_price' column only, dropping rows where it is missing.
prices = df.loc[:, 'ro_price'].dropna()

# Short summary of the loaded series.
print(
    f'Loaded {len(prices)} daily observations',
    f'Date range: {prices.index[0].date()} to {prices.index[-1].date()}',
    f'Mean: {prices.mean():.1f} EUR/MWh, Std: {prices.std():.1f}',
    sep='\n',
)


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

# ADF test on prices and returns
# Log returns in percent. A zero price makes the ratio 0 or infinite, so the
# log becomes -inf/+inf; dropna() only removes NaN, so infinities are mapped
# to NaN first to keep them out of the ADF regression. (Negative ratios already
# yield NaN and are dropped.) For strictly positive prices this is a no-op.
returns = (
    np.log(prices / prices.shift(1))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    * 100
)
for name, series in [('Prices', prices), ('Log Returns', returns)]:
    # adf[0] is the test statistic, adf[1] the p-value; the null hypothesis
    # of a unit root is rejected at the 5% level when p < 0.05.
    adf = adfuller(series.dropna(), autolag='AIC')
    print(f"{name}: ADF = {adf[0]:.3f}, p-value = {adf[1]:.4f} => {'Stationary' if adf[1] < 0.05 else 'Non-stationary'}")

# Train/Test split (80/20)
# Positional split: the first 80% of observations form the training sample,
# the remainder the out-of-sample test period.
split = int(len(prices) * 0.80)
train = prices.iloc[:split]
test = prices.iloc[split:]
print(f"\nTrain: {len(train)} obs ({train.index[0].date()} to {train.index[-1].date()})")
print(f"Test:  {len(test)} obs ({test.index[0].date()} to {test.index[-1].date()})")

In [None]:
# Rolling 1-step ahead forecast with expanding window
# ARIMA(1,1,1) on PRICE LEVELS (d=1 handles differencing internally)
#
# For every test observation the model is refit on all prices strictly before
# it and a single one-step forecast with its 95% interval is stored.
predictions, ci_lower, ci_upper = [], [], []
failed_steps = []  # (test position, error) for every step that used the fallback

for i in range(len(test)):
    history = prices.iloc[:split + i]
    try:
        model = ARIMA(history, order=(1, 1, 1))
        result = model.fit()
        fc = result.get_forecast(steps=1)
        ci = fc.conf_int(alpha=0.05)
        # Gather all three values before appending anything, so a failure
        # part-way through cannot leave the three lists with different lengths.
        point = fc.predicted_mean.iloc[0]
        lower = ci.iloc[0, 0]
        upper = ci.iloc[0, 1]
    except Exception as exc:
        # Catch Exception only (not a bare except) so that KeyboardInterrupt
        # can still stop this long loop. Fallback: naive forecast (last
        # observed price) with a +/- 2 standard deviation band.
        last_price = history.iloc[-1]
        band = 2 * history.std()
        point = last_price
        lower = last_price - band
        upper = last_price + band
        failed_steps.append((i, repr(exc)))

    predictions.append(point)
    ci_lower.append(lower)
    ci_upper.append(upper)

    if (i + 1) % 100 == 0:
        print(f"  Rolling forecast: {i+1}/{len(test)} done")

pred = pd.Series(predictions, index=test.index)
print(f"\nRolling forecast complete: {len(pred)} predictions")
if failed_steps:
    # Make silent fallbacks visible: they replace the ARIMA forecast with the
    # naive one and therefore affect every metric computed afterwards.
    print(f"  Naive fallback used for {len(failed_steps)} step(s); "
          f"first error at step {failed_steps[0][0]}: {failed_steps[0][1]}")

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# --- Naive benchmark: tomorrow = today ---
# Prices shifted back by one position: element k is the price observed on the
# day before test observation k.
naive_pred = prices.iloc[split-1:-1].values
actual = test.values

# --- Metrics ---
mse_arima = mean_squared_error(actual, pred.values)
mse_naive = mean_squared_error(actual, naive_pred)

rmse_arima = np.sqrt(mse_arima)
rmse_naive = np.sqrt(mse_naive)

# Out-of-sample R² = 1 - MSE_model / MSE_naive
# Positive values mean the model has a lower MSE than the naive benchmark.
r2_oos = 1 - mse_arima / mse_naive

mae_arima = mean_absolute_error(actual, pred.values)
mae_naive = mean_absolute_error(actual, naive_pred)

# MAPE is undefined where the actual price is exactly zero (division by
# zero would make the whole mean inf/NaN), so it is averaged over the
# non-zero observations only. With no zero prices this equals the plain MAPE.
nonzero = actual != 0
mape = np.mean(np.abs((actual[nonzero] - pred.values[nonzero]) / actual[nonzero])) * 100

# Direction accuracy (correct method: compare moves from yesterday)
# Only the sign of the move relative to yesterday's price matters, so the
# plain difference is used; dividing by yesterday's price (as a return)
# gives the same comparison but produces inf/NaN when that price is zero.
actual_dir = np.sign(actual - naive_pred)
pred_dir = np.sign(pred.values - naive_pred)
dir_acc_arima = np.mean(actual_dir == pred_dir) * 100

# CI coverage
# Share of test observations that fall inside the 95% forecast interval.
ci_lower_arr = np.asarray(ci_lower, dtype=float)
ci_upper_arr = np.asarray(ci_upper, dtype=float)
ci_coverage = np.mean((actual >= ci_lower_arr) & (actual <= ci_upper_arr)) * 100

print("=" * 60)
print("  ARIMA(1,1,1) vs Naive Forecast Comparison")
print("=" * 60)
print(f"{'Metric':<25} {'ARIMA':>12} {'Naive':>12}")
print("-" * 60)
print(f"{'RMSE':<25} €{rmse_arima:>11.2f} €{rmse_naive:>11.2f}")
print(f"{'MAE':<25} €{mae_arima:>11.2f} €{mae_naive:>11.2f}")
print(f"{'MAPE':<25} {mape:>11.1f}%")
print(f"{'Direction Accuracy':<25} {dir_acc_arima:>11.1f}% {50.0:>11.1f}%")
print("-" * 60)
print(f"{'R²_OOS (vs naive)':<25} {r2_oos*100:>11.2f}%")
print(f"{'95% CI Coverage':<25} {ci_coverage:>11.1f}%")
print("=" * 60)
if r2_oos <= 0:
    print(">>> VERDICT: ARIMA does NOT beat the naive forecast.")
else:
    print(f">>> VERDICT: ARIMA beats naive by {r2_oos*100:.1f}% R²_OOS.")

# Figure: test-period prices against the rolling forecast and its 95% band
fig, ax = plt.subplots(figsize=(14, 6))
ax.set_title('Rolling 1-Step-Ahead ARIMA(1,1,1) Forecast — Romanian Electricity',
             fontsize=14, fontweight='bold')
ax.set_ylabel('Price (EUR/MWh)')

# Actual series, forecast, and the shaded interval between the CI bounds
ax.plot(test.index, test.values, label='Actual Price', color=COLORS['blue'], lw=1)
ax.plot(pred.index, pred.values, label='ARIMA Forecast', color=COLORS['red'], lw=1)
ax.fill_between(test.index, ci_lower, ci_upper,
                label='95% CI', color=COLORS['red'], alpha=0.15)

# Headline metrics in a white rounded box at the top-left of the axes
textstr = f'R²_OOS = {r2_oos*100:.1f}%\nDirection = {dir_acc_arima:.0f}%\nBeats Naive: {"Yes" if r2_oos > 0 else "No"}'
ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10, va='top',
        bbox={'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.8})

# Frameless three-column legend placed below the plot area
ax.legend(ncol=3, frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.10))
fig.tight_layout()
save_fig(fig, 'arima_electricity.pdf')
plt.show()