[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EN/quantlets/EMQA_arima_electricity/EMQA_arima_electricity.ipynb)

# EMQA_arima_electricity

**Romanian Electricity: ARIMA Price Forecast with Rolling Evaluation**

Load Romanian day-ahead electricity prices, fit ARIMA(1,1,1) on price levels (d=1 handles differencing), perform rolling 1-step-ahead out-of-sample forecast, and compute R², MAE, RMSE, MAPE, direction accuracy.

**Output:** `arima_electricity.pdf`

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide matplotlib style: transparent backgrounds (on screen and in
# saved files), no grid, no top/right spines, 11pt font, 12x6 default figure.
plt.rcParams.update({
    # default figure geometry and text size
    'figure.figsize': (12, 6),
    'font.size': 11,
    # transparent canvas, axes and exported files
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # minimalist axes: no grid, only left/bottom spines
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

# Named hex colors shared by the plots in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save ``fig`` under the file name ``name`` and print a confirmation.

    The figure is written through ``fig.savefig`` with a tight bounding box,
    a transparent background and 300 dpi. Nothing is returned.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 300}
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
import os

def load_ro_prices(paths=None):
    """Load Romanian day-ahead electricity prices.

    Parameters
    ----------
    paths : str, path-like, or iterable of those, optional
        Candidate CSV locations, tried in order; the first one that exists
        on disk is read. When omitted, the repository-relative
        ``../../charts/ro_de_prices_extended.csv`` is tried first, followed
        by the original author's absolute local path (kept only as a
        backward-compatible fallback). Pass ``paths`` explicitly when the
        data lives elsewhere (e.g. on Colab).

    Returns
    -------
    pandas.Series
        The ``ro_price`` column, indexed by the parsed ``date`` column,
        with missing values dropped.

    Raises
    ------
    FileNotFoundError
        If none of the candidate paths exists; the message lists every
        location that was searched.
    """
    if paths is None:
        paths = [
            '../../charts/ro_de_prices_extended.csv',
            '/Users/danielpele/Documents/Energy MBA/charts/ro_de_prices_extended.csv',
        ]
    elif isinstance(paths, (str, os.PathLike)):
        # A single path must not be iterated character by character.
        paths = [paths]
    else:
        # Materialise so a generator can be reused for the error message.
        paths = list(paths)

    for p in paths:
        if os.path.exists(p):
            df = pd.read_csv(p, parse_dates=['date'], index_col='date')
            return df['ro_price'].dropna()

    searched = ", ".join(str(p) for p in paths)
    raise FileNotFoundError(f"Romanian price data not found; searched: {searched}")

# Load the price series and print a short summary: sample size, covered
# date span, mean / standard deviation, and the full describe() table.
prices = load_ro_prices()

print(f"Loaded {len(prices)} daily observations")

first_day = prices.index[0].date()
last_day = prices.index[-1].date()
print(f"Date range: {first_day} to {last_day}")

print(f"Mean: {prices.mean():.1f} EUR/MWh, Std: {prices.std():.1f}")

print("\nDescriptive statistics:")
print(prices.describe())

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

# ADF unit-root test on price levels and on log returns (in percent).
# The log of a price ratio is NaN when the ratio is negative and -inf/+inf
# when a price is exactly zero. dropna() alone would keep the infinities and
# hand non-finite data to adfuller, so they are mapped to NaN first. When all
# prices are strictly positive this yields the same series as before.
returns = (
    np.log(prices / prices.shift(1))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    * 100
)
for name, series in [('Prices', prices), ('Log Returns', returns)]:
    # adf[0] is the test statistic, adf[1] the p-value; the null hypothesis
    # (unit root) is rejected at the 5% level when the p-value is below 0.05.
    adf = adfuller(series.dropna(), autolag='AIC')
    print(f"{name}: ADF = {adf[0]:.3f}, p-value = {adf[1]:.4f} => {'Stationary' if adf[1] < 0.05 else 'Non-stationary'}")

# Train/Test split (80/20), positional: the first 80% of observations are
# the training sample and the remaining 20% the hold-out sample.
split = int(len(prices) * 0.80)
train = prices.iloc[:split]
test = prices.iloc[split:]
print(f"\nTrain: {len(train)} obs ({train.index[0].date()} to {train.index[-1].date()})")
print(f"Test:  {len(test)} obs ({test.index[0].date()} to {test.index[-1].date()})")

In [None]:
# Rolling 1-step ahead forecast with expanding window
# ARIMA(1,1,1) on PRICE LEVELS (d=1 handles differencing internally).
# At step i the model is re-fitted on every observation before test point i
# (prices.iloc[:split + i]) and asked for a single out-of-sample forecast
# together with its 95% interval.
predictions, ci_lower, ci_upper = [], [], []
n_fallback = 0  # number of steps where the naive fallback had to be used

for i in range(len(test)):
    history = prices.iloc[:split + i]
    try:
        model = ARIMA(history, order=(1, 1, 1))
        result = model.fit()
        fc = result.get_forecast(steps=1)
        ci = fc.conf_int(alpha=0.05)
        step_pred = fc.predicted_mean.iloc[0]
        step_lower = ci.iloc[0, 0]
        step_upper = ci.iloc[0, 1]
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt can still stop this long-running loop.
        # Fallback: naive forecast (last observed price) with a band of
        # +/- 2 standard deviations of the history.
        n_fallback += 1
        last_price = history.iloc[-1]
        band = 2 * history.std()
        step_pred = last_price
        step_lower = last_price - band
        step_upper = last_price + band

    # Append exactly once per step, after the try/except, so the three lists
    # always stay aligned with test.index even if a failure happens midway
    # through the try block.
    predictions.append(step_pred)
    ci_lower.append(step_lower)
    ci_upper.append(step_upper)

    if (i + 1) % 100 == 0:
        print(f"  Rolling forecast: {i+1}/{len(test)} done")

pred = pd.Series(predictions, index=test.index)
print(f"\nRolling forecast complete: {len(pred)} predictions")
if n_fallback:
    # Make silent model failures visible instead of hiding them in the metrics.
    print(f"  Naive fallback used for {n_fallback} of {len(pred)} steps (ARIMA fit/forecast failed)")

In [None]:
# Evaluation metrics for the rolling 1-step-ahead forecast on the test sample
mae = np.mean(np.abs(test.values - pred.values))
rmse = np.sqrt(np.mean((test.values - pred.values)**2))

# MAPE is undefined where the actual price is exactly zero (division by zero
# would give inf), so those observations are excluded. Identical to the plain
# formula when no actual price is zero.
nonzero = test.values != 0
mape = np.mean(np.abs((test.values[nonzero] - pred.values[nonzero]) / test.values[nonzero])) * 100

# Out-of-sample R² measured against the test-sample mean
ss_res = np.sum((test.values - pred.values)**2)
ss_tot = np.sum((test.values - test.mean())**2)
r2 = 1 - ss_res / ss_tot

# Direction accuracy: for a 1-step-ahead forecast the predicted direction is
# the forecast minus the PREVIOUS OBSERVED price, compared with the realised
# change. (Differencing consecutive forecasts would instead compare the
# forecast path with itself, not with the information available at t-1.)
dir_actual = np.sign(np.diff(test.values))
dir_pred = np.sign(pred.values[1:] - test.values[:-1])
direction_acc = np.mean(dir_actual == dir_pred) * 100

print("Out-of-Sample Forecast Metrics")
print("=" * 40)
print(f"R²             = {r2:.3f}")
print(f"MAE            = {mae:.1f} EUR/MWh")
print(f"RMSE           = {rmse:.1f} EUR/MWh")
print(f"MAPE           = {mape:.1f}%")
print(f"Direction Acc.  = {direction_acc:.0f}%")

# Figure: realised test-sample prices against the rolling ARIMA forecast,
# with the 95% forecast interval shaded; saved as arima_electricity.pdf.
fig, ax = plt.subplots(figsize=(14, 6))

# Two line series first, then the shaded interval (drawing order is kept so
# the legend entries appear in the same sequence).
ax.plot(test.index, test.values, label='Actual Price', color=COLORS['blue'], linewidth=1)
ax.plot(pred.index, pred.values, label='ARIMA Forecast', color=COLORS['red'], linewidth=1)
ax.fill_between(test.index, ci_lower, ci_upper, label='95% CI', color=COLORS['red'], alpha=0.15)

# Labels, title (with the out-of-sample R²) and a frameless legend placed
# below the axes in three columns.
ax.set(ylabel='Price (EUR/MWh)')
ax.set_title(
    f'Romanian Electricity: ARIMA(1,1,1) Rolling Forecast (R²={r2:.3f})',
    fontweight='bold',
    fontsize=14,
)
ax.legend(ncol=3, frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.10))

fig.tight_layout()
save_fig(fig, 'arima_electricity.pdf')
plt.show()