[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EMQA_actual_vs_predicted/EMQA_actual_vs_predicted.ipynb)

# EMQA_actual_vs_predicted

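Rolling 1-step-ahead ensemble forecast that averages Random Forest (RF), Gradient Boosting (GB), and a simulated LSTM component (a lag-weighted smoother with added noise), with 95% bands taken from the spread of the individual tree/stage predictions.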
Evaluates out-of-sample accuracy using **R²_OOS** (vs naive benchmark), RMSE, MAE, and Direction Accuracy.

**Key Metric:** $R^2_{OOS} = 1 - \mathrm{MSE}_{model} / \mathrm{MSE}_{naive}$ (measures improvement over the naive "tomorrow = today" benchmark; a positive value means the ensemble beats it)

**Output:** `ml_actual_vs_predicted.pdf`

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib defaults for every figure in this notebook:
# transparent backgrounds, no grid, no top/right spines.
plt.rcParams.update({
    # canvas
    'figure.figsize': (12, 6),
    'figure.facecolor': 'none',
    # axes
    'axes.facecolor': 'none',
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
    # text
    'font.size': 11,
    # export
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
})

# Named hex colours shared by all plots in the notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Write ``fig`` to the path ``name`` and report the saved file on stdout.

    The figure is exported with a tight bounding box, a transparent
    background and 300 dpi.
    """
    export_options = {'dpi': 300, 'transparent': True, 'bbox_inches': 'tight'}
    fig.savefig(name, **export_options)
    print("Saved: {}".format(name))

In [None]:
# Price CSV hosted in the QuantLet/EMQA repository; the 'date' column is
# parsed as datetimes and used as the index.
url = (
    'https://raw.githubusercontent.com/QuantLet/EMQA/main/'
    'EMQA_actual_vs_predicted/ro_de_prices_full.csv'
)
ro = pd.read_csv(url, index_col='date', parse_dates=['date'])
print('Loaded {} observations'.format(len(ro)))
print(ro.columns.to_list())
ro.head()

In [None]:
# Feature engineering.
# Every predictor is built from information dated strictly before the target
# day (lags / shifted rolling windows) or from the calendar, so the same-day
# price columns are excluded from `feature_cols` at the end.
# Column insertion order is kept as-is because it defines the feature order
# the models are trained on.
data = ro[['ro_price']].dropna().copy()

# Optional exogenous series, aligned on the date index when present
for raw_col in ('de_price', 'gas_price'):
    if raw_col in ro.columns:
        data[raw_col] = ro[raw_col]

# Target is the same-day Romanian price
data['target'] = data['ro_price']

# Own-price lags 1..5 (row-based shifts)
for lag in (1, 2, 3, 4, 5):
    data['ro_lag_{}'.format(lag)] = data['ro_price'].shift(lag)

# Calendar features taken from the datetime index
data['day_of_week'] = data.index.dayofweek
data['month'] = data.index.month

# Rolling mean / std over 7 and 30 rows, computed on the price shifted by one
# row so the window ends the row before the target
for w in (7, 30):
    window = data['ro_price'].shift(1).rolling(w)
    data['ro_ma_{}'.format(w)] = window.mean()
    data['ro_std_{}'.format(w)] = window.std()

# One-row lag of each exogenous series that was attached above
for raw_col, lag_col in (('de_price', 'de_lag_1'), ('gas_price', 'gas_lag_1')):
    if raw_col in data.columns:
        data[lag_col] = data[raw_col].shift(1)

# Drop every row with a missing value in any column
data = data.dropna()

excluded = ('target', 'ro_price', 'de_price', 'gas_price')
feature_cols = [c for c in data.columns if c not in excluded]

print("Dataset: {} rows, {} features".format(len(data), len(feature_cols)))
print("Features: {}".format(feature_cols))

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Settings and accumulators for the rolling expanding-window
# RF+GB+LSTM ensemble forecast.
retrain_every = 30                  # refit the tree models every 30 steps
init_train = int(0.6 * len(data))   # first 60% of rows form the initial training window

# Fitted on the first step of the loop
rf_model = gb_model = None

# Per-step outputs collected by the loop
ens_preds, actuals, dates_out = [], [], []
ci_lo_list = []
ci_hi_list = []

# Rolling 1-step-ahead forecast over rows init_train .. len(data)-1.
#
# Loop-invariant extraction is hoisted: the original rebuilt
# `data[feature_cols]` (a full column-subset copy) on every iteration.
X_all = data[feature_cols].values
y_all = data['target'].values
ro_series = data['ro_price']

for i in range(init_train, len(data)):
    step = i - init_train

    # Refit both tree models on all rows strictly before i (expanding window)
    # every `retrain_every` steps; step 0 always triggers the first fit.
    if step % retrain_every == 0:
        X_tr = X_all[:i]
        y_tr = y_all[:i]

        rf_model = RandomForestRegressor(
            n_estimators=200, max_depth=10, random_state=42, n_jobs=-1).fit(X_tr, y_tr)
        gb_model = GradientBoostingRegressor(
            n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42).fit(X_tr, y_tr)

    # Single-row feature matrix for the day being forecast
    X_step = X_all[i:i + 1]

    # RF prediction + the prediction of every individual tree
    rf_pred = rf_model.predict(X_step)[0]
    rf_tree_preds = np.array([t.predict(X_step)[0] for t in rf_model.estimators_])

    # GB prediction + staged predictions; only the second half of the
    # boosting stages is kept as the GB contribution to the spread
    gb_pred = gb_model.predict(X_step)[0]
    gb_staged = np.array([p[0] for p in gb_model.staged_predict(X_step)])
    half = max(1, gb_model.n_estimators // 2)
    gb_tree_preds = gb_staged[half:]

    # LSTM simulation: lag-weighted smoother (matches lecture notebook approach).
    # This is not a trained network: it blends yesterday's price with the 7- and
    # 14-row means and adds N(0, 5) noise.
    if i >= 14:
        lag1 = ro_series.iloc[i - 1]
        ma7 = ro_series.iloc[i - 7:i].mean()
        ma14 = ro_series.iloc[i - 14:i].mean()
        # A local legacy RandomState seeded with i gives the same draw as the
        # former `np.random.seed(i)` + `np.random.normal`, but no longer
        # reseeds the process-global NumPy RNG on every step.
        noise = np.random.RandomState(i).normal(0, 5.0)
        lstm_pred = 0.60 * lag1 + 0.30 * ma7 + 0.10 * ma14 + noise
    else:
        # Not enough history for the 14-row mean: fall back to yesterday's price
        lstm_pred = ro_series.iloc[i - 1]

    # Ensemble: equal-weight average of RF + GB + LSTM
    ens_pred = (rf_pred + gb_pred + lstm_pred) / 3
    ens_preds.append(ens_pred)

    # Band: 2.5th / 97.5th percentile of the pooled RF tree predictions and GB
    # staged predictions.
    # NOTE(review): the pool excludes the LSTM component, so the band describes
    # the spread of the tree models and is not guaranteed to contain `ens_pred`.
    all_tree_preds = np.concatenate([rf_tree_preds, gb_tree_preds])
    ci_lo_list.append(np.percentile(all_tree_preds, 2.5))
    ci_hi_list.append(np.percentile(all_tree_preds, 97.5))

    actuals.append(y_all[i])
    dates_out.append(data.index[i])

# Turn the per-step Python lists collected by the loop into NumPy arrays
# (and the dates into a DatetimeIndex) for vectorised metrics and plotting.
dates_out = pd.DatetimeIndex(dates_out)
actuals = np.array(actuals)
ens_preds = np.array(ens_preds)
ci_lo, ci_hi = np.array(ci_lo_list), np.array(ci_hi_list)

# --- Naive benchmark: tomorrow = today ---
# Previous row's price for each forecast row, i.e. rows init_train-1 .. len-2,
# which lines up one-to-one with `actuals` (rows init_train .. len-1).
naive_preds = data['target'].iloc[init_train-1:-1].values

# --- Metrics ---
mae = mean_absolute_error(actuals, ens_preds)

# MSE of the ensemble is computed once and reused for RMSE and R²_OOS
mse_model = mean_squared_error(actuals, ens_preds)
mse_naive = mean_squared_error(actuals, naive_preds)
rmse = np.sqrt(mse_model)

# R²_OOS = 1 - MSE_model / MSE_naive
r2_oos = 1 - mse_model / mse_naive

# Direction accuracy (from yesterday): does the forecast move the same way
# relative to yesterday's price as the realised price did?
# The signs of the raw differences are compared. Dividing by yesterday's price
# (as a percentage return) never changes whether the two signs agree, but it
# yields NaN/inf when that price is exactly 0, which miscounts those days.
actual_dir = np.sign(actuals - naive_preds)
pred_dir = np.sign(ens_preds - naive_preds)
dir_acc = np.mean(actual_dir == pred_dir) * 100

# Console summary of the out-of-sample metrics, framed by 60-character rules.
rule = "=" * 60
print(rule)
print("   Ensemble (RF+GB+LSTM) vs Naive Forecast")
print(rule)
print("{:<25} {:.2f} EUR/MWh".format('MAE', mae))
print("{:<25} {:.2f} EUR/MWh".format('RMSE', rmse))
print("{:<25} {:.1f}%".format('R²_OOS (vs naive)', r2_oos * 100))
print("{:<25} {:.1f}%".format('Direction Accuracy', dir_acc))
print(rule)

# Verdict line: a positive R²_OOS means the ensemble's MSE is below the naive MSE
verdict = (
    ">>> Ensemble beats naive by {:.1f}% R²_OOS".format(r2_oos * 100)
    if r2_oos > 0
    else ">>> Ensemble does NOT beat naive"
)
print(verdict)

In [None]:
# Plot 1: actual vs ensemble forecast over time, with the percentile band
fig, ax = plt.subplots(figsize=(14, 6))

forecast_color = COLORS['red']
ax.plot(dates_out, actuals, label='Actual', color=COLORS['blue'], lw=1.5)
ax.plot(dates_out, ens_preds, label='Ensemble Forecast',
        color=forecast_color, lw=1.5, ls='--')
ax.fill_between(dates_out, ci_lo, ci_hi, label='95% CI (tree bootstrap)',
                color=forecast_color, alpha=0.12)

# Metrics box in the top-left corner (axes coordinates)
beats_naive = 'Yes' if r2_oos > 0 else 'No'
textstr = '\n'.join((
    'R$^2_{{OOS}}$ = {:.1f}%'.format(r2_oos * 100),
    'Direction = {:.0f}%'.format(dir_acc),
    'Beats Naive: {}'.format(beats_naive),
))
box_style = {'boxstyle': 'round', 'facecolor': 'white', 'alpha': 0.8}
ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=10,
        verticalalignment='top', bbox=box_style)

ax.set_title('Romanian Electricity: Rolling RF+GB+LSTM Ensemble Forecast',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Price (EUR/MWh)')
# Legend sits below the axes, one row of three entries
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10), frameon=False, ncol=3)

plt.tight_layout()
plt.show()

In [None]:
# Plot 2: scatter of actual vs ensemble-predicted prices with a metrics box;
# this is the figure exported to ml_actual_vs_predicted.pdf
fig, ax = plt.subplots(figsize=(8, 8))

ax.scatter(actuals, ens_preds, s=15, alpha=0.3,
           color=COLORS['blue'], edgecolors='none')

# 45-degree reference line spanning the joint range of both series
lower = min(actuals.min(), ens_preds.min())
upper = max(actuals.max(), ens_preds.max())
lims = [lower, upper]
ax.plot(lims, lims, label='Perfect Prediction',
        color=COLORS['red'], ls='--', lw=1.5)

# Stats box in the top-left corner (axes coordinates)
textstr = '\n'.join((
    'R$^2_{{OOS}}$ = {:.1f}%'.format(r2_oos * 100),
    'Direction = {:.0f}%'.format(dir_acc),
    'MAE = {:.1f} EUR/MWh'.format(mae),
    'RMSE = {:.1f}'.format(rmse),
))
props = {
    'boxstyle': 'round,pad=0.4',
    'facecolor': 'white',
    'alpha': 0.8,
    'edgecolor': COLORS['gray'],
}
ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=11,
        verticalalignment='top', bbox=props)

ax.set_title('Scatter: Actual vs Ensemble Predicted', fontsize=14, fontweight='bold')
ax.set_xlabel('Actual Price (EUR/MWh)')
ax.set_ylabel('Predicted Price (EUR/MWh)')
# Legend sits below the axes
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10), frameon=False)

plt.tight_layout()
save_fig(fig, 'ml_actual_vs_predicted.pdf')
plt.show()