# 6 — PatchTST: Solar Generation & Electricity Price
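Multivariate Patch Time Series Transformer (PatchTST). The 168-hour context window is split into
7 non-overlapping patches of 24 hours each, and the future weather/time covariates of the forecast
day are embedded as an 8th token. A Transformer encoder over these 8 tokens feeds a head that
predicts the next 24 hours.

One XGBoost model per forecast horizon then corrects the PatchTST residuals.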

In [1]:
import gc
import json
import os
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import xgboost as xgb
from torch.utils.data import Dataset, DataLoader

warnings.filterwarnings('ignore')

# Seed the NumPy and PyTorch generators so that weight initialisation, DataLoader
# shuffling and dropout are repeatable under Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

df = pd.read_parquet('../cleaned_data.parquet')
df['time'] = pd.to_datetime(df['time'], utc=True)

# Every split below is positional (row counts are used as slice bounds), which is
# only meaningful when the rows are in chronological order.
assert df['time'].is_monotonic_increasing, 'cleaned_data.parquet must be sorted by time'

device = torch.device('cpu')

# Rows up to and including 2017 (UTC) form the training period; the rest is the test period.
train_mask = df['time'].dt.year <= 2017
train_end = int(train_mask.sum())
test_start = train_end
context_length = 168      # hours of history fed to the model
prediction_length = 24    # hours forecast per window

print(f'Shape: {df.shape}')
print(f'Train: {train_end}, Test: {len(df) - train_end}')
print(f'Device: {device}')

Shape: (35056, 80)
Train: 26280, Test: 8776
Device: cpu


In [2]:
# Solar inputs: four weather variables for each of five cities, plus hour and month.
target_col = 'generation solar'
tso_col = 'forecast solar day ahead'

# Variable-major order: all cities for clouds_all, then temp, temp_max, humidity.
weather_cols = [
    f'{variable}_{city}'
    for variable in ('clouds_all', 'temp', 'temp_max', 'humidity')
    for city in ('madrid', 'bilbao', 'barcelona', 'seville', 'valencia')
]
time_cols = ['hour', 'month']
feature_cols = weather_cols + time_cols

# Normalisation statistics are taken from the training rows only.
train_target = df.loc[train_mask, target_col]
train_features = df.loc[train_mask, feature_cols]
target_mean = train_target.mean()
target_std = train_target.std()
feat_means = train_features.mean()
feat_stds = train_features.std().replace(0, 1)  # zero-variance columns are divided by 1

target_norm = (df[target_col].values - target_mean) / target_std
features_norm = ((df[feature_cols] - feat_means) / feat_stds).fillna(0).values

# Column 0 holds the normalised target; columns 1.. hold the normalised features.
all_data = np.column_stack([target_norm, features_norm]).astype(np.float32)

print(f'Input channels: {all_data.shape[1]} (1 target + {len(feature_cols)} features)')
print(f'Target mean: {target_mean:.0f} MW, std: {target_std:.0f} MW')

Input channels: 23 (1 target + 22 features)
Target mean: 1453 MW, std: 1687 MW


In [3]:
class TimeSeriesDataset(Dataset):
    """Sliding windows over a 2-D array whose column 0 is the target.

    Window ``idx`` starts at row ``start_idx + idx`` and yields three tensors:

    * ``x``        - rows of the context, all columns, shape ``(ctx_len, n_cols)``
    * ``x_future`` - rows of the forecast span, columns ``1:``, shape ``(pred_len, n_cols - 1)``
    * ``y``        - rows of the forecast span, column 0, shape ``(pred_len,)``

    Args:
        data: 2-D NumPy array (rows are time steps).
        ctx_len: number of context rows per window.
        pred_len: number of forecast rows per window.
        start_idx: first row (inclusive) a window may touch.
        end_idx: last row (exclusive) a window may touch.
    """

    def __init__(self, data, ctx_len, pred_len, start_idx, end_idx):
        self.data = data
        self.ctx_len = ctx_len
        self.pred_len = pred_len
        self.start = start_idx
        self.end = end_idx

    def __len__(self):
        # Clamp at zero: a range shorter than one full window holds no samples,
        # and len() raises on a negative value.
        return max(0, self.end - self.start - self.ctx_len - self.pred_len + 1)

    def __getitem__(self, idx):
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            # Without this check an out-of-range index would silently return a
            # window that is truncated or that reaches outside [start, end).
            raise IndexError(f'window index {idx} out of range for {n_windows} windows')

        ctx_start = self.start + idx
        ctx_stop = ctx_start + self.ctx_len
        pred_stop = ctx_stop + self.pred_len

        x = self.data[ctx_start:ctx_stop]
        y = self.data[ctx_stop:pred_stop, 0]
        x_future = self.data[ctx_stop:pred_stop, 1:]
        return torch.from_numpy(x), torch.from_numpy(x_future), torch.from_numpy(y)

# Chronological split of the training period: first 80 % to fit, last 20 % to validate.
val_split = int(train_end * 0.8)

train_ds = TimeSeriesDataset(
    data=all_data, ctx_len=context_length, pred_len=prediction_length,
    start_idx=0, end_idx=val_split,
)
val_ds = TimeSeriesDataset(
    data=all_data, ctx_len=context_length, pred_len=prediction_length,
    start_idx=val_split, end_idx=train_end,
)

# Only the training loader shuffles; validation keeps the window order.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=128, shuffle=False, num_workers=0)

print(f'Train: {len(train_ds)}, Val: {len(val_ds)}')

Train: 20846, Val: 5069


In [4]:
class PatchTST(nn.Module):
    """Patch-based Transformer encoder for multi-step forecasting.

    The context is cut into ``ctx_len // patch_len`` consecutive patches. Each
    patch (all time steps and channels flattened) is linearly embedded into one
    token; the flattened future covariates are embedded into one extra token.
    The tokens plus a learned positional embedding go through a Transformer
    encoder, and an MLP head maps the flattened encoder output to ``pred_len``
    values.

    Args:
        n_input: channels per context time step.
        n_future: channels per future-covariate time step.
        d_model: token embedding width.
        n_heads: attention heads per encoder layer.
        n_layers: number of encoder layers.
        pred_len: forecast length (also the number of future-covariate steps).
        ctx_len: context length; must be a positive multiple of ``patch_len``.
        patch_len: time steps per patch.
        dropout: dropout rate used in the encoder layers and the head.

    Raises:
        ValueError: if ``ctx_len`` is not a positive multiple of ``patch_len``.
    """

    def __init__(self, n_input, n_future, d_model=64, n_heads=4, n_layers=2,
                 pred_len=24, ctx_len=168, patch_len=24, dropout=0.1):
        super().__init__()
        # forward() reshapes the context into whole patches; catch a bad
        # configuration here rather than as an opaque shape error later.
        if patch_len <= 0 or ctx_len <= 0 or ctx_len % patch_len != 0:
            raise ValueError(
                f'ctx_len ({ctx_len}) must be a positive multiple of patch_len ({patch_len})'
            )

        self.n_patches = ctx_len // patch_len  # 7 with the defaults
        self.patch_embed = nn.Linear(patch_len * n_input, d_model)
        self.future_embed = nn.Linear(pred_len * n_future, d_model)
        # One position per context patch plus one for the future token.
        self.pos_embed = nn.Parameter(torch.randn(1, self.n_patches + 1, d_model) * 0.02)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_heads, dim_feedforward=d_model * 2,
            dropout=dropout, batch_first=True, norm_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
        self.norm = nn.LayerNorm(d_model)

        total_dim = (self.n_patches + 1) * d_model
        self.head = nn.Sequential(
            nn.Linear(total_dim, d_model * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model * 2, pred_len),
        )

    def forward(self, x_context, x_future):
        """Return forecasts of shape ``(batch, pred_len)``.

        Args:
            x_context: tensor reshaped to ``(batch, n_patches, patch_len * n_input)``.
            x_future: tensor flattened to ``(batch, pred_len * n_future)``.
        """
        bs = x_context.shape[0]
        patches = x_context.reshape(bs, self.n_patches, -1)
        patch_tokens = self.patch_embed(patches)
        future_token = self.future_embed(x_future.reshape(bs, -1)).unsqueeze(1)
        # The future token is appended after the context patches.
        tokens = torch.cat([patch_tokens, future_token], dim=1) + self.pos_embed
        out = self.norm(self.transformer(tokens))
        return self.head(out.reshape(bs, -1))

# The context carries every column (target + features); the future token carries features only.
n_input = all_data.shape[1]
n_future = len(feature_cols)
model = PatchTST(n_input, n_future).to(device)

n_params = sum(param.numel() for param in model.parameters())
print(f'PatchTST parameters: {n_params:,}')

PatchTST parameters: 205,592


In [5]:
def train_patchtst(model, train_loader, val_loader, n_epochs=60, patience=10, lr=1e-3):
    """Fit ``model`` with Adam, cosine LR decay and early stopping on validation MSE.

    Args:
        model: module called as ``model(x, x_future)``.
        train_loader: iterable of ``(x, x_future, y)`` batches used for fitting.
        val_loader: iterable of ``(x, x_future, y)`` batches used for model selection.
        n_epochs: maximum number of epochs (also the cosine schedule period).
        patience: stop after this many consecutive epochs without a new best
            validation loss.
        lr: initial Adam learning rate.

    Returns:
        The same ``model`` object. Its weights are those of the epoch with the
        lowest validation loss; if no epoch produced a comparable validation
        loss (empty ``val_loader`` or NaN losses) the final weights are kept.
    """
    # Run on the device the model already lives on instead of depending on a
    # notebook-level `device` variable being defined.
    run_device = next(model.parameters()).device

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)
    criterion = nn.MSELoss()

    best_val = float('inf')
    best_state = None
    no_improve = 0

    for epoch in range(n_epochs):
        model.train()
        train_losses = []
        for x, xf, y in train_loader:
            x, xf, y = x.to(run_device), xf.to(run_device), y.to(run_device)
            loss = criterion(model(x, xf), y)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_losses.append(loss.item())

        model.eval()
        val_losses = []
        with torch.no_grad():
            for x, xf, y in val_loader:
                x, xf, y = x.to(run_device), xf.to(run_device), y.to(run_device)
                val_losses.append(criterion(model(x, xf), y).item())

        scheduler.step()
        # An empty loader yields NaN explicitly rather than a NumPy empty-mean warning.
        tl = np.mean(train_losses) if train_losses else float('nan')
        vl = np.mean(val_losses) if val_losses else float('nan')

        if vl < best_val:
            best_val = vl
            # Clone so later optimizer steps cannot overwrite the snapshot.
            best_state = {k: v.clone() for k, v in model.state_dict().items()}
            no_improve = 0
            mark = ' *'
        else:
            no_improve += 1
            mark = ''

        # Log the first epoch, every 10th epoch and every new best.
        if (epoch + 1) % 10 == 0 or epoch == 0 or mark:
            print(f'Epoch {epoch+1:3d}/{n_epochs}, Train: {tl:.5f}, Val: {vl:.5f}{mark}')

        if no_improve >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

    if best_state is not None:
        model.load_state_dict(best_state)
    else:
        # Nothing ever beat +inf (no validation batches, or NaN losses), so
        # there is no snapshot to restore.
        print('No usable validation loss was recorded; keeping the final weights')
    print(f'Best val loss: {best_val:.5f}')
    return model

In [6]:
print('=== Training PatchTST for Solar ===')
model = train_patchtst(model, train_loader, val_loader)
model.eval()

# Later cells locate test window w at row `test_start + w * prediction_length`,
# so no test window may be skipped here. Fail loudly instead of dropping windows.
if test_start < context_length:
    raise ValueError(
        f'test_start ({test_start}) must be >= context_length ({context_length})'
    )


def _solar_window_forecast(i):
    """Forecast rows [i, i + prediction_length) in MW from the context_length rows before i."""
    x = torch.from_numpy(all_data[i - context_length : i]).unsqueeze(0).to(device)
    xf = torch.from_numpy(all_data[i : i + prediction_length, 1:]).unsqueeze(0).to(device)
    with torch.no_grad():
        # squeeze(0) drops only the batch axis, so a 1-step horizon stays 1-D.
        pred = model(x, xf).squeeze(0).cpu().numpy() * target_std + target_mean
    return np.clip(pred, 0, None)  # negative forecasts are clipped to zero


# Test predictions: non-overlapping windows. The `+ 1` keeps the final window
# when it ends exactly on the last row.
test_preds, test_actuals, test_times = [], [], []
for i in range(test_start, len(all_data) - prediction_length + 1, prediction_length):
    test_preds.append(_solar_window_forecast(i))
    test_actuals.append(all_data[i : i + prediction_length, 0] * target_std + target_mean)
    test_times.append(df['time'].iloc[i : i + prediction_length].values)

raw_mae = np.mean([np.mean(np.abs(a - p)) for a, p in zip(test_actuals, test_preds)])
print(f'\nTest windows: {len(test_preds)}')
print(f'Raw PatchTST Solar MAE: {raw_mae:.1f} MW')

# Train predictions (for stacking notebook); each entry of train_windows_info is
# (window number, first forecast row).
train_preds_raw, train_windows_info = [], []
train_starts = range(context_length, train_end - prediction_length + 1, prediction_length)
for w, i in enumerate(train_starts):
    train_preds_raw.append(_solar_window_forecast(i))
    train_windows_info.append((w, i))

print(f'Train prediction windows: {len(train_preds_raw)}')

=== Training PatchTST for Solar ===
Epoch   1/60, Train: 0.20665, Val: 0.16789 *
Epoch   2/60, Train: 0.13268, Val: 0.16303 *
Early stopping at epoch 12
Best val loss: 0.16303

Test windows: 364
Raw PatchTST Solar MAE: 469.3 MW
Train prediction windows: 1088


In [7]:
# XGBoost residual correction for solar
# One regressor per horizon h learns (actual - PatchTST forecast) from the
# normalised features at that hour, the PatchTST forecast and the TSO forecast.
xgb_feats, xgb_actuals, xgb_tso = [], [], []
for _, start_i in train_windows_info:
    rows = slice(start_i, start_i + prediction_length)
    xgb_feats.append(all_data[rows, 1:])
    xgb_actuals.append(all_data[rows, 0] * target_std + target_mean)
    xgb_tso.append(df[tso_col].iloc[rows].values)

n_w = len(train_preds_raw)
xgb_split = int(n_w * 0.8)
print(f'XGBoost correction: {xgb_split} train, {n_w - xgb_split} val windows')

# Stack once to (window, horizon[, feature]) so each horizon is a plain slice.
train_feats_arr = np.stack(xgb_feats)
train_raw_arr = np.stack(train_preds_raw)
train_tso_arr = np.stack(xgb_tso)
train_resid_arr = np.stack(xgb_actuals) - train_raw_arr

correction_models = []
for h in range(prediction_length):
    X = np.column_stack([train_feats_arr[:, h, :], train_raw_arr[:, h], train_tso_arr[:, h]])
    y = train_resid_arr[:, h]
    # Chronological split: the earliest windows fit, the latest drive early stopping.
    X_tr, X_va = X[:xgb_split], X[xgb_split:]
    y_tr, y_va = y[:xgb_split], y[xgb_split:]
    xgbr = xgb.XGBRegressor(
        n_estimators=500, max_depth=4, learning_rate=0.05,
        tree_method='hist', random_state=42, verbosity=0, early_stopping_rounds=50,
    )
    xgbr.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=False)
    correction_models.append(xgbr)

stopped = [m.best_iteration + 1 for m in correction_models]
print(f'Trees per horizon: min={min(stopped)}, max={max(stopped)}, mean={np.mean(stopped):.0f}')

# Apply to test: one batched predict per horizon instead of one call per
# (window, horizon) pair. Test window w covers rows
# [test_start + w * prediction_length, test_start + (w + 1) * prediction_length).
test_rows = [
    slice(test_start + w * prediction_length, test_start + (w + 1) * prediction_length)
    for w in range(len(test_preds))
]
test_feats_arr = np.stack([all_data[rows, 1:] for rows in test_rows])
test_tso_arr = np.stack([df[tso_col].iloc[rows].values for rows in test_rows])
test_raw_arr = np.stack(test_preds)

corrected_arr = np.column_stack([
    test_raw_arr[:, h] + correction_models[h].predict(
        np.column_stack([test_feats_arr[:, h, :], test_raw_arr[:, h], test_tso_arr[:, h]])
    )
    for h in range(prediction_length)
])
corrected_preds = [np.clip(window_pred, 0, None) for window_pred in corrected_arr]

corr_mae = np.mean([np.mean(np.abs(a - c)) for a, c in zip(test_actuals, corrected_preds)])
flat_actual = np.concatenate(test_actuals)
flat_corr = np.concatenate(corrected_preds)
flat_raw = np.concatenate(test_preds)
corr_rmse = np.sqrt(np.mean((flat_actual - flat_corr) ** 2))
# The denominator is floored at 1 so zero actuals do not divide by zero.
corr_mape = np.mean(np.abs((flat_actual - flat_corr) / np.clip(np.abs(flat_actual), 1, None))) * 100

tso_flat = test_tso_arr.ravel()  # window-major, same order as flat_actual
tso_mae = np.mean(np.abs(flat_actual - tso_flat))
tso_rmse = np.sqrt(np.mean((flat_actual - tso_flat) ** 2))

# NOTE(review): this choice between corrected and raw forecasts is made on test
# MAE, so the reported final metric is optimistically biased. Consider choosing
# on the validation windows instead.
if corr_mae < raw_mae:
    final_preds_s = corrected_preds
    final_mae_s = corr_mae
    print(f'\nCorrection helps! Raw: {raw_mae:.1f} -> Corrected: {corr_mae:.1f} MW')
else:
    final_preds_s = test_preds
    final_mae_s = raw_mae
    # Report RMSE/MAPE of the forecast that was actually selected.
    corr_rmse = np.sqrt(np.mean((flat_actual - flat_raw) ** 2))
    corr_mape = np.mean(np.abs((flat_actual - flat_raw) / np.clip(np.abs(flat_actual), 1, None))) * 100
    print(f'\nCorrection does not help. Using raw: {raw_mae:.1f} MW')

print(f'TSO MAE: {tso_mae:.1f} MW')

XGBoost correction: 870 train, 218 val windows
Trees per horizon: min=71, max=300, mean=187

Correction helps! Raw: 469.3 -> Corrected: 366.6 MW
TSO MAE: 141.1 MW


In [8]:
# Export solar: JSON + CSVs
out_dir = '../dashboard/public/data'
os.makedirs(out_dir, exist_ok=True)

# Dashboard sample: test windows 9-15 (seven consecutive windows), as far as they exist.
sample_data = []
for w in range(9, min(16, len(final_preds_s))):
    for h in range(prediction_length):
        idx = test_start + w * prediction_length + h
        t = pd.Timestamp(test_times[w][h])
        sample_data.append({
            'time': t.strftime('%Y-%m-%d %H:%M'),
            'actual': round(float(test_actuals[w][h]), 1),
            'predicted': round(float(final_preds_s[w][h]), 1),
            'tso': round(float(df[tso_col].iloc[idx]), 1),
        })

avg_trees = int(np.mean(stopped))
# Name the XGBoost stage only when the corrected forecast was selected, so the
# label matches the forecast behind the exported 'mae' / 'rmse' / 'mape'.
if final_preds_s is corrected_preds:
    model_label = f'PatchTST + XGBoost ({avg_trees} trees avg)'
else:
    model_label = 'PatchTST'

output = {
    'target': 'solar',
    'model': model_label,
    'prediction_length_hours': prediction_length,
    'context_length_hours': context_length,
    'metrics': {
        'mae': round(float(final_mae_s), 1),
        'rmse': round(float(corr_rmse), 1),
        'mape': round(float(corr_mape), 1),
        'tso_mae': round(float(tso_mae), 1),
        'tso_rmse': round(float(tso_rmse), 1),
        'raw_mae': round(float(raw_mae), 1),
        'raw_rmse': round(float(np.sqrt(np.mean((flat_actual - flat_raw) ** 2))), 1),
    },
    'sample_forecast': sample_data,
}

with open(f'{out_dir}/patchtst_solar.json', 'w') as f:
    json.dump(output, f, indent=2)

# Test CSV: raw (uncorrected) PatchTST forecasts next to actuals and the TSO forecast.
rows = []
for w, (pred, actual, times) in enumerate(zip(test_preds, test_actuals, test_times)):
    idx = test_start + w * prediction_length
    tso = df[tso_col].iloc[idx : idx + prediction_length].values
    for h in range(prediction_length):
        rows.append({
            'time': pd.Timestamp(times[h]).strftime('%Y-%m-%d %H:%M'),
            'window': w, 'horizon': h,
            'actual': round(float(actual[h]), 2),
            'patchtst_pred': round(float(pred[h]), 2),
            'tso': round(float(tso[h]), 2),
        })
pd.DataFrame(rows).to_csv('patchtst_solar_predictions.csv', index=False)

# Train CSV: raw forecasts keyed by the row position (`global_idx`) in the data.
train_rows = []
for pred, (w, start_i) in zip(train_preds_raw, train_windows_info):
    for h in range(prediction_length):
        train_rows.append({
            'global_idx': start_i + h,
            'window': w, 'horizon': h,
            'patchtst_pred': round(float(pred[h]), 2),
        })
pd.DataFrame(train_rows).to_csv('patchtst_solar_train_predictions.csv', index=False)

print(f'Saved patchtst_solar.json (MAE: {output["metrics"]["mae"]} MW)')
print(f'Saved patchtst_solar_predictions.csv ({len(rows)} rows)')
print(f'Saved patchtst_solar_train_predictions.csv ({len(train_rows)} rows)')

Saved patchtst_solar.json (MAE: 366.6 MW)
Saved patchtst_solar_predictions.csv (8736 rows)
Saved patchtst_solar_train_predictions.csv (26112 rows)


## Price Forecast

In [9]:
# Price features: pressure, temp, temp_max, temp_min, humidity, wind_speed × 5 cities + hour, month
target_col_p = 'price actual'
tso_col_p = 'price day ahead'

# Variable-major order: all cities for pressure, then temp, temp_max, ...
weather_cols_p = [
    f'{variable}_{city}'
    for variable in ('pressure', 'temp', 'temp_max', 'temp_min', 'humidity', 'wind_speed')
    for city in ('madrid', 'bilbao', 'barcelona', 'seville', 'valencia')
]
time_cols_p = ['hour', 'month']
feature_cols_p = weather_cols_p + time_cols_p

# Normalisation statistics are taken from the training rows only.
target_mean_p = df.loc[train_mask, target_col_p].mean()
target_std_p = df.loc[train_mask, target_col_p].std()
feat_means_p = df.loc[train_mask, feature_cols_p].mean()
feat_stds_p = df.loc[train_mask, feature_cols_p].std().replace(0, 1)  # zero-variance columns

target_norm_p = (df[target_col_p].values - target_mean_p) / target_std_p
features_norm_p = ((df[feature_cols_p] - feat_means_p) / feat_stds_p).fillna(0).values
# Column 0 holds the normalised target; columns 1.. hold the normalised features.
all_data_p = np.column_stack([target_norm_p, features_norm_p]).astype(np.float32)

# Free solar model/data to save memory. The solar cells above cannot be re-run
# after this point without re-running the notebook from the top.
del model, all_data, train_ds, val_ds, train_loader, val_loader
del correction_models, xgb_feats, xgb_actuals, xgb_tso
gc.collect()

# Same chronological fit/validation split (val_split) as the solar model.
train_ds_p = TimeSeriesDataset(all_data_p, context_length, prediction_length, 0, val_split)
val_ds_p = TimeSeriesDataset(all_data_p, context_length, prediction_length, val_split, train_end)
train_loader_p = DataLoader(train_ds_p, batch_size=64, shuffle=True, num_workers=0)
val_loader_p = DataLoader(val_ds_p, batch_size=128, shuffle=False, num_workers=0)

n_input_p = all_data_p.shape[1]
n_future_p = len(feature_cols_p)
model_p = PatchTST(n_input_p, n_future_p).to(device)

print(f'Price input: {all_data_p.shape[1]} (1 target + {len(feature_cols_p)} features)')
print(f'Price PatchTST parameters: {sum(p.numel() for p in model_p.parameters()):,}')

Price input: 33 (1 target + 32 features)
Price PatchTST parameters: 236,312


In [10]:
print('=== Training PatchTST for Price ===')
model_p = train_patchtst(model_p, train_loader_p, val_loader_p)
model_p.eval()

# Later cells locate test window w at row `test_start + w * prediction_length`,
# so no test window may be skipped here. Fail loudly instead of dropping windows.
if test_start < context_length:
    raise ValueError(
        f'test_start ({test_start}) must be >= context_length ({context_length})'
    )


def _price_window_forecast(i):
    """Forecast rows [i, i + prediction_length) in EUR/MWh from the context_length rows before i."""
    x = torch.from_numpy(all_data_p[i - context_length : i]).unsqueeze(0).to(device)
    xf = torch.from_numpy(all_data_p[i : i + prediction_length, 1:]).unsqueeze(0).to(device)
    with torch.no_grad():
        # squeeze(0) drops only the batch axis, so a 1-step horizon stays 1-D.
        # Unlike solar, the result is not clipped at zero.
        return model_p(x, xf).squeeze(0).cpu().numpy() * target_std_p + target_mean_p


# Test predictions: non-overlapping windows. The `+ 1` keeps the final window
# when it ends exactly on the last row.
test_preds_p, test_actuals_p, test_times_p = [], [], []
for i in range(test_start, len(all_data_p) - prediction_length + 1, prediction_length):
    test_preds_p.append(_price_window_forecast(i))
    test_actuals_p.append(all_data_p[i : i + prediction_length, 0] * target_std_p + target_mean_p)
    test_times_p.append(df['time'].iloc[i : i + prediction_length].values)

raw_mae_p = np.mean([np.mean(np.abs(a - p)) for a, p in zip(test_actuals_p, test_preds_p)])
print(f'\nTest windows: {len(test_preds_p)}')
print(f'Raw PatchTST Price MAE: {raw_mae_p:.2f} EUR/MWh')

# Train predictions for stacking; each entry of train_windows_info_p is
# (window number, first forecast row).
train_preds_raw_p, train_windows_info_p = [], []
train_starts_p = range(context_length, train_end - prediction_length + 1, prediction_length)
for w, i in enumerate(train_starts_p):
    train_preds_raw_p.append(_price_window_forecast(i))
    train_windows_info_p.append((w, i))

print(f'Train prediction windows: {len(train_preds_raw_p)}')

=== Training PatchTST for Price ===
Epoch   1/60, Train: 0.23254, Val: 0.17584 *
Epoch   2/60, Train: 0.11312, Val: 0.15701 *
Epoch   3/60, Train: 0.08715, Val: 0.14961 *
Epoch   6/60, Train: 0.06377, Val: 0.14886 *
Early stopping at epoch 16
Best val loss: 0.14886

Test windows: 364
Raw PatchTST Price MAE: 5.13 EUR/MWh
Train prediction windows: 1088


In [11]:
# XGBoost residual correction for price
# One regressor per horizon h learns (actual - PatchTST forecast) from the
# normalised features at that hour, the PatchTST forecast and the day-ahead price.
xgb_feats_p, xgb_actuals_p, xgb_tso_p = [], [], []
for _, start_i in train_windows_info_p:
    rows = slice(start_i, start_i + prediction_length)
    xgb_feats_p.append(all_data_p[rows, 1:])
    xgb_actuals_p.append(all_data_p[rows, 0] * target_std_p + target_mean_p)
    xgb_tso_p.append(df[tso_col_p].iloc[rows].values)

n_w_p = len(train_preds_raw_p)
xgb_split_p = int(n_w_p * 0.8)
print(f'XGBoost correction: {xgb_split_p} train, {n_w_p - xgb_split_p} val windows')

# Stack once to (window, horizon[, feature]) so each horizon is a plain slice.
train_feats_arr_p = np.stack(xgb_feats_p)
train_raw_arr_p = np.stack(train_preds_raw_p)
train_tso_arr_p = np.stack(xgb_tso_p)
train_resid_arr_p = np.stack(xgb_actuals_p) - train_raw_arr_p

correction_models_p = []
for h in range(prediction_length):
    X = np.column_stack([train_feats_arr_p[:, h, :], train_raw_arr_p[:, h], train_tso_arr_p[:, h]])
    y = train_resid_arr_p[:, h]
    # Chronological split: the earliest windows fit, the latest drive early stopping.
    X_tr, X_va = X[:xgb_split_p], X[xgb_split_p:]
    y_tr, y_va = y[:xgb_split_p], y[xgb_split_p:]
    xgbr = xgb.XGBRegressor(
        n_estimators=500, max_depth=4, learning_rate=0.05,
        tree_method='hist', random_state=42, verbosity=0, early_stopping_rounds=50,
    )
    xgbr.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=False)
    correction_models_p.append(xgbr)

stopped_p = [m.best_iteration + 1 for m in correction_models_p]
print(f'Trees per horizon: min={min(stopped_p)}, max={max(stopped_p)}, mean={np.mean(stopped_p):.0f}')

# Apply to test: one batched predict per horizon instead of one call per
# (window, horizon) pair. Test window w covers rows
# [test_start + w * prediction_length, test_start + (w + 1) * prediction_length).
test_rows_p = [
    slice(test_start + w * prediction_length, test_start + (w + 1) * prediction_length)
    for w in range(len(test_preds_p))
]
test_feats_arr_p = np.stack([all_data_p[rows, 1:] for rows in test_rows_p])
test_tso_arr_p = np.stack([df[tso_col_p].iloc[rows].values for rows in test_rows_p])
test_raw_arr_p = np.stack(test_preds_p)

corrected_arr_p = np.column_stack([
    test_raw_arr_p[:, h] + correction_models_p[h].predict(
        np.column_stack([test_feats_arr_p[:, h, :], test_raw_arr_p[:, h], test_tso_arr_p[:, h]])
    )
    for h in range(prediction_length)
])
# No clipping here, unlike the solar correction.
corrected_preds_p = list(corrected_arr_p)

corr_mae_p = np.mean([np.mean(np.abs(a - c)) for a, c in zip(test_actuals_p, corrected_preds_p)])
flat_actual_p = np.concatenate(test_actuals_p)
flat_corr_p = np.concatenate(corrected_preds_p)
flat_raw_p = np.concatenate(test_preds_p)
corr_rmse_p = np.sqrt(np.mean((flat_actual_p - flat_corr_p) ** 2))
# The denominator is floored at 1 so near-zero actuals do not blow up the ratio.
corr_mape_p = np.mean(np.abs((flat_actual_p - flat_corr_p) / np.clip(np.abs(flat_actual_p), 1, None))) * 100

tso_flat_p = test_tso_arr_p.ravel()  # window-major, same order as flat_actual_p
tso_mae_p = np.mean(np.abs(flat_actual_p - tso_flat_p))
tso_rmse_p = np.sqrt(np.mean((flat_actual_p - tso_flat_p) ** 2))

# NOTE(review): this choice between corrected and raw forecasts is made on test
# MAE, so the reported final metric is optimistically biased. Consider choosing
# on the validation windows instead.
if corr_mae_p < raw_mae_p:
    final_preds_p = corrected_preds_p
    final_mae_p = corr_mae_p
    print(f'\nCorrection helps! Raw: {raw_mae_p:.2f} -> Corrected: {corr_mae_p:.2f} EUR/MWh')
else:
    final_preds_p = test_preds_p
    final_mae_p = raw_mae_p
    # Report RMSE/MAPE of the forecast that was actually selected.
    corr_rmse_p = np.sqrt(np.mean((flat_actual_p - flat_raw_p) ** 2))
    corr_mape_p = np.mean(np.abs((flat_actual_p - flat_raw_p) / np.clip(np.abs(flat_actual_p), 1, None))) * 100
    print(f'\nCorrection does not help. Using raw: {raw_mae_p:.2f} EUR/MWh')

print(f'TSO MAE: {tso_mae_p:.2f} EUR/MWh')

XGBoost correction: 870 train, 218 val windows
Trees per horizon: min=1, max=284, mean=87

Correction helps! Raw: 5.13 -> Corrected: 5.06 EUR/MWh
TSO MAE: 8.87 EUR/MWh


In [12]:
# Export price: JSON + CSVs
# Create the output directory here as well so this cell does not depend on the
# solar export cell having run first.
out_dir_p = '../dashboard/public/data'
os.makedirs(out_dir_p, exist_ok=True)

# Dashboard sample: test windows 9-15 (seven consecutive windows), as far as they exist.
sample_data_p = []
for w in range(9, min(16, len(final_preds_p))):
    for h in range(prediction_length):
        idx = test_start + w * prediction_length + h
        t = pd.Timestamp(test_times_p[w][h])
        sample_data_p.append({
            'time': t.strftime('%Y-%m-%d %H:%M'),
            'actual': round(float(test_actuals_p[w][h]), 2),
            'predicted': round(float(final_preds_p[w][h]), 2),
            'tso': round(float(df[tso_col_p].iloc[idx]), 2),
        })

avg_trees_p = int(np.mean(stopped_p))
# Name the XGBoost stage only when the corrected forecast was selected, so the
# label matches the forecast behind the exported 'mae' / 'rmse' / 'mape'.
if final_preds_p is corrected_preds_p:
    model_label_p = f'PatchTST + XGBoost ({avg_trees_p} trees avg)'
else:
    model_label_p = 'PatchTST'

output_p = {
    'target': 'price',
    'model': model_label_p,
    'prediction_length_hours': prediction_length,
    'context_length_hours': context_length,
    'metrics': {
        'mae': round(float(final_mae_p), 2),
        'rmse': round(float(corr_rmse_p), 2),
        'mape': round(float(corr_mape_p), 2),
        'tso_mae': round(float(tso_mae_p), 2),
        'tso_rmse': round(float(tso_rmse_p), 2),
        'raw_mae': round(float(raw_mae_p), 2),
        'raw_rmse': round(float(np.sqrt(np.mean((flat_actual_p - flat_raw_p) ** 2))), 2),
    },
    'sample_forecast': sample_data_p,
}

with open(f'{out_dir_p}/patchtst_price.json', 'w') as f:
    json.dump(output_p, f, indent=2)

# Test CSV: raw (uncorrected) PatchTST forecasts next to actuals and the day-ahead price.
rows_p = []
for w, (pred, actual, times) in enumerate(zip(test_preds_p, test_actuals_p, test_times_p)):
    idx = test_start + w * prediction_length
    tso = df[tso_col_p].iloc[idx : idx + prediction_length].values
    for h in range(prediction_length):
        rows_p.append({
            'time': pd.Timestamp(times[h]).strftime('%Y-%m-%d %H:%M'),
            'window': w, 'horizon': h,
            'actual': round(float(actual[h]), 2),
            'patchtst_pred': round(float(pred[h]), 2),
            'tso': round(float(tso[h]), 2),
        })
pd.DataFrame(rows_p).to_csv('patchtst_price_predictions.csv', index=False)

# Train CSV: raw forecasts keyed by the row position (`global_idx`) in the data.
train_rows_p = []
for pred, (w, start_i) in zip(train_preds_raw_p, train_windows_info_p):
    for h in range(prediction_length):
        train_rows_p.append({
            'global_idx': start_i + h,
            'window': w, 'horizon': h,
            'patchtst_pred': round(float(pred[h]), 2),
        })
pd.DataFrame(train_rows_p).to_csv('patchtst_price_train_predictions.csv', index=False)

print(f'Saved patchtst_price.json (MAE: {output_p["metrics"]["mae"]} EUR/MWh)')
print(f'Saved patchtst_price_predictions.csv ({len(rows_p)} rows)')
print(f'Saved patchtst_price_train_predictions.csv ({len(train_rows_p)} rows)')

Saved patchtst_price.json (MAE: 5.06 EUR/MWh)
Saved patchtst_price_predictions.csv (8736 rows)
Saved patchtst_price_train_predictions.csv (26112 rows)
