
# 03 — Modeling Tabular

## Baselines + RNNs (LSTM, GRU, Dilated, Clockwork) con datos tabulados.

**Optimización Bayesiana**

## Setup

In [None]:
from pathlib import Path
import os, json, math, time, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

## Config

In [None]:
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

Device: cuda
GPU: NVIDIA GeForce RTX 3050 Laptop GPU


In [None]:
DATA_CLEAN = Path("../data/clean/base_dataset.csv")
OUT_DIR = Path("../outputs"); OUT_DIR.mkdir(parents=True, exist_ok=True)
ART_DIR = OUT_DIR / "artifacts"; ART_DIR.mkdir(parents=True, exist_ok=True)
FIG_DIR = OUT_DIR / "figures"; FIG_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
TARGET_COL = "GHI"
FREQ = "10T"
INPUT_STEPS   = 36   # 6h past
HORIZON_STEPS = 6    # 1h ahead
BATCH_SIZE    = 256
EPOCHS        = 40
PATIENCE      = 6    # Early stopping patience

In [None]:
# Optuna runs

OPTUNA_STORAGE = "sqlite:///../outputs/artifacts/optuna_tabular.db"

N_TRIALS_RF   = 30
N_TRIALS_LSTM = 30
N_TRIALS_GRU  = 30
N_TRIALS_DIL  = 30
N_TRIALS_CW   = 30

In [None]:
# Pruner (cut bad trials short)
PRUNER = optuna.pruners.MedianPruner(n_warmup_steps=4)

## Data

In [None]:
df = pd.read_csv(DATA_CLEAN, parse_dates=[0], index_col=0).sort_index()
df.index.name = "time"

In [5]:
df.columns

Index(['CSI', 'GHI', 'Presion', 'TempAmb', 'Wind Y', 'Wind X', 'DoY Sin',
       'DoY Cos', 'horas', '__missing_target', 'flag_GHI_range',
       'flag_TempAmb_range', 'flag_Presion_range', 'flag_CSI_range', 'hour',
       'dow', 'month', 'minute', 'is_weekend', 'hour_sin', 'hour_cos',
       'WindSpeed', 'WindDirection', 'GHI_roll1h_mean', 'GHI_roll3h_mean',
       'GHI_roll6h_mean', 'GHI_roll1h_max', 'TempAmb_roll1h_mean',
       'TempAmb_roll3h_mean', 'TempAmb_roll6h_mean', 'TempAmb_roll1h_max',
       'Presion_roll1h_mean', 'Presion_roll3h_mean', 'Presion_roll6h_mean',
       'Presion_roll1h_max', 'WindSpeed_roll1h_mean', 'WindSpeed_roll3h_mean',
       'WindSpeed_roll6h_mean', 'WindSpeed_roll1h_max', 'GHI_lag1', 'GHI_lag3',
       'GHI_lag6', 'GHI_lag12', 'GHI_lag36', 'solar_zenith', 'solar_azimuth',
       'solar_elevation', 'ETR', 'clear_sky_ghi', 'CSI_advanced',
       'ghi_1min_change', 'ghi_5min_std', 'ghi_persistence_1h',
       'temp_pressure_ratio', 'wind_temp_interaction'

In [None]:
base_feats = [
    'Presion','TempAmb','WindSpeed','WindDirection',
    'hour_sin','hour_cos','DoY Sin','DoY Cos',
    'solar_zenith','solar_azimuth','solar_elevation',
    'TempAmb_roll1h_mean','TempAmb_roll6h_mean',
    'Presion_roll1h_mean','Presion_roll6h_mean',
    'WindSpeed_roll1h_mean','WindSpeed_roll6h_mean',
    'temp_pressure_ratio','wind_temp_interaction'
]

ghi_lags = [c for c in ['GHI_lag1','GHI_lag3','GHI_lag6','GHI_lag12','GHI_lag36'] if c in df.columns]
ghi_rolls = [c for c in ['GHI_roll1h_mean','GHI_roll3h_mean','GHI_roll6h_mean','GHI_roll1h_max'] if c in df.columns]

In [None]:
feat_cols = [c for c in base_feats if c in df.columns] + ghi_lags + ghi_rolls
print(f"Total features used: {len(feat_cols)}")
print(feat_cols)

## Split

Temporal split  70/15/15

In [None]:
n = len(df); i_tr = int(0.7*n); i_va = int(0.85*n)
df_train, df_val, df_test = df.iloc[:i_tr], df.iloc[i_tr:i_va], df.iloc[i_va:]

X_scaler = StandardScaler(); y_scaler = StandardScaler()

X_train = X_scaler.fit_transform(df_train[feat_cols].values)
y_train = y_scaler.fit_transform(df_train[[TARGET_COL]].values).ravel()
X_val   = X_scaler.transform(df_val[feat_cols].values)
y_val   = y_scaler.transform(df_val[[TARGET_COL]].values).ravel()
X_test  = X_scaler.transform(df_test[feat_cols].values)
y_test  = y_scaler.transform(df_test[[TARGET_COL]].values).ravel()

In [None]:
# NaN before split
print("Valores NaN en X_train:", np.isnan(X_train).sum())
print("Valores NaN en y_train:", np.isnan(y_train).sum())
print("Valores NaN en X_val:", np.isnan(X_val).sum())
print("Valores NaN en X_test:", np.isnan(X_test).sum())

Valores NaN en X_train: 57
Valores NaN en y_train: 0
Valores NaN en X_val: 0
Valores NaN en X_test: 0


In [None]:
imp = SimpleImputer(strategy="median")
X_train = imp.fit_transform(X_train)
X_val   = imp.transform(X_val)
X_test  = imp.transform(X_test)

In [None]:
for name, arr in [("X_train",X_train),("X_val",X_val),("X_test",X_test),("y_train",y_train),("y_val",y_val),("y_test",y_test)]:
    assert np.isfinite(arr).all(), f"{name} tiene NaN/Inf"

## Helpers - Metrics

In [None]:
def metrics_from_scaled(pred_scaled, true_scaled, y_scaler):
    p = y_scaler.inverse_transform(pred_scaled.reshape(-1,1)).ravel()
    t = y_scaler.inverse_transform(true_scaled.reshape(-1,1)).ravel()
    mae = mean_absolute_error(t, p)
    rmse = math.sqrt(mean_squared_error(t, p))
    mape = np.mean(np.abs((t + 1e-6) - p) / (np.abs(t) + 1e-6)) * 100
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape}, (t, p)

def persistence_baseline(y_scaled, horizon):
    # ŷ(t+h) = y(t) en espacio ESCALADO 
    y_hat = np.roll(y_scaled, horizon)
    y_hat[:horizon] = y_scaled[horizon]  # simple fill
    return y_hat

## Baselines

In [None]:
lin = LinearRegression().fit(X_train, y_train)
lin_metrics, (y_true_lin, y_pred_lin) = metrics_from_scaled(lin.predict(X_test), y_test, y_scaler)

rf0 = RandomForestRegressor(n_estimators=300, random_state=SEED, n_jobs=-1).fit(X_train, y_train)
rf0_metrics, (y_true_rf0, y_pred_rf0) = metrics_from_scaled(rf0.predict(X_test), y_test, y_scaler)

y_pers_test = persistence_baseline(y_test, HORIZON_STEPS)
pers_metrics, (y_true_pers, y_pred_pers) = metrics_from_scaled(y_pers_test, y_test, y_scaler)

print("Persistence:", pers_metrics)
print("Linear     :", lin_metrics)
print("RF baseline:", rf0_metrics)

Linear: {'MAE': 133.67257677220636, 'RMSE': 180.89666474680828, 'MAPE': np.float64(2010103435.0315397)}
RF baseline: {'MAE': 53.15648075886887, 'RMSE': 98.82656751538924, 'MAPE': np.float64(504496106.27778584)}


## Sequentials

In [None]:
def make_loaders_from_arrays(X_tr, y_tr, X_va, y_va, steps, horizon, batch=256):
    ds_tr = SeqDataset(X_tr, y_tr, steps, horizon)
    ds_va = SeqDataset(X_va, y_va, steps, horizon)
    return (DataLoader(ds_tr, batch_size=batch, shuffle=True, drop_last=True),
            DataLoader(ds_va, batch_size=batch, shuffle=False, drop_last=False))

def make_loaders(X_tr, y_tr, X_va, y_va, X_te, y_te, steps, horizon, batch=256):
    ds_tr = SeqDataset(X_tr, y_tr, steps, horizon)
    ds_va = SeqDataset(X_va, y_va, steps, horizon)
    ds_te = SeqDataset(X_te, y_te, steps, horizon)
    return (DataLoader(ds_tr, batch_size=batch, shuffle=True, drop_last=True),
            DataLoader(ds_va, batch_size=batch, shuffle=False, drop_last=False),
            DataLoader(ds_te, batch_size=batch, shuffle=False, drop_last=False))

In [None]:
class SeqDataset(Dataset):
    def __init__(self, X, y, input_steps=36, horizon=6):
        self.X, self.y = X, y
        self.input_steps, self.horizon = input_steps, horizon
        self.max_i = len(X) - input_steps - horizon
        assert self.max_i > 0, "No hay suficientes muestras para ventanas."
    def __len__(self): return self.max_i
    def __getitem__(self, idx):
        i0, i1 = idx, idx + self.input_steps
        ih = i1 + self.horizon - 1
        return (torch.tensor(self.X[i0:i1], dtype=torch.float32),
                torch.tensor(self.y[ih], dtype=torch.float32))

### PyTorch

In [None]:
class LSTMModel(nn.Module):
    def __init__(self, in_dim, hidden=64, num_layers=1, dropout=0.0, bidirectional=False):
        super().__init__()
        self.rnn = nn.LSTM(in_dim, hidden, num_layers=num_layers, batch_first=True,
                           dropout=(dropout if num_layers>1 else 0.0), bidirectional=bidirectional)
        out_dim = hidden * (2 if bidirectional else 1)
        self.fc = nn.Linear(out_dim, 1)
    def forward(self, x):
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :]).squeeze(1)

In [None]:
class GRUModel(nn.Module):
    def __init__(self, in_dim, hidden=64, num_layers=1, dropout=0.0, bidirectional=False):
        super().__init__()
        self.rnn = nn.GRU(in_dim, hidden, num_layers=num_layers, batch_first=True,
                          dropout=(dropout if num_layers>1 else 0.0), bidirectional=bidirectional)
        out_dim = hidden * (2 if bidirectional else 1)
        self.fc = nn.Linear(out_dim, 1)
    def forward(self, x):
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :]).squeeze(1)

In [None]:
class DilatedRNNCell(nn.Module):
    def __init__(self, in_dim, hidden):
        super().__init__()
        self.Wx = nn.Linear(in_dim, hidden)
        self.Wh = nn.Linear(hidden, hidden)
        self.act = nn.Tanh()
    def forward(self, x_t, h):
        return self.act(self.Wx(x_t) + self.Wh(h))

In [None]:
class DilatedRNN(nn.Module):
    def __init__(self, in_dim, hidden=64, dilation=2, dropout=0.0):
        super().__init__()
        self.cell = DilatedRNNCell(in_dim, hidden)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden, 1)
        self.d = max(1, int(dilation))
        self.hidden_size = hidden
    def forward(self, x):
        B, T, F = x.size()
        h = torch.zeros(B, self.hidden_size, device=x.device)
        for t in range(0, T, self.d):
            h = self.cell(x[:, t, :], h)
            h = self.dropout(h)
        return self.fc(h).squeeze(1)

In [None]:
class ClockworkRNN(nn.Module):
    def __init__(self, in_dim, hidden=60, modules=3, base_period=1, dropout=0.0):
        super().__init__()
        assert hidden % modules == 0, "hidden debe ser múltiplo de modules"
        self.modules = modules
        self.h_per = hidden // modules
        self.periods = [base_period * (2**m) for m in range(modules)]
        self.Wx = nn.ModuleList([nn.Linear(in_dim, self.h_per) for _ in range(modules)])
        self.Wh = nn.ModuleList([nn.Linear(self.h_per, self.h_per) for _ in range(modules)])
        self.act = nn.Tanh()
        self.dropout = nn.Dropout(dropout)
        self.fc  = nn.Linear(hidden, 1)
    def forward(self, x):
        B, T, F = x.size()
        hs = [torch.zeros(B, self.h_per, device=x.device) for _ in range(self.modules)]
        for t in range(T):
            for m in range(self.modules):
                if t % self.periods[m] == 0:
                    hs[m] = self.act(self.Wx[m](x[:, t, :]) + self.Wh[m](hs[m]))
                    hs[m] = self.dropout(hs[m])
        h_all = torch.cat(hs, dim=1)
        return self.fc(h_all).squeeze(1)

In [None]:
# class LSTMModel(nn.Module):
#     def __init__(self, in_dim, hidden=64, num_layers=1, dropout=0.0, bidirectional=False):
#         super().__init__()
#         self.rnn = nn.LSTM(in_dim, hidden, num_layers=num_layers, batch_first=True,
#                            dropout=(dropout if num_layers>1 else 0.0), bidirectional=bidirectional)
#         out_dim = hidden * (2 if bidirectional else 1)
#         self.fc = nn.Linear(out_dim, 1)
#     def forward(self, x):
#         out, _ = self.rnn(x)
#         return self.fc(out[:, -1, :]).squeeze(1)

# class GRUModel(nn.Module):
#     def __init__(self, in_dim, hidden=64, num_layers=1, dropout=0.0, bidirectional=False):
#         super().__init__()
#         self.rnn = nn.GRU(in_dim, hidden, num_layers=num_layers, batch_first=True,
#                           dropout=(dropout if num_layers>1 else 0.0), bidirectional=bidirectional)
#         out_dim = hidden * (2 if bidirectional else 1)
#         self.fc = nn.Linear(out_dim, 1)
#     def forward(self, x):
#         out, _ = self.rnn(x)
#         return self.fc(out[:, -1, :]).squeeze(1)

# # Dilated RNN sencilla: procesa cada d-ésimo paso
# class DilatedRNNCell(nn.Module):
#     def __init__(self, in_dim, hidden):
#         super().__init__()
#         self.Wx = nn.Linear(in_dim, hidden)
#         self.Wh = nn.Linear(hidden, hidden)
#         self.act = nn.Tanh()
#     def forward(self, x_t, h):
#         return self.act(self.Wx(x_t) + self.Wh(h))

# class DilatedRNN(nn.Module):
#     def __init__(self, in_dim, hidden=64, dilation=2, dropout=0.0):
#         super().__init__()
#         self.cell = DilatedRNNCell(in_dim, hidden)
#         self.dropout = nn.Dropout(dropout)
#         self.fc = nn.Linear(hidden, 1)
#         self.d = max(1, int(dilation))
#         self.hidden_size = hidden
#     def forward(self, x):
#         B, T, F = x.size()
#         h = torch.zeros(B, self.hidden_size, device=x.device)
#         for t in range(0, T, self.d):
#             h = self.cell(x[:, t, :], h)
#             h = self.dropout(h)
#         return self.fc(h).squeeze(1)
# class ClockworkRNN(nn.Module):
#     def __init__(self, in_dim, hidden=60, modules=3, base_period=1, dropout=0.0):
#         super().__init__()
#         assert hidden % modules == 0, "hidden debe ser múltiplo de modules"
#         self.modules = modules
#         self.h_per = hidden // modules
#         # periodos tipo 1, 2, 4... * base_period
#         self.periods = [base_period * (2**m) for m in range(modules)]
#         self.Wx = nn.ModuleList([nn.Linear(in_dim, self.h_per) for _ in range(modules)])
#         self.Wh = nn.ModuleList([nn.Linear(self.h_per, self.h_per) for _ in range(modules)])
#         self.act = nn.Tanh()
#         self.dropout = nn.Dropout(dropout)
#         self.fc  = nn.Linear(hidden, 1)
#     def forward(self, x):
#         B, T, F = x.size()
#         hs = [torch.zeros(B, self.h_per, device=x.device) for _ in range(self.modules)]
#         for t in range(T):
#             for m in range(self.modules):
#                 if t % self.periods[m] == 0:
#                     hs[m] = self.act(self.Wx[m](x[:, t, :]) + self.Wh[m](hs[m]))
#                     hs[m] = self.dropout(hs[m])
#         h_all = torch.cat(hs, dim=1)
#         return self.fc(h_all).squeeze(1)

#### Training

In [None]:
def train_torch_model(model, dl_train, dl_val, epochs=40, lr=1e-3, patience=6, device=DEVICE, trial=None, weight_decay=0.0):
    model = model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode="min", factor=0.5, patience=2, verbose=False)
    loss_fn = nn.MSELoss()
    best_val, best_state, no_improve = float("inf"), None, 0

    for ep in range(1, epochs+1):
        model.train(); tr_losses=[]
        for xb, yb in dl_train:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad(); pred = model(xb)
            loss = loss_fn(pred, yb); loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            tr_losses.append(loss.item())

        model.eval(); va_losses=[]
        with torch.no_grad():
            for xb, yb in dl_val:
                xb, yb = xb.to(device), yb.to(device)
                va_losses.append(loss_fn(model(xb), yb).item())
        val_mse = float(np.mean(va_losses))
        sched.step(val_mse)

        if trial is not None:
            trial.report(val_mse, ep)
            if trial.should_prune(): raise optuna.TrialPruned()

        if val_mse < best_val - 1e-6:
            best_val, best_state, no_improve = val_mse, model.state_dict(), 0
        else:
            no_improve += 1
            if no_improve >= patience: break

    if best_state is not None: model.load_state_dict(best_state)
    return model, best_val

In [None]:
def eval_sequence_model(model, dl, y_scaler, device=DEVICE):
    model.eval(); preds, trues = [], []
    with torch.no_grad():
        for xb, yb in dl:
            xb = xb.to(device)
            preds.append(model(xb).cpu().numpy())
            trues.append(yb.cpu().numpy())
    p = np.concatenate(preds); t = np.concatenate(trues)
    p_o = y_scaler.inverse_transform(p.reshape(-1,1)).ravel()
    t_o = y_scaler.inverse_transform(t.reshape(-1,1)).ravel()
    mae  = mean_absolute_error(t_o, p_o)
    rmse = math.sqrt(mean_squared_error(t_o, p_o))
    mape = np.mean(np.abs((t_o + 1e-6) - p_o) / (np.abs(t_o) + 1e-6)) * 100
    return {"MAE":mae, "RMSE":rmse, "MAPE":mape}, (t_o, p_o)

## Optuna

In [None]:
def _create_study(name):
    return optuna.create_study(direction="minimize", study_name=name, pruner=PRUNER, storage=OPTUNA_STORAGE, load_if_exists=bool(OPTUNA_STORAGE))

#### RF

In [None]:
def objective_rf(trial):
    rf = RandomForestRegressor(
        n_estimators      = trial.suggest_int("n_estimators", 200, 700),
        max_depth         = trial.suggest_int("max_depth", 6, 28),
        min_samples_split = trial.suggest_int("min_samples_split", 2, 20),
        min_samples_leaf  = trial.suggest_int("min_samples_leaf", 1, 10),
        n_jobs=-1, random_state=SEED
    )
    rf.fit(X_train, y_train)
    pred_val = rf.predict(X_val)
    p_o = y_scaler.inverse_transform(pred_val.reshape(-1,1)).ravel()
    t_o = y_scaler.inverse_transform(y_val.reshape(-1,1)).ravel()
    return math.sqrt(mean_squared_error(t_o, p_o))

#### RNN
(causals: bidirectional=False)

In [None]:
def objective_rnn_builder(model_kind="LSTM"):
    def _obj(trial):
        hidden  = trial.suggest_int("hidden", 64, 256, step=32)
        layers  = trial.suggest_int("num_layers", 1, 3)
        dropout = trial.suggest_float("dropout", 0.0, 0.5)
        bidir   = False  # causal para operación real
        lr      = trial.suggest_float("lr", 3e-4, 3e-3, log=True)
        steps   = trial.suggest_categorical("input_steps", [24, 36, 48, 60])
        horizon = trial.suggest_categorical("horizon_steps", [3, 6, 12])
        batch   = trial.suggest_categorical("batch", [128, 256, 512])
        wd      = trial.suggest_float("weight_decay", 0.0, 5e-4)  # regularización ligera

        dl_tr, dl_va = make_loaders_from_arrays(X_train, y_train, X_val, y_val, steps, horizon, batch=batch)
        in_dim = X_train.shape[1]
        model = LSTMModel(in_dim, hidden, layers, dropout, bidir) if model_kind=="LSTM" else GRUModel(in_dim, hidden, layers, dropout, bidir)

        _, best_val = train_torch_model(model, dl_tr, dl_va, epochs=EPOCHS, lr=lr, patience=PATIENCE, device=DEVICE, trial=trial, weight_decay=wd)
        return best_val
    return _obj

In [None]:
def objective_dilated(trial: optuna.Trial):
    hidden  = trial.suggest_int("hidden", 32, 192, step=32)
    dilation= trial.suggest_categorical("dilation", [1, 2, 3, 4, 6])
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    lr      = trial.suggest_float("lr", 3e-4, 3e-3, log=True)
    steps   = trial.suggest_categorical("input_steps", [24, 36, 48, 60])
    horizon = trial.suggest_categorical("horizon_steps", [3, 6, 12])
    batch   = trial.suggest_categorical("batch", [128, 256, 512])
    wd      = trial.suggest_float("weight_decay", 0.0, 5e-4)

    dl_tr, dl_va = make_loaders_from_arrays(X_train, y_train, X_val, y_val, steps, horizon, batch=batch)
    model = DilatedRNN(X_train.shape[1], hidden=hidden, dilation=dilation, dropout=dropout)
    _, best_val = train_torch_model(model, dl_tr, dl_va, epochs=EPOCHS, lr=lr, patience=PATIENCE, device=DEVICE, trial=trial, weight_decay=wd)
    return best_val

In [None]:
def objective_clockwork(trial: optuna.Trial):
    hidden  = trial.suggest_int("hidden", 60, 180, step=30)
    modules = trial.suggest_categorical("modules", [3, 4, 5])
    if hidden % modules != 0: raise optuna.TrialPruned()
    base_period = trial.suggest_categorical("base_period", [1, 2])
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    lr      = trial.suggest_float("lr", 3e-4, 3e-3, log=True)
    steps   = trial.suggest_categorical("input_steps", [24, 36, 48, 60])
    horizon = trial.suggest_categorical("horizon_steps", [3, 6, 12])
    batch   = trial.suggest_categorical("batch", [128, 256, 512])
    wd      = trial.suggest_float("weight_decay", 0.0, 5e-4)

    dl_tr, dl_va = make_loaders_from_arrays(X_train, y_train, X_val, y_val, steps, horizon, batch=batch)
    model = ClockworkRNN(X_train.shape[1], hidden=hidden, modules=modules, base_period=base_period, dropout=dropout)
    _, best_val = train_torch_model(model, dl_tr, dl_va, epochs=EPOCHS, lr=lr, patience=PATIENCE, device=DEVICE, trial=trial, weight_decay=wd)
    return best_val

### Run Studies

In [None]:
study_rf   = _create_study("RF_RMSE");      study_rf.optimize(objective_rf,                n_trials=N_TRIALS_RF)
study_lstm = _create_study("LSTM_MSEval");  study_lstm.optimize(objective_rnn_builder("LSTM"), n_trials=N_TRIALS_LSTM)
study_gru  = _create_study("GRU_MSEval");   study_gru.optimize(objective_rnn_builder("GRU"),  n_trials=N_TRIALS_GRU)
study_dil  = _create_study("DilatedRNN_MSEval"); study_dil.optimize(objective_dilated,     n_trials=N_TRIALS_DIL)
study_cw   = _create_study("ClockworkRNN_MSEval"); study_cw.optimize(objective_clockwork,  n_trials=N_TRIALS_CW)

## Best Model

In [None]:
print("Best LSTM      :", study_lstm.best_trial.params)
print("Best GRU       :", study_gru.best_trial.params)
print("Best Dilated   :", study_dil.best_trial.params)
print("Best Clockwork :", study_cw.best_trial.params)

Best LSTM      : {'hidden': 128, 'num_layers': 3, 'dropout': 0.01767602417948283, 'bidirectional': False, 'lr': 0.0015974153776496232, 'input_steps': 36, 'horizon_steps': 3, 'batch': 128}
Best GRU       : {'hidden': 64, 'num_layers': 3, 'dropout': 0.1717267069132688, 'bidirectional': True, 'lr': 0.0015539401909891695, 'input_steps': 36, 'horizon_steps': 3, 'batch': 128}
Best Dilated   : {'hidden': 64, 'dilation': 1, 'dropout': 0.28304300372038427, 'lr': 0.0005606575429489244, 'input_steps': 36, 'horizon_steps': 3, 'batch': 128}
Best Clockwork : {'hidden': 90, 'modules': 5, 'base_period': 2, 'dropout': 0.05935084995017281, 'lr': 0.0014238921494951263, 'input_steps': 24, 'horizon_steps': 3, 'batch': 128}


In [None]:
best_rf = RandomForestRegressor(random_state=SEED, n_jobs=-1, **study_rf.best_trial.params)
best_rf.fit(np.vstack([X_train, X_val]), np.concatenate([y_train, y_val]))
rf_opt_metrics, (y_true_rf_opt, y_pred_rf_opt) = metrics_from_scaled(best_rf.predict(X_test), y_test, y_scaler)

## Retraining

In [None]:
def retrain_and_test_build(build_fn, best_params, label):
    steps, horizon = best_params["input_steps"], best_params["horizon_steps"]
    batch = best_params["batch"]; lr = best_params["lr"]
    wd    = best_params.get("weight_decay", 0.0)
    dl_tr, dl_va, dl_te = make_loaders(
        np.vstack([X_train, X_val]), np.concatenate([y_train, y_val]),
        X_val, y_val, X_test, y_test, steps, horizon, batch
    )
    model = build_fn(best_params)
    model, best_val = train_torch_model(model, dl_tr, dl_va, epochs=EPOCHS, lr=lr, patience=PATIENCE, device=DEVICE, weight_decay=wd)
    torch.save(model.state_dict(), ART_DIR / f"best_{label}.pt")
    return eval_sequence_model(model, dl_te, y_scaler)

in_dim = X_train.shape[1]
build_lstm = lambda p: LSTMModel(in_dim, p["hidden"], p["num_layers"], p["dropout"], False)
build_gru  = lambda p: GRUModel(in_dim,  p["hidden"], p["num_layers"], p["dropout"], False)
build_dil  = lambda p: DilatedRNN(in_dim, p["hidden"], p["dilation"], p["dropout"])
build_cw   = lambda p: ClockworkRNN(in_dim, p["hidden"], p["modules"], p["base_period"], p["dropout"])

lstm_metrics, (yt_lstm, yp_lstm) = retrain_and_test_build(build_lstm, study_lstm.best_trial.params, "lstm")
gru_metrics,  (yt_gru,  yp_gru)  = retrain_and_test_build(build_gru,  study_gru.best_trial.params,  "gru")
dil_metrics,  (yt_dil,  yp_dil)  = retrain_and_test_build(build_dil,  study_dil.best_trial.params,  "dilated")
cw_metrics,   (yt_cw,   yp_cw)   = retrain_and_test_build(build_cw,   study_cw.best_trial.params,   "clockwork")


## Results

In [None]:
results = {
    "Persistence": pers_metrics,
    "LinearRegression": lin_metrics,
    "RandomForest_baseline": rf0_metrics,
    "RandomForest_Optuna": rf_opt_metrics,
    "LSTM_Optuna": lstm_metrics,
    "GRU_Optuna":  gru_metrics,
    "DilatedRNN_Optuna": dil_metrics,
    "ClockworkRNN_Optuna": cw_metrics,
}
res_df = pd.DataFrame(results).T.sort_values("RMSE")
display(res_df.round(3))

with open(ART_DIR/"tabular_results_optuna.json","w") as f:
    json.dump({k:{m:float(vv) for m,vv in v.items()} for k,v in results.items()}, f, indent=2)
print("Saved:", ART_DIR/"tabular_results_optuna.json")

Unnamed: 0,MAE,RMSE,MAPE
RandomForest_Optuna,51.439,93.832,510536500.0
RandomForest_baseline,53.156,98.827,504496100.0
ClockworkRNN_Optuna,68.803,105.409,189301900.0
DilatedRNN_Optuna,71.063,106.877,191465600.0
GRU_Optuna,62.026,112.381,37252140.0
LSTM_Optuna,77.717,143.675,179507000.0
LinearRegression,133.673,180.897,2010103000.0


In [24]:
with open(ART_DIR/"tabular_results_optuna.json","w") as f:
    json.dump({k:{m:float(vv) for m,vv in v.items()} for k,v in results.items()}, f, indent=2)
print("Saved:", ART_DIR/"tabular_results_optuna.json")

Saved: ..\outputs\artifacts\tabular_results_optuna.json


## Plots

In [None]:
def plot_sample(y_true, y_pred, title, n=1000):
    n = min(n, len(y_true))
    plt.figure(figsize=(11,3.8))
    plt.plot(y_true[:n], label="Real", lw=1.5)
    plt.plot(y_pred[:n], label="Pred", lw=1.2, alpha=0.9)
    plt.title(title); plt.xlabel("Time steps (10-min)"); plt.ylabel("GHI (W/m²)")
    plt.legend(frameon=False); plt.tight_layout()

plot_sample(y_true_rf_opt, y_pred_rf_opt, "RandomForest Optuna — Test (sample)")
plt.savefig(FIG_DIR / "pred_rf_opt_sample.png"); plt.show()

for name, (yt, yp) in {
    "LSTM": (yt_lstm, yp_lstm),
    "GRU":  (yt_gru,  yp_gru),
    "Dilated": (yt_dil, yp_dil),
    "Clockwork": (yt_cw, yp_cw),
}.items():
    plot_sample(yt, yp, f"{name} — Test (sample)")
    plt.savefig(FIG_DIR / f"pred_{name.lower()}_sample.png"); plt.show()