# Ground HPO with Optuna (MLP, LSTM, BiLSTM, CNN-LSTM, Transformer)

## Libraries

In [7]:
import os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from optuna.integration import TFKerasPruningCallback

## Config

In [None]:
# One seed for both NumPy and TensorFlow so repeated runs draw the same random streams.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Folders are relative to the notebook's working directory; output folders are created on demand.
DATA_DIR = Path("../data_processed")

OUT_DIR = Path("../models")
OUT_DIR.mkdir(parents=True, exist_ok=True)

STUDY_DIR = Path("../models/optuna_studies")
STUDY_DIR.mkdir(parents=True, exist_ok=True)

# Pre-split parquet files and the name of the regression target column.
TRAIN_PQ = DATA_DIR / "ground_train_h6.parquet"
VAL_PQ = DATA_DIR / "ground_val_h6.parquet"
TEST_PQ = DATA_DIR / "ground_test_h6.parquet"
TARGET = "y_ghi_h6"

## Data loading and preprocessing

In [9]:
# Read the three splits and order each one by its index.
train, val, test = (
    pd.read_parquet(split_path).sort_index()
    for split_path in (TRAIN_PQ, VAL_PQ, TEST_PQ)
)
assert all(TARGET in frame for frame in (train, val, test))

# Features = numeric columns present in every split, excluding the target.
# The dtype check is made against the training split only.
shared_cols = set(train.columns) & set(val.columns) & set(test.columns)
feat_cols = [
    col for col in sorted(shared_cols - {TARGET})
    if pd.api.types.is_numeric_dtype(train[col])
]

Xtr_df, Xva_df, Xte_df = train[feat_cols], val[feat_cols], test[feat_cols]
ytr, yva, yte = train[TARGET], val[TARGET], test[TARGET]

# Standardisation statistics come from the training split only and are reused for val/test.
scaler = StandardScaler()
Xtr = scaler.fit_transform(Xtr_df)
Xva, Xte = scaler.transform(Xva_df), scaler.transform(Xte_df)

In [10]:
def _rmse(a, b):
    """Return the root-mean-squared error between `a` and `b` as a plain Python float."""
    mse = mean_squared_error(a, b)
    return float(np.sqrt(mse))

## Baseline

In [11]:
# Reference forecast: the first of these columns that exists in the test split is used
# as-is; if none exists, fall back to the (NaN-ignoring) median of the training target.
base_src = None
for candidate in ("ghi_qc", "ghi_sg_definitive", "ghi_qc_lag1"):
    if candidate in test.columns:
        base_src = test[candidate]
        break

if base_src is None:
    train_median = np.nanmedian(ytr)
    base_src = pd.Series(train_median, index=test.index)

y_base = base_src.to_numpy()
print(f"Baseline → RMSE: {_rmse(yte, y_base):.4f} | MAE: {mean_absolute_error(yte, y_base):.4f}")

Baseline → RMSE: 196.2835 | MAE: 102.1871


## Track A - MLP

In [12]:
def objective_mlp(trial: optuna.Trial) -> float:
    """Optuna objective for the tabular MLP (Track A).

    Samples a two-hidden-layer MLP configuration, trains it on (Xtr, ytr) while
    monitoring (Xva, yva), saves the trained model to OUT_DIR and returns the
    validation RMSE (lower is better).

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning steps.

    Returns:
        Validation RMSE of the trained model.

    Raises:
        optuna.TrialPruned: raised by the pruning callback when the pruner stops the trial.
    """
    # Drop Keras state left over from earlier trials so memory does not grow per trial.
    tf.keras.backend.clear_session()

    n1  = trial.suggest_int("n1", 64, 512, step=64)
    # The second layer's upper bound depends on the first layer's width.
    n2  = trial.suggest_int("n2", 32, max(64, n1 // 2), step=32)
    do1 = trial.suggest_float("do1", 0.0, 0.5)
    do2 = trial.suggest_float("do2", 0.0, 0.5)
    lr  = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    l2w = trial.suggest_float("l2", 1e-8, 1e-3, log=True)
    act = trial.suggest_categorical("act", ["relu", "selu", "gelu"])
    bs  = trial.suggest_categorical("batch", [64, 128, 256, 512])
    eps = trial.suggest_int("epochs", 40, 150)

    model = models.Sequential([
        layers.Input(shape=(Xtr.shape[1],)),
        layers.Dense(n1, activation=act, kernel_regularizer=regularizers.l2(l2w)),
        layers.Dropout(do1),
        layers.Dense(n2, activation=act, kernel_regularizer=regularizers.l2(l2w)),
        layers.Dropout(do2),
        layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss="mse", metrics=["mae"])

    es = callbacks.EarlyStopping(monitor="val_loss", patience=12, restore_best_weights=True, verbose=0)
    rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=6, min_lr=1e-5, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")

    model.fit(Xtr, ytr, validation_data=(Xva, yva),
              epochs=eps, batch_size=bs, verbose=0,
              callbacks=[es, rlr, prune_cb])

    # ravel() keeps a 1-D vector even when the validation split has a single row.
    yhat = model.predict(Xva, verbose=0).ravel()
    val_rmse = _rmse(yva, yhat)

    # The study cell reloads the best trial from "model_path", so the file must be
    # written here; pruned trials never reach this point and leave no file behind.
    tmp_path = OUT_DIR / f"optuna_mlp_trial{trial.number}.keras"
    model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    return val_rmse


In [13]:

# Study A keeps its trials in its own SQLite file so an interrupted search can be resumed
# (load_if_exists=True). NOTE(review): the captured "database is locked" error suggests
# another process had this file open — confirm no other kernel is using the study.
dbA_file = (STUDY_DIR / 'ground_trackA_mlp.sqlite').absolute()
storageA = f"sqlite:///{dbA_file}"
studyA = optuna.create_study(
    study_name="ground_trackA_mlp",
    storage=storageA,
    load_if_exists=True,
    direction="minimize",
    sampler=TPESampler(seed=SEED),
    pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
)
print("Running Study A (MLP)…")
studyA.optimize(objective_mlp, n_trials=40, show_progress_bar=True)

# Reload the winning trial's saved model and score it on the held-out test split.
best_trial_A = studyA.best_trial
bestA_path = best_trial_A.user_attrs["model_path"]
best_mlp = tf.keras.models.load_model(bestA_path)
yhatA = best_mlp.predict(Xte, verbose=0).squeeze()

rmse_A = _rmse(yte, yhatA)
mae_A = mean_absolute_error(yte, yhatA)
r2_A = r2_score(yte, yhatA)
print("Best MLP params:", best_trial_A.params)
print(f"MLP test → RMSE: {rmse_A:.4f} | MAE: {mae_A:.4f} | R2: {r2_A:.4f}")
print(f"Skill (MLP vs base): {1.0 - (rmse_A/_rmse(yte, y_base)):.3f}")

OperationalError: (sqlite3.OperationalError) database is locked
[SQL: 
CREATE TABLE studies (
	study_id INTEGER NOT NULL, 
	study_name VARCHAR(512) NOT NULL, 
	PRIMARY KEY (study_id)
)

]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

## Track B - Sequence models

### Helper

In [None]:
# Wrap the scaled arrays back into DataFrames so each row keeps the index label and
# column names of its source split; the sequence builder needs those labels.
Xtr_s, Xva_s, Xte_s = (
    pd.DataFrame(scaled, index=source.index, columns=feat_cols)
    for scaled, source in ((Xtr, Xtr_df), (Xva, Xva_df), (Xte, Xte_df))
)

# def _build_seq(X_df, y_ser, L):
#     Xv, yv = X_df.values, y_ser.values
#     xs, ys = [], []
#     for i in range(L-1, len(X_df)):
#         block = Xv[i-L+1:i+1]
#         if np.isnan(block).any():
#             continue
#         xs.append(block); ys.append(yv[i])
#     return np.asarray(xs, dtype="float32"), np.asarray(ys, dtype="float32")

def build_seq_with_idx(X_df, y_ser, L):
    Xv, yv = X_df.values, y_ser.values
    xs, ys, idx = [], [], []
    for i in range(L-1, len(X_df)):
        block = Xv[i-L+1:i+1]
        if np.isnan(block).any():
            continue
        xs.append(block); ys.append(yv[i]); idx.append(X_df.index[i])
    return (np.asarray(xs, dtype="float32"),
            np.asarray(ys, dtype="float32"),
            pd.DatetimeIndex(idx))

### LSTM

In [None]:
def objective_lstm(trial: optuna.Trial) -> float:
    """Optuna objective for a single-layer LSTM regressor (Track B1).

    Samples the window length and network hyperparameters, builds sliding windows
    from the scaled train/validation frames, trains the model, saves it to OUT_DIR
    and returns the validation RMSE (lower is better).

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning steps.

    Returns:
        Validation RMSE computed on the validation windows.

    Raises:
        optuna.TrialPruned: when either split yields no valid window, or when the
            pruning callback stops the trial.
    """
    # Drop Keras state left over from earlier trials so memory does not grow per trial.
    tf.keras.backend.clear_session()

    L   = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    u   = trial.suggest_int("units", 32, 128, step=32)
    do  = trial.suggest_float("dropout", 0.0, 0.4)
    lr  = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs  = trial.suggest_categorical("batch", [64, 128, 256])
    eps = trial.suggest_int("epochs", 40, 120)

    # build_seq_with_idx is the window builder defined in this notebook; the
    # timestamps it also returns are not needed for training.
    Xtr_seq, ytr_seq, _ = build_seq_with_idx(Xtr_s, ytr, L)
    Xva_seq, yva_seq, _ = build_seq_with_idx(Xva_s, yva, L)
    if len(Xtr_seq) == 0 or len(Xva_seq) == 0:
        raise optuna.TrialPruned()

    model = models.Sequential([layers.Input(shape=(L, Xtr_seq.shape[2])),
                               layers.LSTM(u, dropout=do),
                               layers.Dense(1)])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    # ravel() keeps a 1-D vector even when there is a single validation window.
    yhat = model.predict(Xva_seq, verbose=0).ravel()
    val_rmse = _rmse(yva_seq, yhat)

    # The study cell reloads the best trial from "model_path", so the file must exist.
    tmp_path = OUT_DIR / f"optuna_lstm_trial{trial.number}.keras"
    model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
# Study B1 (LSTM): trials persist in a dedicated SQLite file so the search can be resumed.
storageB1 = f"sqlite:///{(STUDY_DIR / 'ground_trackB_lstm.sqlite').absolute()}"
studyB1 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_lstm",
                              storage=storageB1, load_if_exists=True)
print("Running Study B1 (LSTM)…")
studyB1.optimize(objective_lstm, n_trials=40, show_progress_bar=True)

# Reload the best trial's model and rebuild the test windows with its window length.
bestB1_path = studyB1.best_trial.user_attrs["model_path"]
bestL1      = studyB1.best_trial.user_attrs["seq_len_used"]
best_lstm   = tf.keras.models.load_model(bestB1_path)
Xte_seq, yte_seq, idx_te_seq = build_seq_with_idx(Xte_s, yte, bestL1)
yhatB1 = best_lstm.predict(Xte_seq, verbose=0).ravel()

# Align the baseline on the timestamps of the windows that were actually kept.
# Positional slicing (iloc[L-1:]) drifts as soon as one NaN window is skipped.
# Assumes the test index has unique labels — TODO confirm.
y_base_seq = pd.Series(y_base, index=Xte_df.index).reindex(idx_te_seq).to_numpy()

rmse_B1 = _rmse(yte_seq, yhatB1)
print("Best LSTM params:", studyB1.best_trial.params | {"seq_len": bestL1})
print(f"LSTM test → RMSE: {rmse_B1:.4f} | MAE: {mean_absolute_error(yte_seq, yhatB1):.4f} | R2: {r2_score(yte_seq, yhatB1):.4f} | Skill: {1.0 - (rmse_B1/_rmse(yte_seq, y_base_seq)):.3f}")


### BiLSTM

In [None]:
def objective_bilstm(trial: optuna.Trial) -> float:
    """Optuna objective for a bidirectional LSTM regressor (Track B2).

    Samples the window length and network hyperparameters, builds sliding windows
    from the scaled train/validation frames, trains the model, saves it to OUT_DIR
    and returns the validation RMSE (lower is better).

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning steps.

    Returns:
        Validation RMSE computed on the validation windows.

    Raises:
        optuna.TrialPruned: when either split yields no valid window, or when the
            pruning callback stops the trial.
    """
    # Drop Keras state left over from earlier trials so memory does not grow per trial.
    tf.keras.backend.clear_session()

    L   = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    u   = trial.suggest_int("units", 32, 128, step=32)
    do  = trial.suggest_float("dropout", 0.0, 0.4)
    lr  = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs  = trial.suggest_categorical("batch", [64, 128, 256])
    eps = trial.suggest_int("epochs", 40, 120)

    # build_seq_with_idx is the window builder defined in this notebook; the
    # timestamps it also returns are not needed for training.
    Xtr_seq, ytr_seq, _ = build_seq_with_idx(Xtr_s, ytr, L)
    Xva_seq, yva_seq, _ = build_seq_with_idx(Xva_s, yva, L)
    if len(Xtr_seq) == 0 or len(Xva_seq) == 0:
        raise optuna.TrialPruned()

    model = models.Sequential([
        layers.Input(shape=(L, Xtr_seq.shape[2])),
        layers.Bidirectional(layers.LSTM(u, dropout=do)),
        layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    # ravel() keeps a 1-D vector even when there is a single validation window.
    yhat = model.predict(Xva_seq, verbose=0).ravel()
    val_rmse = _rmse(yva_seq, yhat)

    # The study cell reloads the best trial from "model_path", so the file must exist.
    tmp_path = OUT_DIR / f"optuna_bilstm_trial{trial.number}.keras"
    model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
# Study B2 (BiLSTM): trials persist in a dedicated SQLite file so the search can be resumed.
storageB2 = f"sqlite:///{(STUDY_DIR / 'ground_trackB_bilstm.sqlite').absolute()}"
studyB2 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_bilstm",
                              storage=storageB2, load_if_exists=True)
print("Running Study B2 (BiLSTM)…")
studyB2.optimize(objective_bilstm, n_trials=35, show_progress_bar=True)

# Reload the best trial's model and rebuild the test windows with its window length.
bestB2_path = studyB2.best_trial.user_attrs["model_path"]
bestL2      = studyB2.best_trial.user_attrs["seq_len_used"]
best_bi     = tf.keras.models.load_model(bestB2_path)
Xte_seq, yte_seq, idx_te_seq = build_seq_with_idx(Xte_s, yte, bestL2)
yhatB2 = best_bi.predict(Xte_seq, verbose=0).ravel()

# Align the baseline on the timestamps of the windows that were actually kept.
# Positional slicing (iloc[L-1:]) drifts as soon as one NaN window is skipped.
# Assumes the test index has unique labels — TODO confirm.
y_base_seq = pd.Series(y_base, index=Xte_df.index).reindex(idx_te_seq).to_numpy()

rmse_B2 = _rmse(yte_seq, yhatB2)
print("Best BiLSTM params:", studyB2.best_trial.params | {"seq_len": bestL2})
print(f"BiLSTM test → RMSE: {rmse_B2:.4f} | MAE: {mean_absolute_error(yte_seq, yhatB2):.4f} | R2: {r2_score(yte_seq, yhatB2):.4f} | Skill: {1.0 - (rmse_B2/_rmse(yte_seq, y_base_seq)):.3f}")


### CNN-LSTM

In [None]:
def objective_cnnlstm(trial: optuna.Trial) -> float:
    """Optuna objective for a Conv1D -> (optional MaxPool) -> LSTM regressor (Track B3).

    Samples the window length and network hyperparameters, builds sliding windows
    from the scaled train/validation frames, trains the model, saves it to OUT_DIR
    and returns the validation RMSE (lower is better).

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning steps.

    Returns:
        Validation RMSE computed on the validation windows.

    Raises:
        optuna.TrialPruned: when either split yields no valid window, or when the
            pruning callback stops the trial.
    """
    # Drop Keras state left over from earlier trials so memory does not grow per trial.
    tf.keras.backend.clear_session()

    L     = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    filt  = trial.suggest_int("filters", 16, 64, step=16)
    ksz   = trial.suggest_categorical("kernel_size", [2, 3, 5])
    pool  = trial.suggest_categorical("pool", [1, 2])
    u     = trial.suggest_int("lstm_units", 32, 128, step=32)
    do    = trial.suggest_float("dropout", 0.0, 0.4)
    lr    = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs    = trial.suggest_categorical("batch", [64, 128, 256])
    eps   = trial.suggest_int("epochs", 40, 120)

    # build_seq_with_idx is the window builder defined in this notebook; the
    # timestamps it also returns are not needed for training.
    Xtr_seq, ytr_seq, _ = build_seq_with_idx(Xtr_s, ytr, L)
    Xva_seq, yva_seq, _ = build_seq_with_idx(Xva_s, yva, L)
    if len(Xtr_seq) == 0 or len(Xva_seq) == 0:
        raise optuna.TrialPruned()

    stack = [
        layers.Input(shape=(L, Xtr_seq.shape[2])),
        layers.Conv1D(filt, kernel_size=ksz, padding="causal", activation="relu"),
    ]
    # pool == 1 means "no pooling": leave the layer out instead of inserting an
    # identity Lambda, because a Python lambda makes the saved .keras file
    # unloadable under Keras' default safe mode.
    if pool > 1:
        stack.append(layers.MaxPooling1D(pool_size=pool))
    stack += [layers.LSTM(u, dropout=do), layers.Dense(1)]
    model = models.Sequential(stack)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    # ravel() keeps a 1-D vector even when there is a single validation window.
    yhat = model.predict(Xva_seq, verbose=0).ravel()
    val_rmse = _rmse(yva_seq, yhat)

    # The study cell reloads the best trial from "model_path", so the file must exist.
    tmp_path = OUT_DIR / f"optuna_cnnlstm_trial{trial.number}.keras"
    model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
# Study B3 (CNN-LSTM): trials persist in a dedicated SQLite file so the search can be resumed.
storageB3 = f"sqlite:///{(STUDY_DIR / 'ground_trackB_cnnlstm.sqlite').absolute()}"
studyB3 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_cnnlstm",
                              storage=storageB3, load_if_exists=True)
print("Running Study B3 (CNN-LSTM)…")
studyB3.optimize(objective_cnnlstm, n_trials=35, show_progress_bar=True)

# Reload the best trial's model and rebuild the test windows with its window length.
bestB3_path = studyB3.best_trial.user_attrs["model_path"]
bestL3      = studyB3.best_trial.user_attrs["seq_len_used"]
best_cnn    = tf.keras.models.load_model(bestB3_path)
Xte_seq, yte_seq, idx_te_seq = build_seq_with_idx(Xte_s, yte, bestL3)
yhatB3 = best_cnn.predict(Xte_seq, verbose=0).ravel()

# Align the baseline on the timestamps of the windows that were actually kept.
# Positional slicing (iloc[L-1:]) drifts as soon as one NaN window is skipped.
# Assumes the test index has unique labels — TODO confirm.
y_base_seq = pd.Series(y_base, index=Xte_df.index).reindex(idx_te_seq).to_numpy()

rmse_B3 = _rmse(yte_seq, yhatB3)
print("Best CNN-LSTM params:", studyB3.best_trial.params | {"seq_len": bestL3})
print(f"CNN-LSTM test → RMSE: {rmse_B3:.4f} | MAE: {mean_absolute_error(yte_seq, yhatB3):.4f} | R2: {r2_score(yte_seq, yhatB3):.4f} | Skill: {1.0 - (rmse_B3/_rmse(yte_seq, y_base_seq)):.3f}")


### Transformer

In [None]:
def objective_transformer(trial: optuna.Trial) -> float:
    """Optuna objective for a single-block Transformer encoder regressor (Track B4).

    Samples the window length and network hyperparameters, builds sliding windows
    from the scaled train/validation frames, trains the model, saves it to OUT_DIR
    and returns the validation RMSE (lower is better).

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning steps.

    Returns:
        Validation RMSE computed on the validation windows.

    Raises:
        optuna.TrialPruned: when d_model is not divisible by the head count, when
            either split yields no valid window, or when the pruning callback
            stops the trial.
    """
    # Drop Keras state left over from earlier trials so memory does not grow per trial.
    tf.keras.backend.clear_session()

    L       = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    d_model = trial.suggest_categorical("d_model", [32, 64, 96, 128])
    heads   = trial.suggest_categorical("heads", [2, 4, 8])
    # Guard for key_dim = d_model // heads. Every current (d_model, heads) pair is
    # divisible, so this only matters if the choice lists above are edited.
    if d_model % heads != 0:
        raise optuna.TrialPruned()

    ff_dim  = trial.suggest_categorical("ff_dim", [64, 96, 128, 192])
    att_do  = trial.suggest_float("att_dropout", 0.0, 0.3)
    do      = trial.suggest_float("dropout", 0.0, 0.4)
    lr      = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs      = trial.suggest_categorical("batch", [64, 128, 256])
    eps     = trial.suggest_int("epochs", 40, 120)

    # build_seq_with_idx is the window builder defined in this notebook; the
    # timestamps it also returns are not needed for training.
    Xtr_seq, ytr_seq, _ = build_seq_with_idx(Xtr_s, ytr, L)
    Xva_seq, yva_seq, _ = build_seq_with_idx(Xva_s, yva, L)
    if len(Xtr_seq) == 0 or len(Xva_seq) == 0:
        raise optuna.TrialPruned()

    inp = layers.Input(shape=(L, Xtr_seq.shape[2]))
    x   = layers.Dense(d_model)(inp)  # project the features to d_model
    # Self-attention sub-block with residual connection and layer norm.
    x2  = layers.MultiHeadAttention(num_heads=heads, key_dim=d_model // heads, dropout=att_do)(x, x)
    x   = layers.Add()([x, x2])
    x   = layers.LayerNormalization()(x)
    # Position-wise feed-forward sub-block with residual connection and layer norm.
    ff  = layers.Dense(ff_dim, activation="relu")(x)
    ff  = layers.Dense(d_model)(ff)
    x   = layers.Add()([x, ff])
    x   = layers.LayerNormalization()(x)
    # Average over the time axis, then regress a single value.
    x   = layers.GlobalAveragePooling1D()(x)
    x   = layers.Dropout(do)(x)
    out = layers.Dense(1)(x)
    model = models.Model(inp, out)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    # ravel() keeps a 1-D vector even when there is a single validation window.
    yhat = model.predict(Xva_seq, verbose=0).ravel()
    val_rmse = _rmse(yva_seq, yhat)

    # The study cell reloads the best trial from "model_path", so the file must exist.
    tmp_path = OUT_DIR / f"optuna_transformer_trial{trial.number}.keras"
    model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
# Study B4 (Transformer): trials persist in a dedicated SQLite file so the search can be resumed.
storageB4 = f"sqlite:///{(STUDY_DIR / 'ground_trackB_transformer.sqlite').absolute()}"
studyB4 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_transformer",
                              storage=storageB4, load_if_exists=True)
print("Running Study B4 (Transformer)…")
studyB4.optimize(objective_transformer, n_trials=40, show_progress_bar=True)

# Reload the best trial's model and rebuild the test windows with its window length.
bestB4_path = studyB4.best_trial.user_attrs["model_path"]
bestL4      = studyB4.best_trial.user_attrs["seq_len_used"]
best_tr     = tf.keras.models.load_model(bestB4_path)
Xte_seq, yte_seq, idx_te_seq = build_seq_with_idx(Xte_s, yte, bestL4)
yhatB4 = best_tr.predict(Xte_seq, verbose=0).ravel()

# Align the baseline on the timestamps of the windows that were actually kept.
# Positional slicing (iloc[L-1:]) drifts as soon as one NaN window is skipped.
# Assumes the test index has unique labels — TODO confirm.
y_base_seq = pd.Series(y_base, index=Xte_df.index).reindex(idx_te_seq).to_numpy()

rmse_B4 = _rmse(yte_seq, yhatB4)
print("Best Transformer params:", studyB4.best_trial.params | {"seq_len": bestL4})
print(f"Transformer test → RMSE: {rmse_B4:.4f} | MAE: {mean_absolute_error(yte_seq, yhatB4):.4f} | R2: {r2_score(yte_seq, yhatB4):.4f} | Skill: {1.0 - (rmse_B4/_rmse(yte_seq, y_base_seq)):.3f}")


## Best

In [None]:
# Collect the winning hyperparameters of every study under one mapping.
# Sequence studies also carry the window length stored in the trial's user attrs.
best_params = {"MLP": studyA.best_trial.params}
best_params.update({
    label: study.best_trial.params | {"seq_len": study.best_trial.user_attrs["seq_len_used"]}
    for label, study in (
        ("LSTM", studyB1),
        ("BiLSTM", studyB2),
        ("CNN_LSTM", studyB3),
        ("Transformer", studyB4),
    )
})

# Persist as JSON next to the saved models.
params_json = OUT_DIR / "best_hpo_params_all.json"
pd.Series(best_params, dtype="object").to_json(params_json)
print("Saved params →", params_json)

## Visualization

In [None]:
# Registry consumed by the evaluation loop: one entry per tuned model.
# "tabular" models predict straight from the scaled feature matrix; "seq" models
# additionally record the window length ("L") of their best trial.
models_info = {
    "MLP": {"type": "tabular", "model": best_mlp},  # from study A
}
models_info.update({
    label: {"type": "seq", "model": net, "L": window}
    for label, net, window in (
        ("LSTM", best_lstm, bestL1),        # from study B1
        ("BiLSTM", best_bi, bestL2),        # from study B2
        ("CNN-LSTM", best_cnn, bestL3),     # from study B3
        ("Transformer", best_tr, bestL4),   # from study B4
    )
})

In [None]:
OUT_FIG = OUT_DIR  

rows = []

for name, cfg in models_info.items():
    print(f"\n=== {name} ===")
    if cfg["type"] == "tabular":
        # Predicciones directas en el set de test tabular
        y_true = yte
        y_pred = cfg["model"].predict(Xte, verbose=0).squeeze()
        idx    = Xte_df.index
        # Baseline alineado
        y_base_aligned = y_base
    else:
        # Secuencial: reconstruir ventanas con la mejor L
        L = int(cfg["L"])
        X_seq, y_seq, idx = build_seq_with_idx(Xte_s, yte, L)
        if len(X_seq) == 0:
            print("No hay secuencias válidas (NaNs). Se omite.")
            continue
        y_true = y_seq
        y_pred = cfg["model"].predict(X_seq, verbose=0).squeeze()
        # Baseline reindexado a los timestamps válidos
        y_base_aligned = pd.Series(y_base, index=Xte_df.index).reindex(idx).to_numpy()

    # Métricas
    rmse = _rmse(y_true, y_pred)
    mae  = mean_absolute_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    skill = 1.0 - (rmse / _rmse(y_true, y_base_aligned))
    print(f"RMSE={rmse:.4f} | MAE={mae:.4f} | R2={r2:.4f} | Skill vs baseline={skill:.3f}")

    rows.append({"model": name, "RMSE": rmse, "MAE": mae, "R2": r2, "Skill": skill})

    # ---------- PLOTS (tres tipos por modelo) ----------
    N = min(400, len(y_true))  # recorte para visualización

    # 1) Serie temporal: verdad vs predicción vs baseline
    plt.figure(figsize=(12, 3.6))
    plt.plot(idx[:N], y_true[:N], label="truth", lw=1.4)
    plt.plot(idx[:N], y_pred[:N], label=name, lw=1.1)
    plt.plot(idx[:N], y_base_aligned[:N], label="baseline", lw=1.0, alpha=0.7)
    plt.title(f"Test — Truth vs {name} vs Baseline ({TARGET})")
    plt.ylabel("GHI (W/m²)" if TARGET.startswith("y_ghi") else "k-index")
    plt.xlabel("Time")
    plt.grid(True, ls="--", alpha=0.3); plt.legend(); plt.xticks(rotation=45); plt.tight_layout()
#    plt.savefig(OUT_FIG / f"{name}_ts_test.png", dpi=140)
    plt.show()

    # 2) Dispersión: y_true vs y_pred
    lim_min = float(min(np.min(y_true), np.min(y_pred)))
    lim_max = float(max(np.max(y_true), np.max(y_pred)))
    plt.figure(figsize=(4.8, 4.8))
    plt.scatter(y_true, y_pred, s=10, alpha=0.5)
    plt.plot([lim_min, lim_max], [lim_min, lim_max], 'r--', lw=1.0)
    plt.xlabel("Actual"); plt.ylabel("Predicted")
    plt.title(f"{name} — Actual vs Predicted\nRMSE={rmse:.3f} MAE={mae:.3f} R2={r2:.3f}")
    plt.grid(True, ls="--", alpha=0.3); plt.tight_layout()
    plt.savefig(OUT_FIG / f"{name}_scatter.png", dpi=140)
#    plt.show()

    # 3) Histograma de residuales
    resid = y_pred - y_true
    plt.figure(figsize=(6, 3.2))
    plt.hist(resid, bins=50, alpha=0.85)
    plt.axvline(0, color='r', ls='--', lw=1)
    plt.title(f"{name} — Residuals (mean={np.mean(resid):.3f})")
    plt.xlabel("Residual"); plt.ylabel("Frequency")
    plt.grid(True, ls="--", alpha=0.3); plt.tight_layout()
#    plt.savefig(OUT_F