# Ground HPO with Optuna (MLP, LSTM, BiLSTM, CNN-LSTM, Transformer)

## Libraries

In [1]:
import os, numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, regularizers

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from optuna.integration import TFKerasPruningCallback
from optuna.storages import JournalStorage
from optuna.storages import JournalFileStorage, JournalFileOpenLock
from pathlib import Path

2025-09-23 10:57:40.483062: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-23 10:57:40.488697: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758643060.495306   39037 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758643060.497519   39037 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-23 10:57:40.505082: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

## Config

In [2]:
SEED = 42
np.random.seed(SEED); tf.random.set_seed(SEED)

DATA_DIR = Path("../data_processed")
OUT_DIR  = Path("../models"); OUT_DIR.mkdir(parents=True, exist_ok=True)
# STUDY_DIR= Path("../models/optuna_studies"); STUDY_DIR.mkdir(parents=True, exist_ok=True)
STUDY_DIR= Path("../optuna_studies"); STUDY_DIR.mkdir(parents=True, exist_ok=True)


TRAIN_PQ = DATA_DIR / "ground_train_h6.parquet"
VAL_PQ   = DATA_DIR / "ground_val_h6.parquet"
TEST_PQ  = DATA_DIR / "ground_test_h6.parquet"
TARGET   = "y_ghi_h6"

In [3]:
import os
print(f"Directorio existe: {STUDY_DIR.exists()}")
print(f"Permisos de escritura: {os.access(STUDY_DIR, os.W_OK)}")

Directorio existe: True
Permisos de escritura: True


### Data loading and preprocessing

In [4]:
train = pd.read_parquet(TRAIN_PQ).sort_index()
val   = pd.read_parquet(VAL_PQ).sort_index()
test  = pd.read_parquet(TEST_PQ).sort_index()
assert TARGET in train and TARGET in val and TARGET in test

feat_cols = sorted(list(set(train.columns) & set(val.columns) & set(test.columns) - {TARGET}))
feat_cols = [c for c in feat_cols if pd.api.types.is_numeric_dtype(train[c])]
Xtr_df, ytr = train[feat_cols], train[TARGET]
Xva_df, yva = val[feat_cols],   val[TARGET]
Xte_df, yte = test[feat_cols],  test[TARGET]

scaler = StandardScaler()
Xtr = scaler.fit_transform(Xtr_df)
Xva = scaler.transform(Xva_df)
Xte = scaler.transform(Xte_df)

In [5]:
def _rmse(a,b): return float(np.sqrt(mean_squared_error(a,b)))

## Baseline

In [6]:
base_src = None
for c in ["ghi_qc","ghi_sg_definitive","ghi_qc_lag1"]:
    if c in test.columns: base_src = test[c]; break
if base_src is None:
    base_src = pd.Series(np.nanmedian(ytr), index=test.index)
y_base = base_src.to_numpy()
print(f"Baseline → RMSE: {_rmse(yte, y_base):.4f} | MAE: {mean_absolute_error(yte, y_base):.4f}")

Baseline → RMSE: 196.2835 | MAE: 102.1871


## Track A - MLP

In [7]:
def objective_mlp(trial: optuna.Trial) -> float:
    n1  = trial.suggest_int("n1", 64, 512, step=64)
    n2  = trial.suggest_int("n2", 32, max(64, n1//2), step=32)
    do1 = trial.suggest_float("do1", 0.0, 0.5)
    do2 = trial.suggest_float("do2", 0.0, 0.5)
    lr  = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    l2w = trial.suggest_float("l2", 1e-8, 1e-3, log=True)
    act = trial.suggest_categorical("act", ["relu","selu","gelu"])
    bs  = trial.suggest_categorical("batch", [64, 128, 256, 512])
    eps = trial.suggest_int("epochs", 40, 150)

    model = models.Sequential([
        layers.Input(shape=(Xtr.shape[1],)),
        layers.Dense(n1, activation=act, kernel_regularizer=regularizers.l2(l2w)),
        layers.Dropout(do1),
        layers.Dense(n2, activation=act, kernel_regularizer=regularizers.l2(l2w)),
        layers.Dropout(do2),
        layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss="mse", metrics=["mae"])

    es = callbacks.EarlyStopping(monitor="val_loss", patience=12, restore_best_weights=True, verbose=0)
    rlr = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=6, min_lr=1e-5, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")

    model.fit(Xtr, ytr, validation_data=(Xva, yva),
              epochs=eps, batch_size=bs, verbose=0,
              callbacks=[es, rlr, prune_cb])

    yhat = model.predict(Xva, verbose=0).squeeze()
    val_rmse = _rmse(yva, yhat)
    tmp_path = OUT_DIR / f"optuna_mlp_trial{trial.number}.keras"
    #model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    return val_rmse


In [8]:
study_name = "ground_trackA_mlp"
log_path   = STUDY_DIR / f"{study_name}.log"
lock_path  = STUDY_DIR / f"{study_name}.lock"

try:
    lock_path.unlink()
except FileNotFoundError:
    pass

file_storage = JournalFileStorage(
    str(log_path),
    lock_obj=JournalFileOpenLock(str(lock_path))
)
storage = JournalStorage(file_storage)

studyA = optuna.create_study(direction="minimize",
                             sampler=TPESampler(seed=SEED),
                             pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                             study_name="ground_trackA_mlp",
                             storage=storage, load_if_exists=True)
print("Running Study A (MLP)…")
studyA.optimize(objective_mlp, n_trials=40, show_progress_bar=True)

bestA_path = studyA.best_trial.user_attrs["model_path"]
best_mlp = tf.keras.models.load_model(bestA_path)
yhatA = best_mlp.predict(Xte, verbose=0).squeeze()
print("Best MLP params:", studyA.best_trial.params)
print(f"MLP test → RMSE: {_rmse(yte, yhatA):.4f} | MAE: {mean_absolute_error(yte, yhatA):.4f} | R2: {r2_score(yte, yhatA):.4f}")
print(f"Skill (MLP vs base): {1.0 - (_rmse(yte, yhatA)/_rmse(yte, y_base)):.3f}")

  lock_obj=JournalFileOpenLock(str(lock_path))
  file_storage = JournalFileStorage(
[I 2025-09-23 10:57:42,774] A new study created in Journal with name: ground_trackA_mlp


Running Study A (MLP)…


  0%|          | 0/40 [00:00<?, ?it/s]

2025-09-23 10:57:42.887883: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2025-09-23 10:57:42.887917: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:137] retrieving CUDA diagnostic information for host: solarlivinglabx
2025-09-23 10:57:42.887920: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:144] hostname: solarlivinglabx
2025-09-23 10:57:42.887998: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:168] libcuda reported version is: 580.65.6
2025-09-23 10:57:42.888010: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:172] kernel reported version is: 575.64.3
2025-09-23 10:57:42.888013: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:262] kernel version 575.64.3 does not match DSO version 580.65.6 -- cannot find wor

[I 2025-09-23 10:58:02,206] Trial 0 finished with value: 131.05627816657622 and parameters: {'n1': 192, 'n2': 96, 'do1': 0.36599697090570255, 'do2': 0.2993292420985183, 'lr': 0.00018410729205738696, 'l2': 6.02521573620385e-08, 'act': 'selu', 'batch': 256, 'epochs': 63}. Best is trial 0 with value: 131.05627816657622.


Best trial: 1. Best value: 130.579:   5%|▌         | 2/40 [00:36<11:31, 18.19s/it]

[I 2025-09-23 10:58:19,531] Trial 1 finished with value: 130.57891681134822 and parameters: {'n1': 128, 'n2': 32, 'do1': 0.15212112147976886, 'do2': 0.2623782158161189, 'lr': 0.0005418282319533242, 'l2': 2.8585493941961875e-07, 'act': 'relu', 'batch': 256, 'epochs': 97}. Best is trial 1 with value: 130.57891681134822.


Best trial: 1. Best value: 130.579:   8%|▊         | 3/40 [01:11<15:48, 25.65s/it]

[I 2025-09-23 10:58:54,049] Trial 2 finished with value: 130.7351210463355 and parameters: {'n1': 320, 'n2': 32, 'do1': 0.3037724259507192, 'do2': 0.08526206184364576, 'lr': 0.00012897950480855554, 'l2': 0.000555172168524472, 'act': 'relu', 'batch': 128, 'epochs': 94}. Best is trial 1 with value: 130.57891681134822.


Best trial: 3. Best value: 130.531:  10%|█         | 4/40 [01:31<14:09, 23.59s/it]

[I 2025-09-23 10:59:14,474] Trial 3 finished with value: 130.5314856653367 and parameters: {'n1': 64, 'n2': 64, 'do1': 0.12938999080000846, 'do2': 0.331261142176991, 'lr': 0.00033852267834519784, 'l2': 3.984190594434684e-06, 'act': 'gelu', 'batch': 128, 'epochs': 142}. Best is trial 3 with value: 130.5314856653367.


Best trial: 3. Best value: 130.531:  12%|█▎        | 5/40 [01:46<11:56, 20.46s/it]

[I 2025-09-23 10:59:29,404] Trial 4 finished with value: 130.92496956749693 and parameters: {'n1': 64, 'n2': 32, 'do1': 0.022613644455269033, 'do2': 0.16266516538163217, 'lr': 0.00045745782054754043, 'l2': 2.2737628102536842e-07, 'act': 'relu', 'batch': 256, 'epochs': 149}. Best is trial 3 with value: 130.5314856653367.


Best trial: 5. Best value: 130.418:  15%|█▌        | 6/40 [01:57<09:40, 17.07s/it]

[I 2025-09-23 10:59:39,899] Trial 5 finished with value: 130.4183248363703 and parameters: {'n1': 448, 'n2': 64, 'do1': 0.0027610585618011996, 'do2': 0.4077307142274171, 'lr': 0.0015882886211970053, 'l2': 4.416068895118589e-05, 'act': 'relu', 'batch': 128, 'epochs': 47}. Best is trial 5 with value: 130.4183248363703.


Best trial: 6. Best value: 129.847:  18%|█▊        | 7/40 [02:12<09:07, 16.60s/it]

[I 2025-09-23 10:59:55,511] Trial 6 finished with value: 129.8467696700615 and parameters: {'n1': 192, 'n2': 32, 'do1': 0.36480308916903204, 'do2': 0.31877873567760656, 'lr': 0.00321623546920742, 'l2': 2.2965432344634307e-06, 'act': 'gelu', 'batch': 128, 'epochs': 87}. Best is trial 6 with value: 129.8467696700615.


Best trial: 6. Best value: 129.847:  20%|██        | 8/40 [02:34<09:43, 18.22s/it]

[I 2025-09-23 11:00:17,211] Trial 7 finished with value: 131.19324665831698 and parameters: {'n1': 64, 'n2': 32, 'do1': 0.015714592843367126, 'do2': 0.3182052056318902, 'lr': 0.00034204353211648276, 'l2': 3.4902252478382486e-06, 'act': 'relu', 'batch': 64, 'epochs': 57}. Best is trial 6 with value: 129.8467696700615.


Best trial: 6. Best value: 129.847:  22%|██▎       | 9/40 [02:59<10:34, 20.48s/it]

[I 2025-09-23 11:00:42,645] Trial 8 finished with value: 130.6689967771621 and parameters: {'n1': 512, 'n2': 224, 'do1': 0.31670187825521173, 'do2': 0.43573029509385885, 'lr': 0.0023196088229293613, 'l2': 8.56742466787504e-08, 'act': 'relu', 'batch': 64, 'epochs': 87}. Best is trial 6 with value: 129.8467696700615.


Best trial: 6. Best value: 129.847:  25%|██▌       | 10/40 [03:16<09:41, 19.38s/it]

[I 2025-09-23 11:00:59,563] Trial 9 finished with value: 130.64891377992777 and parameters: {'n1': 448, 'n2': 224, 'do1': 0.0034760652655953517, 'do2': 0.25537365128878287, 'lr': 0.0005118807219845018, 'l2': 1.2898495377182638e-07, 'act': 'gelu', 'batch': 256, 'epochs': 147}. Best is trial 6 with value: 129.8467696700615.


Best trial: 10. Best value: 129.787:  28%|██▊       | 11/40 [03:27<08:01, 16.60s/it]

[I 2025-09-23 11:01:09,879] Trial 10 finished with value: 129.7865209319712 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.4847685553939329, 'do2': 0.008980937807409828, 'lr': 0.004689276414499836, 'l2': 4.3570159468740904e-05, 'act': 'gelu', 'batch': 512, 'epochs': 115}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  30%|███       | 12/40 [03:37<06:52, 14.72s/it]

[I 2025-09-23 11:01:20,296] Trial 11 finished with value: 130.02859511228675 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.4834928448104276, 'do2': 0.0360661398032989, 'lr': 0.004841816993877469, 'l2': 4.4105000338301814e-05, 'act': 'gelu', 'batch': 512, 'epochs': 116}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  32%|███▎      | 13/40 [03:49<06:12, 13.81s/it]

[I 2025-09-23 11:01:31,998] Trial 12 finished with value: 129.82969614075202 and parameters: {'n1': 320, 'n2': 128, 'do1': 0.4781260740645811, 'do2': 0.16088358880013232, 'lr': 0.004943270385632307, 'l2': 3.0483578956793632e-05, 'act': 'gelu', 'batch': 512, 'epochs': 116}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  35%|███▌      | 14/40 [03:52<04:33, 10.52s/it]

[I 2025-09-23 11:01:34,912] Trial 13 pruned. Trial was pruned at epoch 7.


Best trial: 10. Best value: 129.787:  38%|███▊      | 15/40 [03:54<03:19,  7.98s/it]

[I 2025-09-23 11:01:37,021] Trial 14 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  40%|████      | 16/40 [03:56<02:30,  6.26s/it]

[I 2025-09-23 11:01:39,271] Trial 15 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  42%|████▎     | 17/40 [04:05<02:42,  7.06s/it]

[I 2025-09-23 11:01:48,182] Trial 16 finished with value: 130.1110703399215 and parameters: {'n1': 384, 'n2': 160, 'do1': 0.2298356408716007, 'do2': 0.09328219056619014, 'lr': 0.0026794612225895832, 'l2': 0.00019872356792327133, 'act': 'gelu', 'batch': 512, 'epochs': 131}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  45%|████▌     | 18/40 [04:07<02:01,  5.52s/it]

[I 2025-09-23 11:01:50,134] Trial 17 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  48%|████▊     | 19/40 [04:19<02:34,  7.38s/it]

[I 2025-09-23 11:02:01,839] Trial 18 finished with value: 130.1209473432314 and parameters: {'n1': 320, 'n2': 128, 'do1': 0.4289313456815502, 'do2': 0.05022133057304116, 'lr': 0.003533410777346932, 'l2': 5.565700553302796e-07, 'act': 'selu', 'batch': 512, 'epochs': 74}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  50%|█████     | 20/40 [04:30<02:51,  8.57s/it]

[I 2025-09-23 11:02:13,169] Trial 19 finished with value: 129.8599004696985 and parameters: {'n1': 384, 'n2': 160, 'do1': 0.23488955154339042, 'do2': 0.4956385980717998, 'lr': 0.0017877346068381798, 'l2': 1.2892918765992693e-05, 'act': 'gelu', 'batch': 512, 'epochs': 133}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  52%|█████▎    | 21/40 [04:35<02:22,  7.50s/it]

[I 2025-09-23 11:02:18,182] Trial 20 pruned. Trial was pruned at epoch 7.


Best trial: 10. Best value: 129.787:  55%|█████▌    | 22/40 [04:50<02:58,  9.89s/it]

[I 2025-09-23 11:02:33,654] Trial 21 finished with value: 130.0163601484444 and parameters: {'n1': 192, 'n2': 64, 'do1': 0.3863466032461569, 'do2': 0.3681465439943042, 'lr': 0.003306378776660835, 'l2': 6.212094774650723e-06, 'act': 'gelu', 'batch': 128, 'epochs': 81}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  57%|█████▊    | 23/40 [04:53<02:12,  7.82s/it]

[I 2025-09-23 11:02:36,653] Trial 22 pruned. Trial was pruned at epoch 7.


Best trial: 10. Best value: 129.787:  60%|██████    | 24/40 [05:13<03:00, 11.25s/it]

[I 2025-09-23 11:02:55,896] Trial 23 finished with value: 130.2292074934613 and parameters: {'n1': 192, 'n2': 64, 'do1': 0.44821008113607297, 'do2': 0.12122533130153981, 'lr': 0.002555379192703343, 'l2': 8.513683942226853e-05, 'act': 'gelu', 'batch': 128, 'epochs': 98}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  62%|██████▎   | 25/40 [05:27<03:01, 12.09s/it]

[I 2025-09-23 11:03:09,941] Trial 24 finished with value: 129.82892138551026 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.38799892206009745, 'do2': 0.2814182469796943, 'lr': 0.004908691007766086, 'l2': 1.4391573390639231e-08, 'act': 'gelu', 'batch': 512, 'epochs': 88}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  65%|██████▌   | 26/40 [05:39<02:48, 12.04s/it]

[I 2025-09-23 11:03:21,877] Trial 25 finished with value: 129.97904728984207 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.45935216229906217, 'do2': 0.18902855602054186, 'lr': 0.003988041293163814, 'l2': 2.884704141156684e-08, 'act': 'selu', 'batch': 512, 'epochs': 69}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  68%|██████▊   | 27/40 [05:51<02:37, 12.09s/it]

[I 2025-09-23 11:03:34,087] Trial 26 finished with value: 129.84219689203508 and parameters: {'n1': 320, 'n2': 128, 'do1': 0.40798818780220436, 'do2': 0.008699159019866726, 'lr': 0.002034220578296104, 'l2': 0.00037647892844942115, 'act': 'gelu', 'batch': 512, 'epochs': 114}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  70%|███████   | 28/40 [05:54<01:52,  9.36s/it]

[I 2025-09-23 11:03:37,062] Trial 27 pruned. Trial was pruned at epoch 7.


Best trial: 10. Best value: 129.787:  72%|███████▎  | 29/40 [05:55<01:17,  7.01s/it]

[I 2025-09-23 11:03:38,609] Trial 28 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  75%|███████▌  | 30/40 [05:57<00:54,  5.44s/it]

[I 2025-09-23 11:03:40,379] Trial 29 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  78%|███████▊  | 31/40 [05:59<00:40,  4.47s/it]

[I 2025-09-23 11:03:42,572] Trial 30 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  80%|████████  | 32/40 [06:11<00:53,  6.70s/it]

[I 2025-09-23 11:03:54,491] Trial 31 finished with value: 129.92804499029452 and parameters: {'n1': 320, 'n2': 128, 'do1': 0.40891730159427014, 'do2': 0.034846483944236104, 'lr': 0.0024038209831905325, 'l2': 0.0003924080017097791, 'act': 'gelu', 'batch': 512, 'epochs': 112}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  82%|████████▎ | 33/40 [06:21<00:53,  7.62s/it]

[I 2025-09-23 11:04:04,242] Trial 32 finished with value: 129.9157103543486 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.39082707455880916, 'do2': 0.004778775563889064, 'lr': 0.002098317309868563, 'l2': 2.9931612198297768e-05, 'act': 'gelu', 'batch': 512, 'epochs': 116}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  85%|████████▌ | 34/40 [06:39<01:04, 10.73s/it]

[I 2025-09-23 11:04:22,230] Trial 33 finished with value: 129.8712252820077 and parameters: {'n1': 320, 'n2': 128, 'do1': 0.4594057604309428, 'do2': 0.283475975008851, 'lr': 0.004155640472411891, 'l2': 0.00035520651444124216, 'act': 'gelu', 'batch': 256, 'epochs': 94}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787:  88%|████████▊ | 35/40 [06:41<00:40,  8.12s/it]

[I 2025-09-23 11:04:24,253] Trial 34 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  90%|█████████ | 36/40 [06:43<00:25,  6.36s/it]

[I 2025-09-23 11:04:26,501] Trial 35 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  92%|█████████▎| 37/40 [06:46<00:16,  5.37s/it]

[I 2025-09-23 11:04:29,558] Trial 36 pruned. Trial was pruned at epoch 5.


[I 2025-09-23 11:04:33,838] Trial 37 pruned. Trial was pruned at epoch 5.


Best trial: 10. Best value: 129.787:  98%|█████████▊| 39/40 [07:00<00:06,  6.38s/it]

[I 2025-09-23 11:04:43,331] Trial 38 finished with value: 129.8264015185278 and parameters: {'n1': 256, 'n2': 96, 'do1': 0.3615661793747623, 'do2': 0.35007468423112287, 'lr': 0.004171693240332882, 'l2': 8.198679253001079e-05, 'act': 'gelu', 'batch': 512, 'epochs': 114}. Best is trial 10 with value: 129.7865209319712.


Best trial: 10. Best value: 129.787: 100%|██████████| 40/40 [07:02<00:00, 10.56s/it]

[I 2025-09-23 11:04:44,987] Trial 39 pruned. Trial was pruned at epoch 5.





ValueError: File not found: filepath=../models/optuna_mlp_trial10.keras. Please ensure the file is an accessible `.keras` zip file.

## Track B - Sequentials

### Helper

In [None]:
Xtr_s = pd.DataFrame(Xtr, index=Xtr_df.index, columns=feat_cols)
Xva_s = pd.DataFrame(Xva, index=Xva_df.index, columns=feat_cols)
Xte_s = pd.DataFrame(Xte, index=Xte_df.index, columns=feat_cols)

# def _build_seq(X_df, y_ser, L):
#     Xv, yv = X_df.values, y_ser.values
#     xs, ys = [], []
#     for i in range(L-1, len(X_df)):
#         block = Xv[i-L+1:i+1]
#         if np.isnan(block).any():
#             continue
#         xs.append(block); ys.append(yv[i])
#     return np.asarray(xs, dtype="float32"), np.asarray(ys, dtype="float32")

def build_seq_with_idx(X_df, y_ser, L):
    Xv, yv = X_df.values, y_ser.values
    xs, ys, idx = [], [], []
    for i in range(L-1, len(X_df)):
        block = Xv[i-L+1:i+1]
        if np.isnan(block).any():
            continue
        xs.append(block); ys.append(yv[i]); idx.append(X_df.index[i])
    return (np.asarray(xs, dtype="float32"),
            np.asarray(ys, dtype="float32"),
            pd.DatetimeIndex(idx))

### LSTM

In [None]:
def objective_lstm(trial: optuna.Trial) -> float:
    L   = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    u   = trial.suggest_int("units", 32, 128, step=32)
    do  = trial.suggest_float("dropout", 0.0, 0.4)
    lr  = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs  = trial.suggest_categorical("batch", [64, 128, 256])
    eps = trial.suggest_int("epochs", 40, 120)

    Xtr_seq, ytr_seq = _build_seq(Xtr_s, ytr, L)
    Xva_seq, yva_seq = _build_seq(Xva_s, yva, L)
    if min(map(len,[Xtr_seq, Xva_seq])) == 0: raise optuna.TrialPruned()

    model = models.Sequential([layers.Input(shape=(L, Xtr_seq.shape[2])),
                               layers.LSTM(u, dropout=do),
                               layers.Dense(1)])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    yhat = model.predict(Xva_seq, verbose=0).squeeze()
    val_rmse = _rmse(yva_seq, yhat)
    tmp_path = OUT_DIR / f"optuna_lstm_trial{trial.number}.keras"
    #model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
study_name = "ground_trackB_lstm"

log_path   = STUDY_DIR / f"{study_name}.log"
lock_path  = STUDY_DIR / f"{study_name}.lock"

try:
    lock_path.unlink()
except FileNotFoundError:
    pass

file_storage = JournalFileStorage(
    str(log_path),
    lock_obj=JournalFileOpenLock(str(lock_path))
)
storage = JournalStorage(file_storage)

studyB1 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_lstm",
                              storage=storage, load_if_exists=True)
print("Running Study B1 (LSTM)…")
studyB1.optimize(objective_lstm, n_trials=40, show_progress_bar=True)

bestB1_path = studyB1.best_trial.user_attrs["model_path"]
bestL1      = studyB1.best_trial.user_attrs["seq_len_used"]
best_lstm   = tf.keras.models.load_model(bestB1_path)
Xte_seq, yte_seq = _build_seq(Xte_s, yte, bestL1)
yhatB1 = best_lstm.predict(Xte_seq, verbose=0).squeeze()
y_base_seq = pd.Series(y_base, index=Xte_df.index).iloc[bestL1-1:].to_numpy()[:len(yte_seq)]
print("Best LSTM params:", studyB1.best_trial.params | {"seq_len": bestL1})
print(f"LSTM test → RMSE: {_rmse(yte_seq, yhatB1):.4f} | MAE: {mean_absolute_error(yte_seq, yhatB1):.4f} | R2: {r2_score(yte_seq, yhatB1):.4f} | Skill: {1.0 - (_rmse(yte_seq, yhatB1)/_rmse(yte_seq, y_base_seq)):.3f}")


### BiLSTM

In [None]:
def objective_bilstm(trial: optuna.Trial) -> float:
    L   = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    u   = trial.suggest_int("units", 32, 128, step=32)
    do  = trial.suggest_float("dropout", 0.0, 0.4)
    lr  = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs  = trial.suggest_categorical("batch", [64, 128, 256])
    eps = trial.suggest_int("epochs", 40, 120)

    Xtr_seq, ytr_seq = _build_seq(Xtr_s, ytr, L)
    Xva_seq, yva_seq = _build_seq(Xva_s, yva, L)
    if min(map(len,[Xtr_seq, Xva_seq])) == 0: raise optuna.TrialPruned()

    model = models.Sequential([
        layers.Input(shape=(L, Xtr_seq.shape[2])),
        layers.Bidirectional(layers.LSTM(u, dropout=do)),
        layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    yhat = model.predict(Xva_seq, verbose=0).squeeze()
    val_rmse = _rmse(yva_seq, yhat)
    tmp_path = OUT_DIR / f"optuna_bilstm_trial{trial.number}.keras"
    #model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
study_name = "ground_trackB_bilstm"

log_path   = STUDY_DIR / f"{study_name}.log"
lock_path  = STUDY_DIR / f"{study_name}.lock"

try:
    lock_path.unlink()
except FileNotFoundError:
    pass

file_storage = JournalFileStorage(
    str(log_path),
    lock_obj=JournalFileOpenLock(str(lock_path))
)
storage = JournalStorage(file_storage)

studyB2 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_bilstm",
                              storage=storage, load_if_exists=True)
print("Running Study B2 (BiLSTM)…")
studyB2.optimize(objective_bilstm, n_trials=35, show_progress_bar=True)

bestB2_path = studyB2.best_trial.user_attrs["model_path"]
bestL2      = studyB2.best_trial.user_attrs["seq_len_used"]
best_bi     = tf.keras.models.load_model(bestB2_path)
Xte_seq, yte_seq = _build_seq(Xte_s, yte, bestL2)
yhatB2 = best_bi.predict(Xte_seq, verbose=0).squeeze()
y_base_seq = pd.Series(y_base, index=Xte_df.index).iloc[bestL2-1:].to_numpy()[:len(yte_seq)]
print("Best BiLSTM params:", studyB2.best_trial.params | {"seq_len": bestL2})
print(f"BiLSTM test → RMSE: {_rmse(yte_seq, yhatB2):.4f} | MAE: {mean_absolute_error(yte_seq, yhatB2):.4f} | R2: {r2_score(yte_seq, yhatB2):.4f} | Skill: {1.0 - (_rmse(yte_seq, yhatB2)/_rmse(yte_seq, y_base_seq)):.3f}")


### CNN-LSTM

In [None]:
def objective_cnnlstm(trial: optuna.Trial) -> float:
    L     = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    filt  = trial.suggest_int("filters", 16, 64, step=16)
    ksz   = trial.suggest_categorical("kernel_size", [2,3,5])
    pool  = trial.suggest_categorical("pool", [1,2])
    u     = trial.suggest_int("lstm_units", 32, 128, step=32)
    do    = trial.suggest_float("dropout", 0.0, 0.4)
    lr    = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs    = trial.suggest_categorical("batch", [64, 128, 256])
    eps   = trial.suggest_int("epochs", 40, 120)

    Xtr_seq, ytr_seq = _build_seq(Xtr_s, ytr, L)
    Xva_seq, yva_seq = _build_seq(Xva_s, yva, L)
    if min(map(len,[Xtr_seq, Xva_seq])) == 0: raise optuna.TrialPruned()

    model = models.Sequential([
        layers.Input(shape=(L, Xtr_seq.shape[2])),
        layers.Conv1D(filt, kernel_size=ksz, padding="causal", activation="relu"),
        layers.MaxPooling1D(pool_size=pool) if pool>1 else layers.Lambda(lambda z: z),
        layers.LSTM(u, dropout=do),
        layers.Dense(1)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    yhat = model.predict(Xva_seq, verbose=0).squeeze()
    val_rmse = _rmse(yva_seq, yhat)
    tmp_path = OUT_DIR / f"optuna_cnnlstm_trial{trial.number}.keras"
    #model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
study_name = "ground_trackB_cnnlstm"

log_path   = STUDY_DIR / f"{study_name}.log"
lock_path  = STUDY_DIR / f"{study_name}.lock"

try:
    lock_path.unlink()
except FileNotFoundError:
    pass

file_storage = JournalFileStorage(
    str(log_path),
    lock_obj=JournalFileOpenLock(str(lock_path))
)
storage = JournalStorage(file_storage)

studyB3 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_cnnlstm",
                              storage=storage, load_if_exists=True)
print("Running Study B3 (CNN-LSTM)…")
studyB3.optimize(objective_cnnlstm, n_trials=35, show_progress_bar=True)

bestB3_path = studyB3.best_trial.user_attrs["model_path"]
bestL3      = studyB3.best_trial.user_attrs["seq_len_used"]
best_cnn    = tf.keras.models.load_model(bestB3_path)
Xte_seq, yte_seq = _build_seq(Xte_s, yte, bestL3)
yhatB3 = best_cnn.predict(Xte_seq, verbose=0).squeeze()
y_base_seq = pd.Series(y_base, index=Xte_df.index).iloc[bestL3-1:].to_numpy()[:len(yte_seq)]
print("Best CNN-LSTM params:", studyB3.best_trial.params | {"seq_len": bestL3})
print(f"CNN-LSTM test → RMSE: {_rmse(yte_seq, yhatB3):.4f} | MAE: {mean_absolute_error(yte_seq, yhatB3):.4f} | R2: {r2_score(yte_seq, yhatB3):.4f} | Skill: {1.0 - (_rmse(yte_seq, yhatB3)/_rmse(yte_seq, y_base_seq)):.3f}")


### Transformer

In [None]:
def objective_transformer(trial: optuna.Trial) -> float:
    L       = trial.suggest_categorical("seq_len", [6, 12, 18, 24])
    d_model = trial.suggest_categorical("d_model", [32, 64, 96, 128])
    heads   = trial.suggest_categorical("heads", [2, 4, 8])
    if d_model % heads != 0:  # asegurar divisibilidad
        raise optuna.TrialPruned()

    ff_dim  = trial.suggest_categorical("ff_dim", [64, 96, 128, 192])
    att_do  = trial.suggest_float("att_dropout", 0.0, 0.3)
    do      = trial.suggest_float("dropout", 0.0, 0.4)
    lr      = trial.suggest_float("lr", 5e-5, 5e-3, log=True)
    bs      = trial.suggest_categorical("batch", [64, 128, 256])
    eps     = trial.suggest_int("epochs", 40, 120)

    Xtr_seq, ytr_seq = _build_seq(Xtr_s, ytr, L)
    Xva_seq, yva_seq = _build_seq(Xva_s, yva, L)
    if min(map(len,[Xtr_seq, Xva_seq])) == 0: raise optuna.TrialPruned()

    inp = layers.Input(shape=(L, Xtr_seq.shape[2]))
    x   = layers.Dense(d_model)(inp)  # proyección a d_model
    x2  = layers.MultiHeadAttention(num_heads=heads, key_dim=d_model//heads, dropout=att_do)(x, x)
    x   = layers.Add()([x, x2]); x = layers.LayerNormalization()(x)
    ff  = layers.Dense(ff_dim, activation="relu")(x)
    ff  = layers.Dense(d_model)(ff)
    x   = layers.Add()([x, ff]); x = layers.LayerNormalization()(x)
    x   = layers.GlobalAveragePooling1D()(x)
    x   = layers.Dropout(do)(x)
    out = layers.Dense(1)(x)
    model = models.Model(inp, out)

    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss="mse", metrics=["mae"])
    es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=0)
    prune_cb = TFKerasPruningCallback(trial, "val_loss")
    model.fit(Xtr_seq, ytr_seq, validation_data=(Xva_seq, yva_seq),
              epochs=eps, batch_size=bs, verbose=0, callbacks=[es, prune_cb])

    yhat = model.predict(Xva_seq, verbose=0).squeeze()
    val_rmse = _rmse(yva_seq, yhat)
    tmp_path = OUT_DIR / f"optuna_transformer_trial{trial.number}.keras"
    #model.save(tmp_path)
    trial.set_user_attr("model_path", str(tmp_path))
    trial.set_user_attr("seq_len_used", L)
    return val_rmse

In [None]:
study_name = "ground_trackB_transformer"

log_path   = STUDY_DIR / f"{study_name}.log"
lock_path  = STUDY_DIR / f"{study_name}.lock"

try:
    lock_path.unlink()
except FileNotFoundError:
    pass

file_storage = JournalFileStorage(
    str(log_path),
    lock_obj=JournalFileOpenLock(str(lock_path))
)
storage = JournalStorage(file_storage)

studyB4 = optuna.create_study(direction="minimize",
                              sampler=TPESampler(seed=SEED),
                              pruner=MedianPruner(n_startup_trials=8, n_warmup_steps=5),
                              study_name="ground_trackB_transformer",
                              storage=storage, load_if_exists=True)
print("Running Study B4 (Transformer)…")
studyB4.optimize(objective_transformer, n_trials=40, show_progress_bar=True)

bestB4_path = studyB4.best_trial.user_attrs["model_path"]
bestL4      = studyB4.best_trial.user_attrs["seq_len_used"]
best_tr     = tf.keras.models.load_model(bestB4_path)
Xte_seq, yte_seq = _build_seq(Xte_s, yte, bestL4)
yhatB4 = best_tr.predict(Xte_seq, verbose=0).squeeze()
y_base_seq = pd.Series(y_base, index=Xte_df.index).iloc[bestL4-1:].to_numpy()[:len(yte_seq)]
print("Best Transformer params:", studyB4.best_trial.params | {"seq_len": bestL4})
print(f"Transformer test → RMSE: {_rmse(yte_seq, yhatB4):.4f} | MAE: {mean_absolute_error(yte_seq, yhatB4):.4f} | R2: {r2_score(yte_seq, yhatB4):.4f} | Skill: {1.0 - (_rmse(yte_seq, yhatB4)/_rmse(yte_seq, y_base_seq)):.3f}")


## Best

In [None]:
best_params = {
    "MLP":        studyA.best_trial.params,
    "LSTM":       studyB1.best_trial.params | {"seq_len": studyB1.best_trial.user_attrs["seq_len_used"]},
    "BiLSTM":     studyB2.best_trial.params | {"seq_len": studyB2.best_trial.user_attrs["seq_len_used"]},
    "CNN_LSTM":   studyB3.best_trial.params | {"seq_len": studyB3.best_trial.user_attrs["seq_len_used"]},
    "Transformer":studyB4.best_trial.params | {"seq_len": studyB4.best_trial.user_attrs["seq_len_used"]},
}
pd.Series(best_params, dtype="object").to_json(OUT_DIR / "best_hpo_params_all.json")
print("Saved params →", OUT_DIR / "best_hpo_params_all.json")

## Visualization

In [None]:
models_info = {
    "MLP": {
        "type": "tabular",
        "model": best_mlp,          # del estudio A
    },
    "LSTM": {
        "type": "seq",
        "model": best_lstm,         # del estudio B1
        "L": bestL1
    },
    "BiLSTM": {
        "type": "seq",
        "model": best_bi,           # del estudio B2
        "L": bestL2
    },
    "CNN-LSTM": {
        "type": "seq",
        "model": best_cnn,          # del estudio B3
        "L": bestL3
    },
    "Transformer": {
        "type": "seq",
        "model": best_tr,           # del estudio B4
        "L": bestL4
    }
}

In [None]:
OUT_FIG = OUT_DIR  

rows = []

for name, cfg in models_info.items():
    print(f"\n=== {name} ===")
    if cfg["type"] == "tabular":
        # Predicciones directas en el set de test tabular
        y_true = yte
        y_pred = cfg["model"].predict(Xte, verbose=0).squeeze()
        idx    = Xte_df.index
        # Baseline alineado
        y_base_aligned = y_base
    else:
        # Secuencial: reconstruir ventanas con la mejor L
        L = int(cfg["L"])
        X_seq, y_seq, idx = build_seq_with_idx(Xte_s, yte, L)
        if len(X_seq) == 0:
            print("No hay secuencias válidas (NaNs). Se omite.")
            continue
        y_true = y_seq
        y_pred = cfg["model"].predict(X_seq, verbose=0).squeeze()
        # Baseline reindexado a los timestamps válidos
        y_base_aligned = pd.Series(y_base, index=Xte_df.index).reindex(idx).to_numpy()

    # Métricas
    rmse = _rmse(y_true, y_pred)
    mae  = mean_absolute_error(y_true, y_pred)
    r2   = r2_score(y_true, y_pred)
    skill = 1.0 - (rmse / _rmse(y_true, y_base_aligned))
    print(f"RMSE={rmse:.4f} | MAE={mae:.4f} | R2={r2:.4f} | Skill vs baseline={skill:.3f}")

    rows.append({"model": name, "RMSE": rmse, "MAE": mae, "R2": r2, "Skill": skill})

    # ---------- PLOTS (tres tipos por modelo) ----------
    N = min(400, len(y_true))  # recorte para visualización

    # 1) Serie temporal: verdad vs predicción vs baseline
    plt.figure(figsize=(12, 3.6))
    plt.plot(idx[:N], y_true[:N], label="truth", lw=1.4)
    plt.plot(idx[:N], y_pred[:N], label=name, lw=1.1)
    plt.plot(idx[:N], y_base_aligned[:N], label="baseline", lw=1.0, alpha=0.7)
    plt.title(f"Test — Truth vs {name} vs Baseline ({TARGET})")
    plt.ylabel("GHI (W/m²)" if TARGET.startswith("y_ghi") else "k-index")
    plt.xlabel("Time")
    plt.grid(True, ls="--", alpha=0.3); plt.legend(); plt.xticks(rotation=45); plt.tight_layout()
#    plt.savefig(OUT_FIG / f"{name}_ts_test.png", dpi=140)
    plt.show()

    # 2) Dispersión: y_true vs y_pred
    lim_min = float(min(np.min(y_true), np.min(y_pred)))
    lim_max = float(max(np.max(y_true), np.max(y_pred)))
    plt.figure(figsize=(4.8, 4.8))
    plt.scatter(y_true, y_pred, s=10, alpha=0.5)
    plt.plot([lim_min, lim_max], [lim_min, lim_max], 'r--', lw=1.0)
    plt.xlabel("Actual"); plt.ylabel("Predicted")
    plt.title(f"{name} — Actual vs Predicted\nRMSE={rmse:.3f} MAE={mae:.3f} R2={r2:.3f}")
    plt.grid(True, ls="--", alpha=0.3); plt.tight_layout()
    plt.savefig(OUT_FIG / f"{name}_scatter.png", dpi=140)
#    plt.show()

    # 3) Histograma de residuales
    resid = y_pred - y_true
    plt.figure(figsize=(6, 3.2))
    plt.hist(resid, bins=50, alpha=0.85)
    plt.axvline(0, color='r', ls='--', lw=1)
    plt.title(f"{name} — Residuals (mean={np.mean(resid):.3f})")
    plt.xlabel("Residual"); plt.ylabel("Frequency")
    plt.grid(True, ls="--", alpha=0.3); plt.tight_layout()
#    plt.savefig(OUT_F