# 05 — Modeling Satellite + Tabular

## Baselines + Models

**DSRF + MCMIPF fusion with tabular, with MLflow tracking**

**Optuna (JournalStorage + lock)**

In [1]:
from pathlib import Path
import os, json, math, time, random, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

import mlflow
import mlflow.keras

2025-11-16 13:32:03.385903: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-16 13:32:03.396340: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763317923.403986 1939700 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763317923.406482 1939700 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-16 13:32:03.416619: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

### Config

In [2]:
# Global seed for Python, NumPy and TensorFlow RNGs.
SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# This is best-effort: a failure is reported (via print, because warnings are
# globally silenced at import time) but does not abort the notebook.
gpus = tf.config.list_physical_devices('GPU')
for g in gpus:
    try:
        tf.config.experimental.set_memory_growth(g, True)
    except Exception as exc:
        print(f"WARNING: could not enable memory growth on {g}: {exc}")
print("TF GPUs:", gpus)

# Paths (the notebook is expected to run from a sub-folder of the repo root)
ROOT = Path("..").resolve()
DATA_CLEAN = ROOT / "data" / "clean" / "base_dataset.csv"
GOES_DIR = ROOT / "data" / "GOES_v2"

OUT_DIR = ROOT / "outputs"
OUT_DIR.mkdir(parents=True, exist_ok=True)
ART_DIR = OUT_DIR / "artifacts_satellite"
ART_DIR.mkdir(parents=True, exist_ok=True)
FIG_DIR = OUT_DIR / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# Target and window sizes
TARGET_COL = "GHI"
FREQ = "10T"
DEFAULT_INPUT_STEPS   = 36   # 6 h of history (36 * 10 min)
DEFAULT_HORIZON_STEPS = 6    # 1 h ahead

# Early-stopping patience (epochs without val_loss improvement)
PATIENCE = 6

# MLflow: local file store under outputs/mlruns.
# Path.as_uri() yields a well-formed file URI on every OS (the path is absolute
# because it was resolved), unlike concatenating "file://" with the raw path.
MLFLOW_DIR = (OUT_DIR / "mlruns").resolve()
mlflow.set_tracking_uri(MLFLOW_DIR.as_uri())
mlflow.set_experiment("pg_industrial_satellite_fusion")
print("MLflow tracking:", MLFLOW_DIR)

TF GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
MLflow tracking: /mnt/SOLARLAB/E_Ladino/Repo_2/solar-forecasting-colombia/outputs/mlruns


### Data

In [3]:
# Load the cleaned dataset; the first CSV column is parsed as the datetime index
# and rows are put in chronological order.
df = pd.read_csv(DATA_CLEAN, parse_dates=[0], index_col=0).sort_index()
df.index.name = "time"

# Candidate feature columns (defined in an earlier script); any name that is
# not present in df is silently dropped by the filters below.
base_feats = [
    'Presion','TempAmb','WindSpeed','WindDirection',
    'hour_sin','hour_cos','DoY Sin','DoY Cos',
    'solar_zenith','solar_azimuth','solar_elevation',
    'TempAmb_roll1h_mean','TempAmb_roll6h_mean',
    'Presion_roll1h_mean','Presion_roll6h_mean',
    'WindSpeed_roll1h_mean','WindSpeed_roll6h_mean',
    'temp_pressure_ratio','wind_temp_interaction'
]
ghi_lags  = [c for c in ['GHI_lag1','GHI_lag3','GHI_lag6','GHI_lag12','GHI_lag36'] if c in df.columns]
ghi_rolls = [c for c in ['GHI_roll1h_mean','GHI_roll3h_mean','GHI_roll6h_mean','GHI_roll1h_max'] if c in df.columns]
feat_cols = [c for c in base_feats if c in df.columns] + ghi_lags + ghi_rolls
print(f"Total features used: {len(feat_cols)}")
print(feat_cols)

assert TARGET_COL in df.columns, f"TARGET_COL='{TARGET_COL}' no existe en el dataset"

# Chronological split by row position: first 70% train, next 15% validation, last 15% test
n = len(df); i_tr = int(0.7*n); i_va = int(0.85*n)
df_train, df_val, df_test = df.iloc[:i_tr], df.iloc[i_tr:i_va], df.iloc[i_va:]

# Standardization: both scalers are fit on the training split only and then
# applied unchanged to validation and test.
X_scaler = StandardScaler(); y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(df_train[feat_cols].values)
X_val   = X_scaler.transform(df_val[feat_cols].values)
X_test  = X_scaler.transform(df_test[feat_cols].values)

y_train = y_scaler.fit_transform(df_train[[TARGET_COL]].values).ravel()
y_val   = y_scaler.transform(df_val[[TARGET_COL]].values).ravel()
y_test  = y_scaler.transform(df_test[[TARGET_COL]].values).ravel()

# Median imputation of the (already scaled) features, fit on train only.
# NOTE(review): this ordering relies on StandardScaler passing NaNs through
# untouched — confirm against the installed scikit-learn version.
# The target is NOT imputed; the assertion below fails if it contains NaN/Inf.
imp = SimpleImputer(strategy="median")
X_train = imp.fit_transform(X_train)
X_val   = imp.transform(X_val)
X_test  = imp.transform(X_test)

# Fail fast if any feature or target array still holds NaN/Inf
for name, arr in [("X_train",X_train),("X_val",X_val),("X_test",X_test),
                  ("y_train",y_train),("y_val",y_val),("y_test",y_test)]:
    assert np.isfinite(arr).all(), f"{name} tiene NaN/Inf"

# Keep the timestamps of each split so satellite images can be aligned with the rows
time_index = df.index
time_train, time_val, time_test = time_index[:i_tr], time_index[i_tr:i_va], time_index[i_va:]
print("N train/val/test:", len(time_train), len(time_val), len(time_test))

Total features used: 28
['Presion', 'TempAmb', 'WindSpeed', 'WindDirection', 'hour_sin', 'hour_cos', 'DoY Sin', 'DoY Cos', 'solar_zenith', 'solar_azimuth', 'solar_elevation', 'TempAmb_roll1h_mean', 'TempAmb_roll6h_mean', 'Presion_roll1h_mean', 'Presion_roll6h_mean', 'WindSpeed_roll1h_mean', 'WindSpeed_roll6h_mean', 'temp_pressure_ratio', 'wind_temp_interaction', 'GHI_lag1', 'GHI_lag3', 'GHI_lag6', 'GHI_lag12', 'GHI_lag36', 'GHI_roll1h_mean', 'GHI_roll3h_mean', 'GHI_roll6h_mean', 'GHI_roll1h_max']
N train/val/test: 75020 16076 16076


### Helpers

In [4]:
def metrics_from_scaled(pred_scaled, true_scaled, y_scaler):
    """
    Compute regression metrics in the target's original units.

    Parameters
    ----------
    pred_scaled, true_scaled : array-like
        Predictions and ground truth in the scaled target space. Each is
        flattened to a single column before being inverse-transformed.
    y_scaler : object exposing ``inverse_transform``
        Scaler used to map scaled values back to original units.

    Returns
    -------
    (dict, (np.ndarray, np.ndarray))
        ``{"MAE", "RMSE", "MAPE", "sMAPE", "R2"}`` (all built-in floats) and the
        pair ``(true, pred)`` of 1-D arrays in original units.
    """
    p = y_scaler.inverse_transform(np.asarray(pred_scaled).reshape(-1,1)).ravel()
    t = y_scaler.inverse_transform(np.asarray(true_scaled).reshape(-1,1)).ravel()
    mae = float(mean_absolute_error(t, p))
    rmse = float(np.sqrt(mean_squared_error(t, p)))
    # The epsilon belongs only in the denominator (to avoid division by zero);
    # adding it to the numerator would bias every absolute error.
    # Samples with a true value near zero still dominate this metric.
    mape = float(np.mean(np.abs(t - p) / (np.abs(t) + 1e-6)) * 100)
    smape = float(100 * np.mean(2*np.abs(p - t) / (np.abs(t) + np.abs(p) + 1e-6)))
    r2 = float(r2_score(t, p))
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape, "sMAPE": smape, "R2": r2}, (t, p)

def _rmse(a, b):
    """Root-mean-squared error between ``a`` and ``b``, returned as a built-in float."""
    mse = mean_squared_error(a, b)
    return float(np.sqrt(mse))


# ### Sequences (tabular + imágenes)

# %%
def build_seq_arrays_tabular(X_2d, y_1d, L, horizon):
    """
    Slice a feature matrix into overlapping look-back windows (tabular only, no images).

    Window ``i`` covers rows ``i .. i+L-1`` of ``X_2d``; its target is
    ``y_1d[i + L + horizon - 1]``.

    Returns
    -------
    (np.ndarray, np.ndarray)
        float32 arrays of shape ``(n_windows, L, F)`` and ``(n_windows,)``.
        Both are empty (zero windows) when the input is too short.
    """
    n_rows, n_feats = X_2d.shape
    n_windows = n_rows - L - horizon + 1

    # Not enough rows for even one window + target
    if n_windows <= 0:
        empty_X = np.zeros((0, L, n_feats), dtype="float32")
        empty_y = np.zeros((0,), dtype="float32")
        return empty_X, empty_y

    target_offset = L + horizon - 1
    windows = [X_2d[start:start + L] for start in range(n_windows)]
    targets = [y_1d[start + target_offset] for start in range(n_windows)]
    return np.asarray(windows, dtype="float32"), np.asarray(targets, dtype="float32")


def build_seq_arrays_fusion(X_2d, y_1d, imgs_4d, L, horizon):
    """
    Build synchronized tabular + image look-back windows.

    Parameters
    ----------
    X_2d    : array of shape (N, F_tab)
    y_1d    : sequence of length N
    imgs_4d : array of shape (N, H, W, C), row-aligned with ``X_2d``
    L       : number of input steps per window
    horizon : number of steps ahead of the window end used as target

    Returns
    -------
    X_tab_seq : float32 (N', L, F_tab)
    X_img_seq : float32 (N', L, H, W, C)
    y_seq     : float32 (N',)

    Notes
    -----
    Windows whose image block contains any NaN (image gaps) are SKIPPED, and
    the number of skipped windows is printed. When no window survives (input
    too short, or every window skipped) the returned arrays still carry their
    full trailing dimensions with N' = 0, so callers can unpack ``shape[2:]``.
    """
    N, F = X_2d.shape
    assert imgs_4d.shape[0] == N, "imgs_4d y X_2d no alinean en N"
    H, W, C = imgs_4d.shape[1:]

    def _empty():
        # Zero-window result with the same trailing dims as a normal result
        return (np.zeros((0, L, F), dtype="float32"),
                np.zeros((0, L, H, W, C), dtype="float32"),
                np.zeros((0,), dtype="float32"))

    last = N - L - horizon + 1
    if last <= 0:
        return _empty()

    # Scan every frame for NaNs exactly once (frame by frame, so no full-size
    # boolean mask is materialized), then use prefix sums so that
    # nan_prefix[i + L] - nan_prefix[i] counts the NaN frames in window i.
    frame_has_nan = np.fromiter((bool(np.isnan(frame).any()) for frame in imgs_4d),
                                dtype=bool, count=N)
    nan_prefix = np.concatenate(([0], np.cumsum(frame_has_nan)))
    starts = np.arange(last)
    window_has_nan = (nan_prefix[starts + L] - nan_prefix[starts]) > 0
    valid_starts = starts[~window_has_nan]

    skipped = int(window_has_nan.sum())
    if skipped > 0:
        print(f"build_seq_arrays_fusion: ventanas saltadas por NaNs en imágenes = {skipped}")

    n_valid = len(valid_starts)
    if n_valid == 0:
        return _empty()

    # Preallocate the outputs and fill them in place; this avoids holding a
    # Python list of blocks plus a second full copy made by np.asarray.
    X_tab_seq = np.empty((n_valid, L, F), dtype="float32")
    X_img_seq = np.empty((n_valid, L, H, W, C), dtype="float32")
    y_seq = np.empty((n_valid,), dtype="float32")
    for k, start in enumerate(valid_starts):
        i = int(start)
        X_tab_seq[k] = X_2d[i:i+L]
        X_img_seq[k] = imgs_4d[i:i+L]
        y_seq[k] = y_1d[i + L + horizon - 1]
    return X_tab_seq, X_img_seq, y_seq


### Satellite

In [5]:
# Root folders of the two satellite products under GOES_DIR. The loaders in
# this cell look for files at <product dir>/<YYYY>/<MM>/<YYYYMMDD_HH>_<PRODUCT>.npz
DSRF_DIR   = GOES_DIR / "DSRF"
MCMIPF_DIR = GOES_DIR / "MCMIPF"

def build_dsrf_by_hour(time_index):
    """
    Build the DSRF image tensor aligned row-by-row with ``time_index``.

    Strategy:
      1. Floor every timestamp to its hour and find the unique hours.
      2. Load each hourly DSRF file exactly ONCE
         (``DSRF_DIR/<YYYY>/<MM>/<YYYYMMDD_HH>_DSRF.npz``); the first array in
         the file is used, its first slice is taken and subsampled 256 -> 128
         by keeping every second pixel.
      3. Hours without a file keep an all-NaN image.
      4. Expand the per-hour images to every timestamp of ``time_index``.

    Parameters
    ----------
    time_index : pandas.DatetimeIndex
        Timestamps of the tabular rows.

    Returns
    -------
    np.ndarray
        float32 array of shape ``(len(time_index), 128, 128, 1)``.

    Raises
    ------
    ValueError
        If a file's image does not subsample to 128x128.

    Also prints the number of unique hours and how many of them have no file.
    """
    # "60min" floors to the hour and is accepted by both old and new pandas
    # (the single-letter "H" alias is deprecated in recent releases).
    hours = time_index.floor("60min")
    # hour_codes[i] is the position of row i's hour inside the sorted unique_hours
    hour_codes, unique_hours = pd.factorize(hours, sort=True)

    # One image per unique hour, NaN by default (= hour without file)
    hour_imgs = np.full((len(unique_hours), 128, 128, 1), np.nan, dtype="float32")
    missing_hours = 0

    for j, h in enumerate(unique_hours):
        key = h.strftime("%Y%m%d_%H")
        year = key[:4]
        month = key[4:6]
        path = DSRF_DIR / year / month / f"{key}_DSRF.npz"
        if not path.exists():
            missing_hours += 1
            continue
        # Context manager closes the underlying archive handle right away
        with np.load(path) as data:
            arr = data[data.files[0]]        # expected (1,256,256)
        img = arr[0][::2, ::2]               # 256 -> 128
        if img.shape != (128, 128):
            raise ValueError(f"Unexpected DSRF image shape {arr.shape} in {path}")
        hour_imgs[j, :, :, 0] = img          # cast to float32 on assignment

    print(f"DSRF: horas únicas en df      = {len(unique_hours)}")
    print(f"DSRF: horas sin archivo NOAA  = {missing_hours}")

    # Expand to one image per row with a single gather (one full-size allocation)
    dsrf_all = hour_imgs[hour_codes]
    return dsrf_all

def build_mcmipf_by_hour(time_index):
    """
    Build the MCMIPF image tensor aligned row-by-row with ``time_index``.

    Each hourly file (``MCMIPF_DIR/<YYYY>/<MM>/<YYYYMMDD_HH>_MCMIPF.npz``) is
    expected to hold a ``(6,16,256,256)`` array: 6 ten-minute slots x 16
    channels. It is subsampled 256 -> 128 (every second pixel), moved to
    channels-last, and for each timestamp the slot ``minute // 10`` (0..5) is
    selected. Timestamps whose hour has no file get an all-NaN image.

    Parameters
    ----------
    time_index : pandas.DatetimeIndex
        Timestamps of the tabular rows.

    Returns
    -------
    np.ndarray
        float32 array of shape ``(len(time_index), 128, 128, 16)``.

    Raises
    ------
    ValueError
        If a file's array does not subsample to ``(slots, 16, 128, 128)``.

    Notes
    -----
    The output is written in place hour by hour, so only ONE hourly tensor is
    held in memory besides the result (no dict of all hours, no stack/astype
    copies). The result itself is still large: N * 128 * 128 * 16 * 4 bytes.
    Also prints the number of unique hours and how many have no file.
    """
    # "60min" floors to the hour and is accepted by both old and new pandas
    hours = time_index.floor("60min")
    hour_codes, unique_hours = pd.factorize(hours, sort=True)
    slots = np.asarray(time_index.minute) // 10  # 0..5

    # Group row positions by hour: rows of hour j are order[bounds[j]:bounds[j+1]]
    order = np.argsort(hour_codes, kind="stable")
    bounds = np.searchsorted(hour_codes[order], np.arange(len(unique_hours) + 1))

    # NaN by default (= timestamp without MCMIPF file)
    mcm_all = np.full((len(time_index), 128, 128, 16), np.nan, dtype="float32")
    missing_hours = 0

    for j, h in enumerate(unique_hours):
        key = h.strftime("%Y%m%d_%H")
        year = key[:4]
        month = key[4:6]
        path = MCMIPF_DIR / year / month / f"{key}_MCMIPF.npz"
        if not path.exists():
            missing_hours += 1
            continue
        # Context manager closes the underlying archive handle right away
        with np.load(path) as data:
            arr = data[data.files[0]]            # expected (6,16,256,256)
        arr = arr[:, :, ::2, ::2]                # 256 -> 128
        if arr.ndim != 4 or arr.shape[1:] != (16, 128, 128):
            raise ValueError(f"Unexpected MCMIPF array shape after subsampling "
                             f"{arr.shape} in {path}")
        arr = np.transpose(arr, (0, 2, 3, 1))    # (6,128,128,16), a view
        rows = order[bounds[j]:bounds[j + 1]]
        mcm_all[rows] = arr[slots[rows]]         # cast to float32 on assignment

    print(f"MCMIPF: horas únicas en df      = {len(unique_hours)}")
    print(f"MCMIPF: horas sin archivo NOAA  = {missing_hours}")

    return mcm_all

In [None]:
# ### Building the row-aligned satellite tensors

print("→ Cargando DSRF completo (por hora)...")
dsrf_all = build_dsrf_by_hour(time_index)
print("DSRF_all:", dsrf_all.shape)  # (N,128,128,1)

print("→ Cargando MCMIPF completo (por hora)...")
mcmipf_all = build_mcmipf_by_hour(time_index)
print("MCMIPF_all:", mcmipf_all.shape)  # (N,128,128,16)

# Same positional split as the tabular data (i_tr / i_va), so rows stay aligned
dsrf_train, dsrf_val, dsrf_test = dsrf_all[:i_tr], dsrf_all[i_tr:i_va], dsrf_all[i_va:]
mcm_train, mcm_val, mcm_test   = mcmipf_all[:i_tr], mcmipf_all[i_tr:i_va], mcmipf_all[i_va:]

# NaN is the expected marker for missing images and is handled when windows are
# built; +/-Inf is never expected. The previous check
# (`isfinite.all() or isnan.any()`) let Inf through whenever a NaN was present.
for name, arr in [("dsrf_train",dsrf_train),("dsrf_val",dsrf_val),("dsrf_test",dsrf_test),
                  ("mcm_train",mcm_train),("mcm_val",mcm_val),("mcm_test",mcm_test)]:
    assert not np.isinf(arr).any(), f"{name} tiene valores no finitos raros"
print("Satellite tensors listos.")

→ Cargando DSRF completo (por hora)...
DSRF: horas únicas en df      = 17862
DSRF: horas sin archivo NOAA  = 237
DSRF_all: (107172, 128, 128, 1)
→ Cargando MCMIPF completo (por hora)...


### Models

#### 1) ConvLSTM con DSRF (solo satélite)

In [None]:
# %%
def build_convlstm_dsrf(L, H=128, W=128, C=1, filters1=32, filters2=64, k=5):
    """
    Two-layer ConvLSTM regressor over an image sequence.

    Input : tensor of shape (L, H, W, C)
    Output: one scalar per sample, obtained by global average pooling of the
            last ConvLSTM feature map followed by Dense(64, relu) -> Dense(1).

    The first ConvLSTM layer (``filters1``) returns the full sequence, the
    second (``filters2``) only its last state; each is followed by batch
    normalization. Both use a ``k`` x ``k`` kernel with "same" padding.
    """
    seq_input = layers.Input(shape=(L, H, W, C))

    # (filters, return_sequences) for each ConvLSTM + BatchNorm stage, in order
    stages = ((filters1, True), (filters2, False))
    features = seq_input
    for n_filters, keep_sequence in stages:
        features = layers.ConvLSTM2D(
            filters=n_filters,
            kernel_size=(k, k),
            padding="same",
            return_sequences=keep_sequence,
            activation="relu",
        )(features)
        features = layers.BatchNormalization()(features)

    # Final H x W x filters2 map -> pooled vector -> regression head
    pooled = layers.GlobalAveragePooling2D()(features)
    hidden = layers.Dense(64, activation="relu")(pooled)
    prediction = layers.Dense(1, dtype="float32")(hidden)

    return models.Model(seq_input, prediction, name="ConvLSTM_DSRF")

#### 2) VGG19 + LSTM (MCMIPF + Tabular fusion)

In [None]:
from tensorflow.keras.applications import VGG19

def build_vgg19_lstm_fusion(L, n_feat_tab, H=128, W=128, C=16,
                            lstm_units_img=64, lstm_units_tab=64,
                            dense_units=64, train_base=False):
    """
    Two-branch fusion model (image sequence + tabular sequence -> scalar).

      - image branch  : per-frame 1x1 Conv2D (C -> 3 channels) -> VGG19 without
                        top (ImageNet weights) -> global average pooling -> LSTM
      - tabular branch: LSTM over the (L, n_feat_tab) sequence
      - head          : concatenate -> Dense(relu) -> Dropout(0.2) -> Dense(1)

    ``train_base`` sets whether the VGG19 weights are trainable.
    Model inputs are named "img_seq" and "tab_seq"; the output layer is "y_hat".
    """
    # ---------------- Image branch ----------------
    img_input = layers.Input(shape=(L, H, W, C), name="img_seq")

    # Linear 1x1 convolution squeezes the C input channels down to the 3 VGG19 expects
    channel_reducer = layers.Conv2D(3, (1, 1), padding="same", activation="linear")
    img_feats = layers.TimeDistributed(channel_reducer, name="td_1x1_conv")(img_input)

    # Convolutional backbone applied to every frame
    backbone = VGG19(include_top=False, weights="imagenet", input_shape=(H, W, 3))
    backbone.trainable = train_base
    img_feats = layers.TimeDistributed(backbone, name="td_vgg19")(img_feats)

    frame_pool = layers.GlobalAveragePooling2D()
    img_feats = layers.TimeDistributed(frame_pool, name="td_gap")(img_feats)

    # (batch, L, feat_vgg) -> (batch, lstm_units_img)
    img_summary = layers.LSTM(lstm_units_img, activation="tanh", name="lstm_img")(img_feats)

    # ---------------- Tabular branch ----------------
    tab_input = layers.Input(shape=(L, n_feat_tab), name="tab_seq")
    tab_summary = layers.LSTM(lstm_units_tab, activation="tanh", name="lstm_tab")(tab_input)

    # ---------------- Fusion head ----------------
    fused = layers.Concatenate(name="fusion")([img_summary, tab_summary])
    fused = layers.Dense(dense_units, activation="relu")(fused)
    fused = layers.Dropout(0.2)(fused)
    y_hat = layers.Dense(1, dtype="float32", name="y_hat")(fused)

    return models.Model([img_input, tab_input], y_hat, name="VGG19_LSTM_Fusion")

### Sequences + Training + MLflow

#### ConvLSTM con DSRF

In [None]:
# %%
def train_convlstm_dsrf(input_steps=DEFAULT_INPUT_STEPS,
                        horizon_steps=DEFAULT_HORIZON_STEPS,
                        lr=1e-3,
                        batch_size=16,
                        epochs=40):
    """
    Train and evaluate the satellite-only ConvLSTM on DSRF image sequences.

    Windows of ``input_steps`` DSRF frames are built for the train / val / test
    splits (windows containing NaN images are skipped by the sequence builder);
    the model is trained with MSE on the scaled target, with early stopping and
    best-weights checkpointing on ``val_loss``. Parameters, test metrics (in
    original units) and the model are logged to an MLflow run "ConvLSTM_DSRF".

    Parameters
    ----------
    input_steps   : look-back length in rows
    horizon_steps : forecast horizon in rows
    lr            : Adam learning rate
    batch_size, epochs : passed to ``model.fit``

    Returns
    -------
    (model, metrics, (y_true, y_pred))
        Trained Keras model, dict of test metrics, and the test targets /
        predictions in original units.

    Raises
    ------
    ValueError
        If no valid training or validation window can be built.
    """
    def _image_windows(y_split, imgs_split):
        # This model uses no tabular input, so the sequence builder gets a
        # zero-width feature matrix: alignment and NaN-skipping still apply,
        # but no unused (N', L, F) tabular tensor is allocated.
        no_tab = np.zeros((len(y_split), 0), dtype="float32")
        _, img_seq, y_seq = build_seq_arrays_fusion(
            X_2d=no_tab, y_1d=y_split,
            imgs_4d=imgs_split, L=input_steps, horizon=horizon_steps
        )
        return img_seq, y_seq

    Xtr_img_seq, ytr_seq = _image_windows(y_train, dsrf_train)
    Xva_img_seq, yva_seq = _image_windows(y_val, dsrf_val)

    # Fail with a clear message instead of a cryptic unpack/fit error below
    for split_name, seq in (("train", Xtr_img_seq), ("val", Xva_img_seq)):
        if len(seq) == 0:
            raise ValueError(
                f"No valid DSRF windows for the {split_name} split "
                f"(input_steps={input_steps}, horizon_steps={horizon_steps})"
            )

    print("ConvLSTM DSRF shapes:")
    print("Xtr_img_seq:", Xtr_img_seq.shape)
    print("ytr_seq    :", ytr_seq.shape)

    H, W, C = Xtr_img_seq.shape[2:]
    model = build_convlstm_dsrf(L=input_steps, H=H, W=W, C=C)

    model.compile(optimizer=tf.keras.optimizers.Adam(lr),
                  loss="mse")

    ckpt = (ART_DIR / "best_convlstm_dsrf.weights.h5").resolve()
    cbs = [
        callbacks.EarlyStopping(monitor="val_loss", patience=PATIENCE,
                                restore_best_weights=True, verbose=1),
        callbacks.ModelCheckpoint(filepath=str(ckpt), monitor="val_loss",
                                  save_best_only=True, save_weights_only=True)
    ]

    with mlflow.start_run(run_name="ConvLSTM_DSRF"):
        mlflow.log_param("input_steps",   input_steps)
        mlflow.log_param("horizon_steps", horizon_steps)
        mlflow.log_param("lr",           lr)
        mlflow.log_param("batch_size",   batch_size)
        mlflow.log_param("epochs",       epochs)

        model.fit(
            Xtr_img_seq, ytr_seq,
            validation_data=(Xva_img_seq, yva_seq),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            callbacks=cbs
        )

        # Test windows are built only now so they do not occupy memory during training
        Xte_img_seq, yte_seq = _image_windows(y_test, dsrf_test)
        yhat_test_scaled = model.predict(Xte_img_seq, verbose=0).squeeze()
        # yte_seq is in scaled space; metrics are reported in original units
        metrics, (t_o, p_o) = metrics_from_scaled(yhat_test_scaled, yte_seq, y_scaler)

        for k,v in metrics.items():
            mlflow.log_metric(f"test_{k}", v)

        # Store the trained model as an MLflow artifact
        mlflow.keras.log_model(model, "model")

        print("ConvLSTM DSRF test metrics:", metrics)

    return model, metrics, (t_o, p_o)

#### VGG19 + LSTM + Tabular (MCMIPF fusion)

In [None]:
# %%
def train_vgg19_lstm_fusion(input_steps=DEFAULT_INPUT_STEPS,
                            horizon_steps=DEFAULT_HORIZON_STEPS,
                            lr=5e-4,
                            batch_size=4,
                            epochs=25,
                            train_base=False):
    """
    Train and evaluate the VGG19 + LSTM fusion model (MCMIPF images + tabular).

    Synchronized tabular / image windows are built for the train / val / test
    splits (windows containing NaN images are skipped by the sequence builder);
    the model is trained with MSE on the scaled target, with early stopping
    (patience ``PATIENCE + 2``) and best-weights checkpointing on ``val_loss``.
    Parameters, test metrics (in original units) and the model are logged to an
    MLflow run "VGG19_LSTM_Fusion".

    Parameters
    ----------
    input_steps   : look-back length in rows
    horizon_steps : forecast horizon in rows
    lr            : Adam learning rate
    batch_size, epochs : passed to ``model.fit``
    train_base    : whether the VGG19 backbone weights are trainable

    Returns
    -------
    (model, metrics, (y_true, y_pred))
        Trained Keras model, dict of test metrics, and the test targets /
        predictions in original units.

    Raises
    ------
    ValueError
        If no valid training or validation window can be built.
    """
    # Synchronized tabular + MCMIPF windows
    Xtr_tab_seq, Xtr_img_seq, ytr_seq = build_seq_arrays_fusion(
        X_2d=X_train, y_1d=y_train,
        imgs_4d=mcm_train, L=input_steps, horizon=horizon_steps
    )
    Xva_tab_seq, Xva_img_seq, yva_seq = build_seq_arrays_fusion(
        X_2d=X_val, y_1d=y_val,
        imgs_4d=mcm_val, L=input_steps, horizon=horizon_steps
    )

    # Fail with a clear message instead of a cryptic unpack/fit error below
    for split_name, seq in (("train", Xtr_img_seq), ("val", Xva_img_seq)):
        if len(seq) == 0:
            raise ValueError(
                f"No valid MCMIPF windows for the {split_name} split "
                f"(input_steps={input_steps}, horizon_steps={horizon_steps})"
            )

    print("Fusion shapes:")
    print("Xtr_tab_seq:", Xtr_tab_seq.shape)
    print("Xtr_img_seq:", Xtr_img_seq.shape)
    print("ytr_seq    :", ytr_seq.shape)

    L = input_steps
    n_feat_tab = Xtr_tab_seq.shape[2]
    H, W, C = Xtr_img_seq.shape[2:]

    model = build_vgg19_lstm_fusion(
        L=L, n_feat_tab=n_feat_tab,
        H=H, W=W, C=C,
        lstm_units_img=64,
        lstm_units_tab=64,
        dense_units=64,
        train_base=train_base
    )

    model.compile(optimizer=tf.keras.optimizers.Adam(lr),
                  loss="mse")

    ckpt = (ART_DIR / "best_vgg19_lstm_fusion.weights.h5").resolve()
    cbs = [
        callbacks.EarlyStopping(monitor="val_loss", patience=PATIENCE+2,
                                restore_best_weights=True, verbose=1),
        callbacks.ModelCheckpoint(filepath=str(ckpt), monitor="val_loss",
                                  save_best_only=True, save_weights_only=True)
    ]

    with mlflow.start_run(run_name="VGG19_LSTM_Fusion"):
        mlflow.log_param("input_steps",   input_steps)
        mlflow.log_param("horizon_steps", horizon_steps)
        mlflow.log_param("lr",           lr)
        mlflow.log_param("batch_size",   batch_size)
        mlflow.log_param("epochs",       epochs)
        mlflow.log_param("train_base",   train_base)
        mlflow.log_param("img_H",        H)
        mlflow.log_param("img_W",        W)
        mlflow.log_param("img_C",        C)

        # Inputs are passed by name, matching the model's Input layer names
        model.fit(
            {"img_seq": Xtr_img_seq, "tab_seq": Xtr_tab_seq},
            ytr_seq,
            validation_data=(
                {"img_seq": Xva_img_seq, "tab_seq": Xva_tab_seq},
                yva_seq
            ),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            callbacks=cbs
        )

        # Test windows are built only now so they do not occupy memory during training
        Xte_tab_seq, Xte_img_seq, yte_seq = build_seq_arrays_fusion(
            X_2d=X_test, y_1d=y_test,
            imgs_4d=mcm_test,
            L=input_steps, horizon=horizon_steps
        )
        yhat_test_scaled = model.predict(
            {"img_seq": Xte_img_seq, "tab_seq": Xte_tab_seq},
            verbose=0
        ).squeeze()

        # yte_seq is in scaled space; metrics are reported in original units
        metrics, (t_o, p_o) = metrics_from_scaled(yhat_test_scaled, yte_seq, y_scaler)
        for k,v in metrics.items():
            mlflow.log_metric(f"test_{k}", v)

        # Store the trained model as an MLflow artifact
        mlflow.keras.log_model(model, "model")

        print("VGG19+LSTM fusion test metrics:", metrics)

    return model, metrics, (t_o, p_o)

### Plots

In [None]:
def plot_sample_series(y_true, y_pred, title, n=1000, fname=None):
    """
    Plot the first ``n`` true and predicted values on one time axis.

    ``n`` is capped at ``len(y_true)``. When ``fname`` is given, the figure is
    also saved there at 140 dpi before being shown.
    """
    n_shown = min(n, len(y_true))

    fig, ax = plt.subplots(figsize=(11, 3.8))
    ax.plot(y_true[:n_shown], label="Real", lw=1.5)
    ax.plot(y_pred[:n_shown], label="Pred", lw=1.2, alpha=0.9)

    ax.set_title(title)
    ax.set_xlabel("Time steps (10-min)")
    ax.set_ylabel("GHI (W/m²)")
    ax.legend(frameon=False)
    fig.tight_layout()

    if fname is not None:
        fig.savefig(fname, dpi=140)
    plt.show()