In [1]:
# ============================================================
# DAILY POLLUTANT PREDICTION WITH LSTM (NaN-SAFE)
# NaNs are strictly ignored (no interpolation, no filling)
# ============================================================

import numpy as np
import xarray as xr

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# ============================================================
# FILE PATHS (USER PROVIDED)
# ============================================================

files = {
    'CO':     r"D:\IPMA\Results\co_fire_meteo_Iberia.nc",
    'NO':     r"D:\IPMA\Results\no_fire_meteo_Iberia.nc",
    'NO2':    r"D:\IPMA\Results\no2_fire_meteo_Iberia.nc",
    'PM2p5':  r"D:\IPMA\Results\pm2p5_fire_meteo_Iberia.nc",
    'PM10':   r"D:\IPMA\Results\pm10_fire_meteo_Iberia.nc"
}

# ============================================================
# USER SETTINGS
# ============================================================

POLLUTANT_VAR = "Mean"
TEST_YEAR = 2017
TEST_MONTH = None        # None or 1–12
LAG_DAYS = 14
EPOCHS = 40
BATCH_SIZE = 32

INPUT_VARS = [
    "Mean",
    "temp_Max",
    "wind_Max",
    "precip_Total_Precipitation",
    "frp_sum_Iberia"
]

# ============================================================
# HELPER FUNCTIONS
# ============================================================

def build_sequences(X, y, lags):
    X_out, y_out = [], []
    for i in range(lags, len(X)):
        X_out.append(X[i-lags:i])
        y_out.append(y[i])
    return np.array(X_out), np.array(y_out)


def train_mask(time):
    return time.dt.year != TEST_YEAR


def test_mask(time):
    mask = time.dt.year == TEST_YEAR
    if TEST_MONTH is not None:
        mask = mask & (time.dt.month == TEST_MONTH)
    return mask

# ============================================================
# STORAGE
# ============================================================

all_predictions = {}
all_metrics = {}

# ============================================================
# MAIN LOOP
# ============================================================

for pollutant, path in files.items():

    print(f"\n==============================")
    print(f"Processing: {pollutant}")
    print(f"==============================")

    ds = xr.open_dataset(path)

    # -------------------------------
    # CHECK VARIABLES
    # -------------------------------
    for var in INPUT_VARS:
        if var not in ds:
            raise KeyError(f"{var} not found in {path}")

    y = ds[POLLUTANT_VAR]
    X = xr.merge([ds[var] for var in INPUT_VARS])

    time = ds.time
    train_idx = train_mask(time)
    test_idx = test_mask(time)

    X_train = X.sel(time=train_idx)
    y_train = y.sel(time=train_idx)
    X_test = X.sel(time=test_idx)
    y_test = y.sel(time=test_idx)

    # ========================================================
    # BUILD TRAINING DATA (NaN-SAFE)
    # ========================================================

    X_all, y_all = [], []

    for lat in ds.latitude.values:
        for lon in ds.longitude.values:

            X_ts = X_train.sel(latitude=lat, longitude=lon).to_array().values.T
            y_ts = y_train.sel(latitude=lat, longitude=lon).values

            # Keep only valid days (no NaNs anywhere)
            valid = (
                ~np.isnan(y_ts) &
                ~np.isnan(X_ts).any(axis=1)
            )

            X_ts = X_ts[valid]
            y_ts = y_ts[valid]

            if len(y_ts) <= LAG_DAYS:
                continue

            scaler_X = MinMaxScaler()
            scaler_y = MinMaxScaler()

            X_scaled = scaler_X.fit_transform(X_ts)
            y_scaled = scaler_y.fit_transform(y_ts.reshape(-1, 1)).ravel()

            X_seq, y_seq = build_sequences(X_scaled, y_scaled, LAG_DAYS)

            if len(y_seq) == 0:
                continue

            X_all.append(X_seq)
            y_all.append(y_seq)

    X_train_all = np.concatenate(X_all)
    y_train_all = np.concatenate(y_all)

    print(f"Training samples: {X_train_all.shape[0]}")

    # ========================================================
    # MODEL
    # ========================================================

    model = Sequential([
        LSTM(64, input_shape=(LAG_DAYS, X_train_all.shape[2])),
        Dense(1)
    ])

    model.compile(optimizer="adam", loss="mse")

    model.fit(
        X_train_all,
        y_train_all,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[EarlyStopping(patience=5, restore_best_weights=True)],
        verbose=1
    )

    # ========================================================
    # PREDICTION (NaN-SAFE)
    # ========================================================

    test_times = time.sel(time=test_idx)
    preds = np.full(
        (len(ds.latitude), len(ds.longitude), len(test_times)),
        np.nan
    )

    for i, lat in enumerate(ds.latitude.values):
        for j, lon in enumerate(ds.longitude.values):

            X_ts = X.sel(latitude=lat, longitude=lon).to_array().values.T
            y_ts = y.sel(latitude=lat, longitude=lon).values

            valid = ~np.isnan(X_ts).any(axis=1)

            if valid.sum() <= LAG_DAYS:
                continue

            X_ts = X_ts[valid]
            y_ts = y_ts[valid]

            scaler_X = MinMaxScaler()
            scaler_y = MinMaxScaler()

            X_scaled = scaler_X.fit_transform(X_ts)
            y_scaled = scaler_y.fit_transform(y_ts.reshape(-1, 1))

            test_start = np.where(test_idx.values)[0][0]

            for k in range(len(test_times)):
                t = test_start + k
                if t - LAG_DAYS < 0:
                    continue

                X_input = X_scaled[t-LAG_DAYS:t]

                if np.isnan(X_input).any():
                    continue

                pred_scaled = model.predict(
                    X_input.reshape(1, LAG_DAYS, -1),
                    verbose=0
                )

                preds[i, j, k] = scaler_y.inverse_transform(pred_scaled)[0, 0]

    # ========================================================
    # OUTPUT
    # ========================================================

    pred_da = xr.DataArray(
        preds,
        dims=("latitude", "longitude", "time"),
        coords={
            "latitude": ds.latitude,
            "longitude": ds.longitude,
            "time": test_times
        },
        name=f"{pollutant}_predicted"
    )

    all_predictions[pollutant] = pred_da

    # ========================================================
    # EVALUATION (NaN-SAFE)
    # ========================================================

    error = pred_da - y_test

    mae = float(abs(error).where(np.isfinite(error)).mean())
    rmse = float(np.sqrt((error ** 2)).where(np.isfinite(error)).mean())
    bias = float(error.where(np.isfinite(error)).mean())
    corr = float(
        xr.corr(pred_da, y_test, dim="time")
        .where(np.isfinite(pred_da))
        .mean()
    )

    all_metrics[pollutant] = {
        "MAE": mae,
        "RMSE": rmse,
        "Bias": bias,
        "Correlation": corr
    }

    print("Evaluation:")
    for k, v in all_metrics[pollutant].items():
        print(f"  {k}: {v:.4f}")

# ============================================================
# FINAL SUMMARY
# ============================================================

print("\n==============================")
print("FINAL SUMMARY (2017)")
print("==============================")

for pol, metrics in all_metrics.items():
    print(f"\n{pol}")
    for k, v in metrics.items():
        print(f"  {k}: {v:.4f}")

# Optional save:
# xr.Dataset(all_predictions).to_netcdf("pollutant_predictions_2017.nc")


  if not hasattr(np, "object"):



Processing: CO
Training samples: 1139999


  super().__init__(**kwargs)


Epoch 1/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 5ms/step - loss: 0.0012
Epoch 2/40
[1m   19/35625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:24[0m 6ms/step - loss: 9.1016e-04

  current = self.get_monitor_value(logs)


[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0011
Epoch 3/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 4ms/step - loss: 0.0011
Epoch 4/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 4ms/step - loss: 0.0011
Epoch 5/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 4ms/step - loss: 0.0011
Epoch 6/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 4ms/step - loss: 0.0011
Epoch 7/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 4ms/step - loss: 0.0010
Epoch 8/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 4ms/step - loss: 0.0010
Epoch 9/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 4ms/step - loss: 0.0010
Epoch 10/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 4ms/step - loss: 0.0010
Epoch 11/40
[1m35625/35625[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Evaluation:
  MAE: 0.0347
  RMSE: 0.0347
  Bias: -0.0113
  Correlation: 0.7333

Processing: NO
Training samples: 1139954


  super().__init__(**kwargs)


Epoch 1/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0027
Epoch 2/40
[1m   31/35624[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:02[0m 3ms/step - loss: 0.0026 

  current = self.get_monitor_value(logs)


[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0026
Epoch 3/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 5ms/step - loss: 0.0025
Epoch 4/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 5ms/step - loss: 0.0025
Epoch 5/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0025
Epoch 6/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 4ms/step - loss: 0.0025
Epoch 7/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 4ms/step - loss: 0.0024
Epoch 8/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 5ms/step - loss: 0.0024
Epoch 9/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 5ms/step - loss: 0.0024
Epoch 10/40
[1m35624/35624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 4ms/step - loss: 0.0024
Epoch 11/40
[1m35624/35624[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Evaluation:
  MAE: 1.0579
  RMSE: 1.0579
  Bias: 0.2724
  Correlation: 0.5081

Processing: NO2
Training samples: 1139999


  super().__init__(**kwargs)


Epoch 1/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 4ms/step - loss: 0.0056
Epoch 2/40


  current = self.get_monitor_value(logs)


[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0053
Epoch 3/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 4ms/step - loss: 0.0053
Epoch 4/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 5ms/step - loss: 0.0052
Epoch 5/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 5ms/step - loss: 0.0051
Epoch 6/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0051
Epoch 7/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0050
Epoch 8/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0050
Epoch 9/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0049
Epoch 10/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 4ms/step - loss: 0.0049
Epoch 11/40
[1m35625/35625[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Evaluation:
  MAE: 1.7395
  RMSE: 1.7395
  Bias: -0.1151
  Correlation: 0.6960

Processing: PM2p5
Training samples: 1139997


  super().__init__(**kwargs)


Epoch 1/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 4ms/step - loss: 0.0013
Epoch 2/40
[1m   29/35625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:10[0m 4ms/step - loss: 0.0012     

  current = self.get_monitor_value(logs)


[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0013
Epoch 3/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0013
Epoch 4/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 4ms/step - loss: 0.0012
Epoch 5/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0012
Epoch 6/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 4ms/step - loss: 0.0012
Epoch 7/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 5ms/step - loss: 0.0012
Epoch 8/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 5ms/step - loss: 0.0012
Epoch 9/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 5ms/step - loss: 0.0012
Epoch 10/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 5ms/step - loss: 0.0012
Epoch 11/40
[1m35625/35625[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Evaluation:
  MAE: 4.9140
  RMSE: 4.9140
  Bias: -0.1606
  Correlation: 0.5824

Processing: PM10
Training samples: 1139998


  super().__init__(**kwargs)


Epoch 1/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 3ms/step - loss: 0.0014
Epoch 2/40
[1m   27/35625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:22[0m 4ms/step - loss: 0.0010 

  current = self.get_monitor_value(logs)


[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0013
Epoch 3/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0013
Epoch 4/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0013
Epoch 5/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0013
Epoch 6/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0013
Epoch 7/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 5ms/step - loss: 0.0013
Epoch 8/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 5ms/step - loss: 0.0012
Epoch 9/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 5ms/step - loss: 0.0012
Epoch 10/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0012
Epoch 11/40
[1m35625/35625[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


In [None]:
# ============================================================
# DAILY POLLUTANT PREDICTION WITH LSTM (PER-GRID)
# Using explicit file paths (IPMA Iberia)
# ============================================================

import numpy as np
import xarray as xr

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# ============================================================
# FILE PATHS (USER-PROVIDED)
# ============================================================

files = {
    'CO':     r"D:\IPMA\Results\co_fire_meteo_Iberia.nc",
    'NO':     r"D:\IPMA\Results\no_fire_meteo_Iberia.nc",
    'NO2':    r"D:\IPMA\Results\no2_fire_meteo_Iberia.nc",
    'PM2p5':  r"D:\IPMA\Results\pm2p5_fire_meteo_Iberia.nc",
    'PM10':   r"D:\IPMA\Results\pm10_fire_meteo_Iberia.nc"
}

# ============================================================
# USER SETTINGS
# ============================================================

POLLUTANT_VAR = "Mean"      # daily mean concentration
TEST_YEAR = 2017
TEST_MONTH = None           # None or integer 1–12
LAG_DAYS = 14
EPOCHS = 40
BATCH_SIZE = 32

# Input variables for the model
INPUT_VARS = [
    "Mean",                          # lagged pollutant
    "temp_Max",
    "wind_Max",
    "precip_Total_Precipitation",
    "frp_sum_Iberia"
]

# ============================================================
# HELPER FUNCTIONS
# ============================================================

def build_sequences(X, y, lags):
    X_out, y_out = [], []
    for i in range(lags, len(X)):
        X_out.append(X[i-lags:i])
        y_out.append(y[i])
    return np.array(X_out), np.array(y_out)


def train_mask(time):
    return time.dt.year != TEST_YEAR


def test_mask(time):
    mask = time.dt.year == TEST_YEAR
    if TEST_MONTH is not None:
        mask = mask & (time.dt.month == TEST_MONTH)
    return mask

# ============================================================
# STORAGE
# ============================================================

all_predictions = {}
all_metrics = {}

# ============================================================
# MAIN LOOP OVER POLLUTANTS
# ============================================================

for pollutant, path in files.items():

    print(f"\n==============================")
    print(f"Processing pollutant: {pollutant}")
    print(f"File: {path}")
    print(f"==============================")

    ds = xr.open_dataset(path)

    # -------------------------------
    # CHECK REQUIRED VARIABLES
    # -------------------------------
    for var in INPUT_VARS:
        if var not in ds:
            raise KeyError(f"{var} not found in {path}")

    # -------------------------------
    # DEFINE INPUTS / TARGET
    # -------------------------------
    y = ds[POLLUTANT_VAR]
    X = xr.merge([ds[var] for var in INPUT_VARS])

    time = ds.time
    train_idx = train_mask(time)
    test_idx = test_mask(time)

    X_train = X.sel(time=train_idx)
    y_train = y.sel(time=train_idx)

    X_test = X.sel(time=test_idx)
    y_test = y.sel(time=test_idx)

    # -------------------------------
    # BUILD TRAINING DATASET
    # -------------------------------
    X_all, y_all = [], []

    for lat in ds.latitude.values:
        for lon in ds.longitude.values:

            X_ts = X_train.sel(latitude=lat, longitude=lon).to_array().values.T
            y_ts = y_train.sel(latitude=lat, longitude=lon).values

            if np.isnan(X_ts).any() or np.isnan(y_ts).any():
                continue

            if len(y_ts) <= LAG_DAYS:
                continue

            scaler_X = MinMaxScaler()
            scaler_y = MinMaxScaler()

            X_scaled = scaler_X.fit_transform(X_ts)
            y_scaled = scaler_y.fit_transform(y_ts.reshape(-1, 1)).ravel()

            X_seq, y_seq = build_sequences(X_scaled, y_scaled, LAG_DAYS)

            X_all.append(X_seq)
            y_all.append(y_seq)

    X_train_all = np.concatenate(X_all)
    y_train_all = np.concatenate(y_all)

    print(f"Training samples: {X_train_all.shape[0]}")

    # -------------------------------
    # LSTM MODEL
    # -------------------------------
    model = Sequential([
        LSTM(64, input_shape=(LAG_DAYS, X_train_all.shape[2])),
        Dense(1)
    ])

    model.compile(optimizer="adam", loss="mse")

    model.fit(
        X_train_all,
        y_train_all,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[EarlyStopping(patience=5, restore_best_weights=True)],
        verbose=1
    )

    # -------------------------------
    # PREDICTION FOR TEST PERIOD
    # -------------------------------
    test_times = time.sel(time=test_idx)
    preds = np.zeros((len(ds.latitude), len(ds.longitude), len(test_times)))

    for i, lat in enumerate(ds.latitude.values):
        for j, lon in enumerate(ds.longitude.values):

            X_ts = X.sel(latitude=lat, longitude=lon).to_array().values.T
            y_ts = y.sel(latitude=lat, longitude=lon).values

            if np.isnan(X_ts).any() or np.isnan(y_ts).any():
                preds[i, j, :] = np.nan
                continue

            scaler_X = MinMaxScaler()
            scaler_y = MinMaxScaler()

            X_scaled = scaler_X.fit_transform(X_ts)
            y_scaled = scaler_y.fit_transform(y_ts.reshape(-1, 1))

            test_start = np.where(test_idx.values)[0][0]

            for k in range(len(test_times)):
                t = test_start + k
                X_input = X_scaled[t-LAG_DAYS:t]
                pred_scaled = model.predict(
                    X_input.reshape(1, LAG_DAYS, -1),
                    verbose=0
                )
                preds[i, j, k] = scaler_y.inverse_transform(pred_scaled)[0, 0]

    # -------------------------------
    # XR OUTPUT
    # -------------------------------
    pred_da = xr.DataArray(
        preds,
        dims=("latitude", "longitude", "time"),
        coords={
            "latitude": ds.latitude,
            "longitude": ds.longitude,
            "time": test_times
        },
        name=f"{pollutant}_predicted"
    )

    all_predictions[pollutant] = pred_da

    # -------------------------------
    # EVALUATION METRICS
    # -------------------------------
    error = pred_da - y_test

    mae = float(abs(error).mean())
    rmse = float(np.sqrt((error ** 2).mean()))
    bias = float(error.mean())
    corr = float(xr.corr(pred_da, y_test, dim="time").mean())

    all_metrics[pollutant] = {
        "MAE": mae,
        "RMSE": rmse,
        "Bias": bias,
        "Correlation": corr
    }

    print("Evaluation metrics:")
    for k, v in all_metrics[pollutant].items():
        print(f"  {k}: {v:.4f}")

# ============================================================
# FINAL SUMMARY
# ============================================================

print("\n==============================")
print("FINAL SUMMARY (2017 prediction)")
print("==============================")

for pol, metrics in all_metrics.items():
    print(f"\n{pol}")
    for k, v in metrics.items():
        print(f"  {k}: {v:.4f}")

# Optional save:
# xr.Dataset(all_predictions).to_netcdf("pollutant_predictions_2017.nc")



Processing pollutant: CO
File: D:\IPMA\Results\co_fire_meteo_Iberia.nc
Training samples: 1139999


  super().__init__(**kwargs)


Epoch 1/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 5ms/step - loss: 0.0012
Epoch 2/40
[1m   11/35625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:01[0m 5ms/step - loss: 8.7911e-04

  current = self.get_monitor_value(logs)


[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 5ms/step - loss: 0.0011
Epoch 3/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 5ms/step - loss: 0.0011
Epoch 4/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 4ms/step - loss: 0.0011
Epoch 5/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 3ms/step - loss: 0.0011
Epoch 6/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 4ms/step - loss: 0.0011
Epoch 7/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 4ms/step - loss: 0.0010
Epoch 8/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0010
Epoch 9/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0010
Epoch 10/40
[1m35625/35625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 4ms/step - loss: 0.0010
Epoch 11/40
[1m35625/35625[0m [32m━━━━━━━━━━

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Evaluation metrics:
  MAE: 0.0378
  RMSE: 0.4157
  Bias: 0.0070
  Correlation: 0.7204

Processing pollutant: NO
File: D:\IPMA\Results\no_fire_meteo_Iberia.nc
