In [49]:
import sys, pathlib
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from pathlib import Path

# Put the directory two levels above the current working directory at the
# front of sys.path (only if it is not already listed) so that the project's
# `src` package can be imported from this notebook.
PROJECT_ROOT = pathlib.Path.cwd().resolve().parent.parent
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
from src import config as cfg

# ── Load data
# Reads the serialized dataset from <cfg.DATA>/processed/lstm_data.pkl and
# pulls out the three entries this notebook uses: "X", "y" and "dates".
# NOTE(review): joblib.load unpickles the file, so it should only ever be
# pointed at artifacts produced by a trusted pipeline.
data = joblib.load(cfg.DATA / "processed" / "lstm_data.pkl")
fechas = pd.to_datetime(data["dates"])   # parsed so it can be compared to date strings
X = data["X"]
y = data["y"]
print("✅ Datos cargados:", X.shape, y.shape)


✅ Datos cargados: (3239, 60, 40) (3239, 40)


In [51]:
# ── Chronological train / validation / test split
# Each boundary date is named exactly once so the three masks cannot drift
# apart: editing one literal but not its twin would open a gap or an overlap
# between consecutive splits.
VAL_START = "2019-01-01"    # first date that belongs to the validation split
TEST_START = "2021-01-01"   # first date that belongs to the test split

train_mask = fechas < VAL_START
val_mask   = (fechas >= VAL_START) & (fechas < TEST_START)
test_mask  = fechas >= TEST_START

# The masks are mutually exclusive by construction, so their counts must add
# up to the number of dates. A date that fails every comparison (e.g. a
# missing value) would otherwise be dropped from all splits silently.
n_assigned = int(train_mask.sum()) + int(val_mask.sum()) + int(test_mask.sum())
assert n_assigned == len(fechas), (
    f"{len(fechas) - n_assigned} sample(s) were not assigned to any split"
)

X_train, y_train = X[train_mask], y[train_mask]
X_val, y_val     = X[val_mask], y[val_mask]
X_test, y_test   = X[test_mask], y[test_mask]

print("🔹 Train:", X_train.shape)
print("🔹 Val:  ", X_val.shape)
print("🔹 Test: ", X_test.shape)


# Report the first and last date that ended up in each split.
print("🗓️ Rango fechas:")
for label, mask in (("Train:", train_mask), ("Val:  ", val_mask), ("Test: ", test_mask)):
    split_dates = fechas[mask]
    print(label, split_dates.min(), "→", split_dates.max())

🔹 Train: (1609, 60, 40)
🔹 Val:   (505, 60, 40)
🔹 Test:  (1125, 60, 40)
🗓️ Rango fechas:
Train: 2012-08-08 00:00:00 → 2018-12-31 00:00:00
Val:   2019-01-02 00:00:00 → 2020-12-31 00:00:00
Test:  2021-01-04 00:00:00 → 2025-06-26 00:00:00


In [53]:
# ── Scaling
# No transformation is applied in this cell: each *_scaled name is bound to
# the very same object as the corresponding split variable.
# NOTE(review): the original author's note says the arrays already arrive
# scaled; that cannot be confirmed from this notebook alone.
X_train_scaled, X_val_scaled, X_test_scaled = X_train, X_val, X_test
y_train_scaled, y_val_scaled, y_test_scaled = y_train, y_val, y_test


In [55]:
# ── Define the model
# Seed Python's `random`, NumPy and TensorFlow in one call *before* any layer
# is created, so that weight initialisation, dropout masks and the batch
# shuffling done by `fit` are repeatable across Restart & Run All.
# NOTE(review): bit-exact repeatability on GPU may additionally require
# tf.config.experimental.enable_op_determinism() — confirm if needed.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = models.Sequential([
    # One sample is a window of cfg.WINDOW steps with X.shape[2] features each.
    layers.Input(shape=(cfg.WINDOW, X.shape[2])),
    # First recurrent layer emits its full output sequence for the next LSTM.
    layers.LSTM(64, return_sequences=True),
    layers.Dropout(0.2),
    # Second recurrent layer emits only its final output.
    layers.LSTM(32),
    # Linear output layer with one unit per target column of y.
    layers.Dense(y.shape[1])
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")
model.summary()

In [57]:
# ── Train
# Stop once the validation loss has gone 5 consecutive epochs without
# improving, and roll the model back to the weights of its best epoch.
# (`monitor="val_loss"` is the callback's default, spelled out for clarity.)
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Up to 50 epochs in mini-batches of 32; the validation split is evaluated at
# the end of every epoch and feeds the early-stopping callback.
history = model.fit(
    x=X_train_scaled,
    y=y_train_scaled,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val_scaled, y_val_scaled),
    callbacks=[early_stop],
)

Epoch 1/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 101ms/step - loss: 0.8651 - val_loss: 1.6995
Epoch 2/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 75ms/step - loss: 0.6826 - val_loss: 1.6987
Epoch 3/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step - loss: 0.7259 - val_loss: 1.6990
Epoch 4/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 73ms/step - loss: 0.7214 - val_loss: 1.6991
Epoch 5/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 74ms/step - loss: 0.7418 - val_loss: 1.6996
Epoch 6/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 69ms/step - loss: 0.7939 - val_loss: 1.7076
Epoch 7/50
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 70ms/step - loss: 0.6993 - val_loss: 1.7014


In [64]:
# ── Evaluate on the test split
# X_test is passed to the model as-is; no scaling step is applied here.
y_pred = model.predict(X_test)

# Per-output RMSE: squared errors are averaged over axis 0 (the samples), then
# square-rooted, giving one value per target column; `rmse_mean` averages them.
# NOTE(review): the metric is expressed in whatever scale y_test is stored in.
rmse   = np.sqrt(((y_test - y_pred)**2).mean(axis=0))
rmse_mean = rmse.mean()
print("📉 RMSE medio:", rmse_mean)

# Make sure the results directory exists before writing into it; without this
# the dump fails with FileNotFoundError on a fresh checkout.
Path(cfg.RESULT).mkdir(parents=True, exist_ok=True)
joblib.dump(rmse_mean, cfg.RESULT / "rmse_lstm.pkl")
print("✅ RMSE guardado.")


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
📉 RMSE medio: 1.0181041
✅ RMSE guardado.


In [66]:
# ── Persist the training history and the fitted model
# The results directory gets the same "create if missing" treatment as the
# models directory below, so the dump cannot fail on a fresh checkout.
Path(cfg.RESULT).mkdir(parents=True, exist_ok=True)
# `history.history` is the per-epoch metrics record returned by `model.fit`.
joblib.dump(history.history, cfg.RESULT / "history_lstm.pkl")
print("✅ Histórico de entrenamiento guardado.")

Path(cfg.MODELS).mkdir(parents=True, exist_ok=True)
model.save(cfg.MODELS / "lstm_t1.keras")
print("✅ Modelo guardado.")


✅ Histórico de entrenamiento guardado.
✅ Modelo guardado.
