In [2]:
import tensorflow as tf
import joblib, pathlib
import numpy as np
import sys, pathlib


# Make the project root importable so that `src.config` can be loaded.
# The root is taken to be two directory levels above the current working
# directory (the notebook is expected to be run from its own folder).
PROJECT_ROOT = pathlib.Path().resolve().parent.parent
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
from src import config as cfg

# --- Load the processed dataset ---
ruta = cfg.DATA / "processed" / "gru5d_data.pkl"
print("📦 Cargando datos desde:", ruta)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts that were produced by this project.
data = joblib.load(ruta)

# The pickle is read as a mapping with the keys "X", "y" and "dates".
X = data["X"]
y = data["y"]
fechas = data["dates"]

# Report the array shapes and the date span covered by the samples.
for _label, _array in (("X", X), ("y", y)):
    print(f"✅ {_label} shape: {_array.shape}")
print(f"📅 Fechas disponibles: {fechas.min()} → {fechas.max()}")

📦 Cargando datos desde: C:\Users\1M72763\Desktop\TFM\data\processed\gru5d_data.pkl
✅ X shape: (4450, 60, 80)
✅ y shape: (4450, 40)
📅 Fechas disponibles: 2012-08-22 00:00:00 → 2025-06-21 00:00:00


In [3]:
from sklearn.preprocessing import StandardScaler

# --- Chronological train / validation / test split ---
# The boundaries are named once so the three masks cannot drift apart.
VAL_START = "2019-01-01"   # first date that belongs to the validation split
TEST_START = "2021-01-01"  # first date that belongs to the test split

train_mask = fechas < VAL_START
val_mask   = (fechas >= VAL_START) & (fechas < TEST_START)
test_mask  = fechas >= TEST_START

# Fail early if the masks do not partition the dataset (e.g. missing dates)
# or if a split is empty, instead of failing later inside the scaler.
_split_sizes = [int(_m.sum()) for _m in (train_mask, val_mask, test_mask)]
assert sum(_split_sizes) == len(fechas), "Los splits no cubren todas las muestras"
assert all(_split_sizes), "Algún split (train/val/test) está vacío"

X_train, y_train = X[train_mask], y[train_mask]
X_val, y_val     = X[val_mask], y[val_mask]
X_test, y_test   = X[test_mask], y[test_mask]

print(f"🔹 Train: {X_train.shape}")
print(f"🔹 Val:   {X_val.shape}")
print(f"🔹 Test:  {X_test.shape}")

print("🗓️ Rango fechas:")
for _label, _mask in (("Train:", train_mask), ("Val:  ", val_mask), ("Test: ", test_mask)):
    _dates = fechas[_mask]
    print(_label, _dates.min(), "→", _dates.max())


def _scale_windows(scaler, windows, fit=False):
    """Scale an array feature-wise along its last axis.

    The array is flattened to 2-D (all leading axes merged, last axis kept),
    passed through ``scaler`` and reshaped back to its original shape.

    Args:
        scaler: object exposing ``fit_transform`` / ``transform``.
        windows: array whose last axis indexes the features.
        fit: when True the scaler is fitted on ``windows`` before transforming.

    Returns:
        The scaled array, with the same shape as ``windows``.
    """
    flat = windows.reshape(-1, windows.shape[-1])
    scaled = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return scaled.reshape(windows.shape)


# Scaling: both scalers are fitted on the training split only and then applied
# to validation and test, so no statistics leak from later periods.
scaler_X = StandardScaler()
X_train_scaled = _scale_windows(scaler_X, X_train, fit=True)
X_val_scaled = _scale_windows(scaler_X, X_val)
X_test_scaled = _scale_windows(scaler_X, X_test)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled = scaler_y.transform(y_val)
y_test_scaled = scaler_y.transform(y_test)

# Persist the fitted scalers so the backtest can apply the same transformation.
cfg.MODELS.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler_X, cfg.MODELS / "scaler_X_gru5d.pkl")
joblib.dump(scaler_y, cfg.MODELS / "scaler_y_gru5d.pkl")
print("✅ Escaladores guardados para backtest")

🔹 Train: (2086, 60, 80)
🔹 Val:   (731, 60, 80)
🔹 Test:  (1633, 60, 80)
🗓️ Rango fechas:
Train: 2012-08-22 00:00:00 → 2018-12-31 00:00:00
Val:   2019-01-01 00:00:00 → 2020-12-31 00:00:00
Test:  2021-01-01 00:00:00 → 2025-06-21 00:00:00
✅ Escaladores guardados para backtest


In [4]:
# --- Build the GRU model ---
print("🛠️  Definiendo arquitectura...")

# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation and batch shuffling are repeatable across
# "Restart & Run All" executions.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Input shape is taken from the dataset: every axis of X except the first.
inputs = tf.keras.Input(shape=X.shape[1:])
# Two stacked GRU layers: the first returns the full sequence so the second
# can consume it; the second returns only its final state.
x = tf.keras.layers.GRU(64, return_sequences=True)(inputs)
x = tf.keras.layers.GRU(32)(x)
# Linear output layer with one unit per target column of y.
outputs = tf.keras.layers.Dense(y.shape[1])(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

model.summary()


🛠️  Definiendo arquitectura...


In [5]:
# --- Training ---
ckpt_path = pathlib.Path(cfg.MODELS) / "gru5d.keras"
# Make sure the target directory exists before the checkpoint callback writes.
ckpt_path.parent.mkdir(parents=True, exist_ok=True)
print("📁 Checkpoint se guardará en:", ckpt_path)

# Sanity-check the SCALED training data before fitting.
print("🔍 NaNs en X_train_scaled:", np.isnan(X_train_scaled).sum())
print("🔍 NaNs en y_train_scaled:", np.isnan(y_train_scaled).sum())
print("🔍 Rango X_train_scaled  :", np.min(X_train_scaled), "→", np.max(X_train_scaled))
print("🔍 Rango y_train_scaled  :", np.min(y_train_scaled), "→", np.max(y_train_scaled))

# Stop after 5 epochs without improvement in validation loss and roll the
# model back to the best weights seen.
es_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Actually write the checkpoint announced above: the best model (lowest
# validation loss) is saved during training, so an interrupted run still
# leaves a usable artifact on disk.
ckpt_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=str(ckpt_path),
    monitor="val_loss",
    save_best_only=True,
)

# Train on the SCALED data.
history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_data=(X_val_scaled, y_val_scaled),
    epochs=50,
    batch_size=32,
    callbacks=[es_cb, ckpt_cb],
    verbose=2
)

print("🏁 Entrenamiento finalizado.")

📁 Checkpoint se guardará en: C:\Users\1M72763\Desktop\TFM\models\gru5d.keras
🔍 NaNs en X_train_scaled: 0
🔍 NaNs en y_train_scaled: 0
🔍 Rango X_train_scaled  : -13.168951 → 44.82546
🔍 Rango y_train_scaled  : -7.228031 → 20.053337
Epoch 1/50
66/66 - 5s - 73ms/step - loss: 1.0170 - root_mean_squared_error: 1.0085 - val_loss: 2.0701 - val_root_mean_squared_error: 1.4388
Epoch 2/50
66/66 - 2s - 36ms/step - loss: 0.9421 - root_mean_squared_error: 0.9706 - val_loss: 2.1151 - val_root_mean_squared_error: 1.4543
Epoch 3/50
66/66 - 2s - 31ms/step - loss: 0.8555 - root_mean_squared_error: 0.9250 - val_loss: 2.1254 - val_root_mean_squared_error: 1.4579
Epoch 4/50
66/66 - 2s - 31ms/step - loss: 0.7656 - root_mean_squared_error: 0.8750 - val_loss: 2.1894 - val_root_mean_squared_error: 1.4797
Epoch 5/50
66/66 - 2s - 35ms/step - loss: 0.7145 - root_mean_squared_error: 0.8453 - val_loss: 2.2176 - val_root_mean_squared_error: 1.4891
Epoch 6/50
66/66 - 2s - 32ms/step - loss: 0.6691 - root_mean_squared_er

In [6]:
print("📊 Evaluando en test...")
# Predict on the scaled test inputs, then map the predictions back to the
# original target scale so they can be compared with the unscaled y_test.
y_pred_scaled = model.predict(X_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# RMSE in the original scale: one value per target column (axis=0 averages
# over samples), then the mean across columns as a single summary figure.
rmse = np.sqrt(((y_test - y_pred)**2).mean(axis=0))
rmse_mean = rmse.mean()
print("✅ RMSE medio (escala original):", rmse_mean)

# --- Saving ---
# Create both output directories first; nothing else in this cell guarantees
# that the results directory exists, and the dumps below would fail without it.
pathlib.Path(cfg.MODELS).mkdir(parents=True, exist_ok=True)
pathlib.Path(cfg.RESULT).mkdir(parents=True, exist_ok=True)

model.save(cfg.MODELS / "gru5d.keras")
joblib.dump(history.history, cfg.RESULT / "history_gru5d.pkl")
joblib.dump(rmse_mean, cfg.RESULT / "rmse_gru5d.pkl")

print("✅ Modelo y resultados guardados correctamente.")

📊 Evaluando en test...
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
✅ RMSE medio (escala original): 0.007461296
✅ Modelo y resultados guardados correctamente.
