In [13]:
import joblib
import pandas as pd
import sys, pathlib
# Make the project packages importable from this notebook. The project root
# is taken to be two directory levels above the current working directory.
PROJECT_ROOT = pathlib.Path().resolve().parent.parent
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    # Prepend (rather than append) so this entry is searched first.
    sys.path.insert(0, _project_root_entry)
from src import config as cfg

# Preprocessed LSTM dataset: an object indexed below by the keys "X" and "y".
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by this project.
lstm_data_path = cfg.DATA / "processed" / "lstm_data.pkl"
data = joblib.load(lstm_data_path)

# Raw price table stored next to the processed data.
prices_path = cfg.DATA / "raw" / "prices.parquet"
df_prices = pd.read_parquet(prices_path)

# Model inputs and targets.
X, y = data["X"], data["y"]
print("X:", X.shape, "y:", y.shape)


X: (1609, 60, 40) y: (1609, 40)


In [15]:
# Positional split into contiguous blocks (no shuffling): the first 70 % of
# the samples for training, the next 15 % for validation, the rest for test.
# NOTE(review): assumes samples are stored in time order. The original comment
# gave approximate calendar ranges (2010–2018 / 2019–2020 / 2021–2025); nothing
# in this cell checks them, so confirm against the dataset's dates.
n = len(X)
train_size = int(n * 0.7)
val_size = int(n * 0.15)

train_idx = slice(0, train_size)
val_idx = slice(train_size, train_size + val_size)
test_idx = slice(train_size + val_size, None)

X_train, y_train = X[train_idx], y[train_idx]
X_val, y_val = X[val_idx], y[val_idx]
X_test, y_test = X[test_idx], y[test_idx]

for label, subset in (("Train:", X_train), ("Val:", X_val), ("Test:", X_test)):
    print(label, subset.shape)


Train: (1126, 60, 40)
Val: (241, 60, 40)
Test: (242, 60, 40)


In [17]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Seed Python, NumPy and TensorFlow so that weight initialisation, dropout
# masks and batch shuffling are repeatable after Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Take every dimension from the data instead of hard-coding it, so the model
# stays consistent with whatever the preprocessing step produced.
window_len, n_features = X.shape[1], X.shape[2]  # time steps per sample, features per step
n_targets = y.shape[1]                           # values predicted per sample

# Stacked LSTM: the first layer returns the full sequence so the second LSTM
# can consume it; the second returns only its final state, which the Dense
# layer maps to one output per target column.
model = models.Sequential([
    layers.Input(shape=(window_len, n_features)),
    layers.LSTM(64, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(32),
    layers.Dense(n_targets),  # output width follows y, not X
])

model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")
model.summary()


In [19]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the weights of the best epoch seen.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)

# Training options, grouped so they can be read and tuned in one place.
fit_options = {
    "validation_data": (X_val, y_val),
    "epochs": 50,  # upper bound; early stopping may end training sooner
    "batch_size": 32,
    "callbacks": [early_stop],
    "verbose": 1,
}

history = model.fit(X_train, y_train, **fit_options)

Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - loss: 0.0031 - val_loss: 0.0044
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - loss: 0.0012 - val_loss: 0.0041
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 69ms/step - loss: 7.9786e-04 - val_loss: 0.0040
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - loss: 0.0014 - val_loss: 0.0039
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - loss: 0.0013 - val_loss: 0.0039
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 66ms/step - loss: 4.2210e-04 - val_loss: 0.0039
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - loss: 9.2422e-04 - val_loss: 0.0038
Epoch 8/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - loss: 4.9138e-04 - val_loss: 0.0038
Epoch 9/50
[1m36/36[0m [32m━━

In [21]:
import joblib
from pathlib import Path

# Persist the per-epoch metrics recorded by model.fit.
# Create the results directory first: joblib.dump does not create missing
# parent directories, so a fresh checkout would otherwise fail here.
results_dir = Path(cfg.RESULT)
results_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(history.history, results_dir / "history_lstm.pkl")
print("✅ Histórico de entrenamiento guardado.")

✅ Histórico de entrenamiento guardado.


In [23]:
from pathlib import Path

# Create the directory the model is actually written to. The previous code
# created "models" relative to the current working directory, which is not
# necessarily cfg.MODELS, so the save could still fail on a missing folder.
models_dir = Path(cfg.MODELS)
models_dir.mkdir(parents=True, exist_ok=True)

# Save the trained model in the native Keras format.
model.save(models_dir / "lstm_t1.keras")
print("✅ Modelo guardado en formato .keras")


✅ Modelo guardado en formato .keras


In [25]:
# mean_squared_error is imported but not used in this cell; the RMSE below is
# computed directly with NumPy.
from sklearn.metrics import mean_squared_error
import numpy as np

# Predictions for the held-out test samples.
y_pred = model.predict(X_test)

# RMSE per output column (averaging squared errors over the sample axis),
# then a single summary figure: the mean of those per-column values.
squared_error = np.square(y_test - y_pred)
rmse = np.sqrt(squared_error.mean(axis=0))
rmse_mean = rmse.mean()
print("RMSE medio:", rmse_mean)

rmse_path = cfg.RESULT / "rmse_lstm.pkl"
joblib.dump(rmse_mean, rmse_path)
print("✅ RMSE guardado.")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step
RMSE medio: 0.020582965
✅ RMSE guardado.
