In [24]:
import sys, pathlib
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from pathlib import Path
from src import config as cfg
PROJECT_ROOT = pathlib.Path().resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from src import config as cfg

# ── Cargar datos
data = joblib.load(cfg.DATA / "processed" / "lstm_data.pkl")
X, y = data["X"], data["y"]
fechas = pd.to_datetime(data["dates"])
print("✅ Datos cargados:", X.shape, y.shape)


✅ Datos cargados: (3239, 60, 40) (3239, 40)


In [26]:
# ── División temporal por fechas
train_mask = fechas < "2019-01-01"
val_mask   = (fechas >= "2019-01-01") & (fechas < "2021-01-01")
test_mask  = fechas >= "2021-01-01"

X_train, y_train = X[train_mask], y[train_mask]
X_val, y_val     = X[val_mask], y[val_mask]
X_test, y_test   = X[test_mask], y[test_mask]

print("🔹 Train:", X_train.shape)
print("🔹 Val:  ", X_val.shape)
print("🔹 Test: ", X_test.shape)


print("🗓️ Rango fechas:")
print("Train:", fechas[train_mask].min(), "→", fechas[train_mask].max())
print("Val:  ", fechas[val_mask].min(), "→", fechas[val_mask].max())
print("Test: ", fechas[test_mask].min(), "→", fechas[test_mask].max())

🔹 Train: (1609, 60, 40)
🔹 Val:   (505, 60, 40)
🔹 Test:  (1125, 60, 40)
🗓️ Rango fechas:
Train: 2012-08-08 00:00:00 → 2018-12-31 00:00:00
Val:   2019-01-02 00:00:00 → 2020-12-31 00:00:00
Test:  2021-01-04 00:00:00 → 2025-06-26 00:00:00


In [28]:
# ── Escalado
scaler_X = StandardScaler()
X_train_2d = X_train.reshape(-1, X.shape[2])
X_train_scaled = scaler_X.fit_transform(X_train_2d).reshape(X_train.shape)

X_val_scaled  = scaler_X.transform(X_val.reshape(-1, X.shape[2])).reshape(X_val.shape)
X_test_scaled = scaler_X.transform(X_test.reshape(-1, X.shape[2])).reshape(X_test.shape)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_val_scaled   = scaler_y.transform(y_val)
y_test_scaled  = scaler_y.transform(y_test)

# Guardar escaladores
joblib.dump(scaler_X, cfg.MODELS / "scaler_X_lstm.pkl")
joblib.dump(scaler_y, cfg.MODELS / "scaler_y_lstm.pkl")
print("✅ Escaladores guardados.")

✅ Escaladores guardados.


In [17]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=(60, X.shape[2])),  # 60 días, 38 activos
    layers.LSTM(64, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(32),
    layers.Dense(X.shape[2])  # una predicción por activo
])

model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")
model.summary()


In [19]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - loss: 0.0031 - val_loss: 0.0044
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - loss: 0.0012 - val_loss: 0.0041
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 69ms/step - loss: 7.9786e-04 - val_loss: 0.0040
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - loss: 0.0014 - val_loss: 0.0039
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - loss: 0.0013 - val_loss: 0.0039
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 66ms/step - loss: 4.2210e-04 - val_loss: 0.0039
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - loss: 9.2422e-04 - val_loss: 0.0038
Epoch 8/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step - loss: 4.9138e-04 - val_loss: 0.0038
Epoch 9/50
[1m36/36[0m [32m━━

In [21]:
import joblib
joblib.dump(history.history, cfg.RESULT / "history_lstm.pkl")
print("✅ Histórico de entrenamiento guardado.")

✅ Histórico de entrenamiento guardado.


In [23]:
from pathlib import Path

# Crear carpeta si no existe
Path("models").mkdir(parents=True, exist_ok=True)

# Guardar el modelo
model.save(cfg.MODELS / "lstm_t1.keras")
print("✅ Modelo guardado en formato .keras")


✅ Modelo guardado en formato .keras


In [25]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Predicción en test
y_pred = model.predict(X_test)

# RMSE por activo
rmse = np.sqrt(((y_test - y_pred)**2).mean(axis=0))
rmse_mean = rmse.mean()
print("RMSE medio:", rmse_mean)
joblib.dump(rmse_mean, cfg.RESULT / "rmse_lstm.pkl")
print("✅ RMSE guardado.")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step
RMSE medio: 0.020582965
✅ RMSE guardado.


NameError: name 'df_prices' is not defined

In [5]:
print(df_prices.index)
print("Primera fecha:", df_prices.index.min())
print("Última fecha:", df_prices.index.max())


NameError: name 'df_prices' is not defined

In [7]:
import joblib
import pandas as pd
import sys, pathlib

PROJECT_ROOT = pathlib.Path().resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src import config as cfg

data = joblib.load(cfg.DATA / "processed" / "lstm_data.pkl")
df_prices = pd.read_parquet(cfg.DATA / "raw" / "prices.parquet")


In [9]:
# ¿Cómo está indexado df_prices?
print("Tipo de índice:", type(df_prices.index))
print(df_prices.index)

# Si es MultiIndex, verifica los niveles
if isinstance(df_prices.index, pd.MultiIndex):
    print("Niveles:", df_prices.index.names)
    fechas = df_prices.index.get_level_values("date")
else:
    fechas = df_prices.index

# Verifica el rango y el orden
print("Primera fecha:", fechas.min())
print("Última fecha:", fechas.max())
print("¿Está ordenado?:", fechas.is_monotonic_increasing)


Tipo de índice: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2025-06-17', '2025-06-18', '2025-06-19', '2025-06-20',
               '2025-06-21', '2025-06-22', '2025-06-23', '2025-06-24',
               '2025-06-25', '2025-06-26'],
              dtype='datetime64[ns]', name='Date', length=5120, freq=None)
Primera fecha: 2010-01-04 00:00:00
Última fecha: 2025-06-26 00:00:00
¿Está ordenado?: True
