In [1]:
import sys, pathlib
import pandas as pd
import numpy as np
import joblib

# Añadir src/ al path para poder importar config
PROJECT_ROOT = pathlib.Path().resolve().parent.parent  # estamos en /notebooks/lstm5d
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src import config as cfg

# === 1. Cargar precios ===
df = pd.read_parquet(cfg.DATA / "raw" / "prices.parquet")
df = df.ffill().dropna()

# === 2. Calcular retornos y características ===
ret = np.log(df / df.shift(1)).dropna()
ret5 = ret.rolling(5).sum()
vol5 = ret.rolling(5).std()
momentum = (ret5 / (vol5 + 1e-6)).clip(-10, 10)

# === 3. Definir target ===
y = ret5.shift(-5).dropna()  # retorno acumulado futuro

# === 4. Alinear features con el target ===
ret = ret.shift(1).reindex(y.index)
momentum = momentum.shift(1).reindex(y.index)

# === 5. Crear ventanas ===
X = []
for i in range(cfg.WINDOW, len(y)):
    ventana_ret = ret.iloc[i - cfg.WINDOW:i].values       # (60, N)
    ventana_mom = momentum.iloc[i - cfg.WINDOW:i].values  # (60, N)
    bloque = np.concatenate([ventana_ret, ventana_mom], axis=1)  # (60, 2N)
    X.append(bloque)

X = np.stack(X)  # shape = (n_muestras, 60, 2N)
y = y.iloc[cfg.WINDOW:].values  # shape = (n_muestras, N)

print(f"✅ X shape: {X.shape}")
print(f"✅ y shape: {y.shape}")

✅ X shape: (4455, 60, 80)
✅ y shape: (4455, 40)


In [3]:
# === 6. Guardar datos procesados ===
joblib.dump({
    "X": X,
    "y": y,
    "tickers": df.columns.tolist()
}, cfg.DATA / "processed" / "cnn5d_data.pkl")

loaded = joblib.load(cfg.DATA / "processed" / "cnn5d_data.pkl")
print("Tickers guardados:", len(loaded["tickers"]))

Tickers guardados: 40
