In [2]:
import sys, pathlib
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# Project-path setup: put the repository root on sys.path so that `src`
# can be imported. The root is derived from the current working directory
# (two levels up), so this depends on where the kernel was started.
PROJECT_ROOT = pathlib.Path().resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src import config as cfg

# === 1. Load data ===
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load artefacts produced by this project's own pipeline.
data_path = cfg.DATA / "processed" / "cnn5d_data.pkl"
data      = joblib.load(data_path)

# X is 4-D, laid out as (N, WINDOW, assets, channels); y is (N, assets).
X, y   = data["X"], data["y"]
fechas = pd.to_datetime(data["dates"])

print(f"Dataset       : {data_path.name}")
print(f"X shape       : {X.shape}  (N, WINDOW, assets, channels)")
print(f"y shape       : {y.shape}  (N, assets)")
print(f"Fechas rango  : {fechas.min()}  →  {fechas.max()}")

# Fail fast on a malformed dataset: the date-based split and the scaling
# cells index X.shape[2] / X.shape[3] and mask X and y with the dates, so
# the three containers must be aligned and X must have four axes.
if len(X.shape) != 4:
    raise ValueError(f"X must be 4-D (N, WINDOW, assets, channels); got shape {X.shape}")
if not (len(X) == len(y) == len(fechas)):
    raise ValueError(
        f"Misaligned dataset: len(X)={len(X)}, len(y)={len(y)}, len(dates)={len(fechas)}"
    )

Dataset       : cnn5d_data.pkl
X shape       : (4450, 60, 40, 2)  (N, WINDOW, assets, channels)
y shape       : (4450, 40)  (N, assets)
Fechas rango  : 2012-08-22 00:00:00  →  2025-06-21 00:00:00


In [3]:
# === 2. Chronological split ===
# train: dates before TRAIN_END; validation: [TRAIN_END, VAL_END); test: from VAL_END on.
# NOTE(review): if the target looks several days ahead, samples right before
# each boundary may overlap the next split — confirm whether a gap is needed.
TRAIN_END = "2019-01-01"
VAL_END   = "2021-01-01"

train_mask = fechas < TRAIN_END
val_mask   = (fechas >= TRAIN_END) & (fechas < VAL_END)
test_mask  = fechas >= VAL_END

# Apply the three boolean masks to features and targets alike.
X_train, X_val, X_test = (X[m] for m in (train_mask, val_mask, test_mask))
y_train, y_val, y_test = (y[m] for m in (train_mask, val_mask, test_mask))

# Report the size and covered date span of every split.
print("\n── Splits ──")
for _tag, _feat, _mask in (
    ("Train:", X_train, train_mask),
    ("Val  :", X_val,   val_mask),
    ("Test :", X_test,  test_mask),
):
    _span = fechas[_mask]
    print(_tag, _feat.shape, _span.min(), "→", _span.max())



── Splits ──
Train: (2086, 60, 40, 2) 2012-08-22 00:00:00 → 2018-12-31 00:00:00
Val  : (731, 60, 40, 2) 2019-01-01 00:00:00 → 2020-12-31 00:00:00
Test : (1633, 60, 40, 2) 2021-01-01 00:00:00 → 2025-06-21 00:00:00


In [4]:
# === 3. Standardise features and targets (statistics fitted on train only) ===
n_assets, n_chan = X.shape[2], X.shape[3]   # sizes of the last two axes of X
n_feats = n_assets * n_chan                 # one scaler column per (asset, channel) pair


def _apply_scaler(scaler_fn, windows):
    """Flatten ``windows`` to 2-D with ``n_feats`` columns, run ``scaler_fn``
    on it and restore the original shape.

    Every leading axis is folded into the row axis, so each of the
    ``n_feats`` columns is scaled with a single mean/std.
    """
    flat = windows.reshape(-1, n_feats)
    return scaler_fn(flat).reshape(windows.shape)


# (a) features: fit on the training split, then reuse the fitted scaler
scaler_X       = StandardScaler()
X_train_scaled = _apply_scaler(scaler_X.fit_transform, X_train)
X_val_scaled   = _apply_scaler(scaler_X.transform, X_val)
X_test_scaled  = _apply_scaler(scaler_X.transform, X_test)

# (b) target: already 2-D, so the scaler is applied directly
scaler_y = StandardScaler().fit(y_train)
y_train_scaled, y_val_scaled, y_test_scaled = (
    scaler_y.transform(part) for part in (y_train, y_val, y_test)
)

# Persist both scalers so the back-test can reproduce the same transforms.
cfg.MODELS.mkdir(parents=True, exist_ok=True)
for _scaler, _fname in (
    (scaler_X, "scaler_X_cnn5d.pkl"),
    (scaler_y, "scaler_y_cnn5d.pkl"),
):
    joblib.dump(_scaler, cfg.MODELS / _fname)
print("Escaladores guardados en", cfg.MODELS)

Escaladores guardados en C:\Users\1M72763\Desktop\TFM\models


In [5]:
# === 4. Define the CNN model ===
# Seed Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling are reproducible across "Restart Kernel → Run All".
# cfg.SEED is used when the project config defines it; otherwise 42.
SEED = getattr(cfg, "SEED", 42)
tf.keras.utils.set_random_seed(SEED)

model = models.Sequential([
    # One sample is a (WINDOW, assets, channels) tensor.
    layers.Input(shape=(cfg.WINDOW, n_assets, n_chan)),
    # (3, 1) kernels slide along the first (time) axis only; the kernel has
    # width 1 on the asset axis, so assets are not mixed by the convolutions.
    layers.Conv2D(32, kernel_size=(3, 1), padding="same", activation="relu"),
    layers.Conv2D(32, kernel_size=(3, 1), padding="same", activation="relu"),
    # Averages each of the 32 feature maps over both spatial axes.
    layers.GlobalAveragePooling2D(),
    # Linear head: one output per column of y.
    layers.Dense(y.shape[1])
])

model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss="mse")
model.summary()

# === 5. Training ===
# Stop when the validation loss has not improved for 5 epochs and roll back
# to the best weights seen. "val_loss" is the Keras default, stated
# explicitly here so the stopping criterion is visible.
# NOTE(review): in the recorded run val_loss stays around 2.03 while the
# training loss is around 1.0 — worth investigating before trusting the model.
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_data=(X_val_scaled, y_val_scaled),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - loss: 1.0590 - val_loss: 2.0358
Epoch 2/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 1.0017 - val_loss: 2.0357
Epoch 3/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 1.0505 - val_loss: 2.0348
Epoch 4/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 1.0033 - val_loss: 2.0351
Epoch 5/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 0.9644 - val_loss: 2.0348
Epoch 6/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.9813 - val_loss: 2.0342
Epoch 7/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 1.0237 - val_loss: 2.0346
Epoch 8/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 0.9645 - val_loss: 2.0338
Epoch 9/50
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━

In [6]:
# ───────────────────────── 6. Save model and training log ──────────────────
# cfg.MODELS is created when the scalers are saved, but nothing creates
# cfg.RESULT; ensure both exist so the writes below cannot fail on a fresh
# checkout or when this cell is re-run on its own.
cfg.MODELS.mkdir(parents=True, exist_ok=True)
cfg.RESULT.mkdir(parents=True, exist_ok=True)

model.save(cfg.MODELS / "cnn5d.keras")
joblib.dump(history.history, cfg.RESULT / "history_cnn5d.pkl")
print("Modelo y log guardados")

# ───────────────────────── 7. Out-of-sample metric (RMSE) ──────────────────
# Predictions are compared against y_test_scaled, so the RMSE is expressed in
# standardised target units (scaler_y), not in the original units of y.
y_pred   = model.predict(X_test_scaled)
rmse     = np.sqrt(((y_test_scaled - y_pred) ** 2).mean(axis=0))   # one RMSE per output column
rmse_avg = rmse.mean()                                             # average over the columns
print("RMSE medio test:", rmse_avg.round(4))

joblib.dump(rmse_avg, cfg.RESULT / "rmse_cnn5d.pkl")
print("RMSE guardado →", cfg.RESULT / "rmse_cnn5d.pkl")

 Modelo y log guardados
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
RMSE medio test: 1.1932
RMSE guardado → C:\Users\1M72763\Desktop\TFM\results\rmse_cnn5d.pkl
