In [1]:
import joblib
import pandas as pd
import sys, pathlib

# Setup ruta al proyecto
PROJECT_ROOT = pathlib.Path().resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src import config as cfg

# Cargar datos procesados específicos para CNN
data = joblib.load(cfg.DATA / "processed" / "cnn5d_data.pkl")
df_prices = pd.read_parquet(cfg.DATA / "raw" / "prices.parquet")

X = data["X"]
y = data["y"]


In [3]:
import numpy as np

mask = ~np.isnan(y).any(axis=1) & ~np.isinf(y).any(axis=1)
X = X[mask]
y = y[mask]

print("✅ X:", X.shape, "y:", y.shape)


✅ X: (4455, 60, 80) y: (4455, 40)


In [7]:
n = len(X)
train_size = int(n * 0.7)
val_size = int(n * 0.15)

X_train = X[:train_size]
y_train = y[:train_size]

X_val = X[train_size:train_size + val_size]
y_val = y[train_size:train_size + val_size]

X_test = X[train_size + val_size:]
y_test = y[train_size + val_size:]


from sklearn.preprocessing import StandardScaler

# Escalar entradas como (n_total * steps, features)
X_all_2d = X.reshape(-1, X.shape[2])
scaler_X = StandardScaler()
X_all_scaled = scaler_X.fit_transform(X_all_2d).reshape(X.shape)

X_train_scaled = X_all_scaled[:train_size]
X_val_scaled   = X_all_scaled[train_size:train_size + val_size]
X_test_scaled  = X_all_scaled[train_size + val_size:]

print("✅ X escalado correctamente:", X_train_scaled.shape)



✅ X escalado correctamente: (3118, 60, 80)


In [9]:
# Filtrar muestras inválidas en X antes de escalar y
mask_valid = ~np.isnan(X_train_scaled).any(axis=(1, 2))
X_train_scaled = X_train_scaled[mask_valid]
y_train = y_train[mask_valid]

from sklearn.preprocessing import StandardScaler
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)

# Validación sin escalar (pero revisable si se desea también escalar)
y_val_scaled = scaler_y.transform(y_val)
y_test_scaled = scaler_y.transform(y_test)

# Guardar escaladores
joblib.dump(scaler_X, cfg.MODELS / "scaler_X_cnn5d.pkl")
joblib.dump(scaler_y, cfg.MODELS / "scaler_y_cnn5d.pkl")

print("✅ Escalado completado. Train:", X_train_scaled.shape)


✅ Escalado completado. Train: (3113, 60, 80)


In [11]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=(cfg.WINDOW, X.shape[2])),  # (60, 2N)
    layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu"),
    layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu"),
    layers.GlobalAveragePooling1D(),
    layers.Dense(y.shape[1])  # 1 predicción por activo
])

model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")
model.summary()


In [13]:
print("Check X_train_scaled:", np.isnan(X_train_scaled).sum(), "NaNs /", np.isinf(X_train_scaled).sum(), "Infs")
print("Check y_train_scaled:", np.isnan(y_train_scaled).sum(), "NaNs /", np.isinf(y_train_scaled).sum(), "Infs")
print("Check X_val_scaled:", np.isnan(X_val_scaled).sum(), "NaNs /", np.isinf(X_val_scaled).sum(), "Infs")
print("Check y_val_scaled:", np.isnan(y_val_scaled).sum(), "NaNs /", np.isinf(y_val_scaled).sum(), "Infs")


Check X_train_scaled: 0 NaNs / 0 Infs
Check y_train_scaled: 0 NaNs / 0 Infs
Check X_val_scaled: 0 NaNs / 0 Infs
Check y_val_scaled: 0 NaNs / 0 Infs


In [15]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_data=(X_val_scaled, y_val_scaled),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.9842 - val_loss: 1.4311
Epoch 2/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.0439 - val_loss: 1.4286
Epoch 3/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.9779 - val_loss: 1.4432
Epoch 4/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.9481 - val_loss: 1.4513
Epoch 5/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.9495 - val_loss: 1.4605
Epoch 6/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.9694 - val_loss: 1.4266
Epoch 7/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.9293 - val_loss: 1.4430
Epoch 8/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.9521 - val_loss: 1.4730
Epoch 9/50
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [17]:
from pathlib import Path

Path(cfg.MODELS).mkdir(parents=True, exist_ok=True)

model.save(cfg.MODELS / "cnn5d.keras")
print("✅ Modelo guardado en formato .keras")

joblib.dump(history.history, cfg.RESULT / "history_cnn5d.pkl")


✅ Modelo guardado en formato .keras


['C:\\Users\\ferra\\Documents\\TFM\\results\\history_cnn5d.pkl']

In [19]:
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(((y_test_scaled - y_pred)**2).mean(axis=0))
rmse_mean = rmse.mean()

print("📉 RMSE medio:", rmse_mean)

joblib.dump(rmse_mean, cfg.RESULT / "rmse_cnn5d.pkl")


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
📉 RMSE medio: 1.0555992080260754


['C:\\Users\\ferra\\Documents\\TFM\\results\\rmse_cnn5d.pkl']