# Imports + helpers

In [135]:
from __future__ import annotations
import sys
import os
PROJECT_ROOT = os.path.abspath('..')
sys.path.append(PROJECT_ROOT)


import re
import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, Any, List, Tuple, Optional, Union

import numpy as np
import pandas as pd

import torch
import torch.nn as nn

from torch.utils.data import TensorDataset, DataLoader

from sklearn.preprocessing import StandardScaler

import joblib


from machine_learning.data_collectors import (
    build_ml_dataframe,
    build_supervised_dataset,
    time_split_masks,
    purged_ts_cv_splits,
    TARGET_HORIZONS,
    TARGET_LOOKBACKS,
    parse_feat_lag
)

from machine_learning.evaluators import eval_regression, eval_regression_extended, calculate_deadzone

from python_scripts.LLM_analysis.preprocess_store_database import get_connection
from database_tier1 import TARGET_STOCKS

import random

In [136]:
    
def save_cnn_artifact():
    """Placeholder for persisting a trained CNN artifact; not implemented yet."""
    # TODO: implement artifact saving.
    return None

def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Value handed to every random number generator.
    """
    # Host-side generators.
    for seeder in (random.seed, torch.manual_seed, np.random.seed):
        seeder(seed)

    # CUDA generators on all devices.
    torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, cuDNN auto-tuner off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [137]:
def infer_cnn_perm(
    X_columns: List[str],
    *,
    feature_cols: List[str],
    lookback: int,
    time_order: str = "oldest_to_newest",
) -> Tuple[List[int], int, int]:
    """
    Build the column permutation that lets a flat (B, input_dim) matrix be
    reshaped into (B, C, T).

    Every name in ``X_columns`` is passed through ``parse_feat_lag``; names for
    which it returns ``None`` are ignored, the others are keyed by the value it
    returns and looked up as ``(feature, lag)`` pairs.

    Args:
        X_columns: Column names of the flat feature matrix, in matrix order.
        feature_cols: Features in the order they should appear as channels (C).
        lookback: Number of lags per feature (T).
        time_order: ``"oldest_to_newest"`` puts lag ``lookback-1`` first and
            lag 0 last; ``"newest_to_oldest"`` puts lag 0 first.

    Returns:
        ``(perm, n_channels, seq_len)`` where ``perm`` lists, channel by
        channel, the source column index of every (feature, lag) pair.

    Raises:
        ValueError: If ``time_order`` is unknown, or if any (feature, lag)
            pair has no matching column.
    """
    # (feature, lag) -> position in X_columns; a later duplicate overrides an earlier one.
    position_of: Dict[Tuple[str, int], int] = {
        key: pos
        for pos, name in enumerate(X_columns)
        if (key := parse_feat_lag(name)) is not None
    }

    if time_order == "newest_to_oldest":
        lag_sequence = list(range(lookback))
    elif time_order == "oldest_to_newest":
        lag_sequence = list(range(lookback - 1, -1, -1))
    else:
        raise ValueError(f"invalid time_order: {time_order}")

    perm: List[int] = []
    missing: List[str] = []
    for feat in feature_cols:
        for lag in lag_sequence:
            pos = position_of.get((feat, lag))
            if pos is not None:
                perm.append(pos)
            else:
                missing.append(f"{feat} lag{lag}")

    n_channels = len(feature_cols)
    input_dim_expected = n_channels * lookback
    if len(perm) == input_dim_expected:
        return perm, n_channels, lookback

    # Show a small sample of columns and misses to make debugging quick.
    example_cols = X_columns[:10]
    raise ValueError(
        f"Incomplete perm. Got {len(perm)} but was expecting {input_dim_expected}. "
        f"Column examples: {example_cols}\n"
        f"Missing (for instance): {missing[:20]}\n"
    )

## CNN module

In [138]:
class ConvBlock1D(nn.Module):
    """Conv1d -> (optional BatchNorm1d) -> GELU -> Dropout.

    The convolution uses stride 1 and a symmetric padding of
    ``((kernel_size - 1) // 2) * dilation``, which keeps the sequence length
    unchanged for odd kernel sizes.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Convolution kernel width.
        dilation: Convolution dilation; must be >= 1 (1 means no dilation).
        dropout: Dropout probability applied after the activation.
        use_bn: If True, apply BatchNorm1d after the convolution and drop the
            convolution bias; otherwise the convolution keeps its bias.

    Raises:
        ValueError: If ``dilation`` is smaller than 1.
    """

    def __init__(
            self,
            in_ch: int,
            out_ch: int,
            *,
            kernel_size: int = 3,
            # Was 0: an invalid Conv1d dilation that also zeroed the padding,
            # so the block could not be used with its own defaults.
            dilation: int = 1,
            dropout: float = 0.1,
            use_bn: bool = True,
    ):
        super().__init__()
        if dilation < 1:
            raise ValueError(f"dilation must be >= 1, got {dilation}")

        padding = ((kernel_size - 1) // 2) * dilation

        self.conv = nn.Conv1d(
            in_channels=in_ch,
            out_channels=out_ch,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            dilation=dilation,
            # The bias is disabled whenever batch norm follows the convolution.
            bias=not use_bn
        )

        self.bn = nn.BatchNorm1d(out_ch) if use_bn else nn.Identity()
        self.act = nn.GELU()
        self.drop = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply conv, normalisation, activation and dropout, in that order."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)
        x = self.drop(x)
        return x


class CNN1DRegressor(nn.Module):
    """1-D CNN regressor over a flat feature vector.

    The input ``(B, input_dim)`` is reordered with ``perm``, viewed as
    ``(B, n_channels, seq_len)``, passed through a stack of ``ConvBlock1D``
    layers, averaged over the time axis and fed to a two-layer MLP head.

    Args:
        input_dim: Width of the flat input; must equal ``n_channels * seq_len``.
        n_channels: Number of channels (C) after reshaping.
        seq_len: Sequence length (T) after reshaping.
        perm: Column indices applied to the input before reshaping; must have
            ``input_dim`` entries.
        conv_channels: Output channels of each conv block, in order.
        kernel_size: Kernel width shared by all conv blocks.
        dilations: Per-block dilation; ``None`` means 1 for every block.
        dropout: Dropout probability used in the head.
        use_bn: Forwarded to every conv block.
        head_hidden: Hidden width of the head.
        out_dim: Output width of the head.

    Raises:
        ValueError: If the dimensions, ``perm`` or ``dilations`` are inconsistent.
    """

    def __init__(
            self, *,
            input_dim: int,
            n_channels: int,
            seq_len: int,
            perm: List[int],
            conv_channels: Tuple[int, ...] = (32, 64, 64),
            kernel_size: int = 5,
            dilations: Optional[Tuple[int, ...]] = None,
            dropout: float = 0.1,
            use_bn: bool = True,
            head_hidden: int = 64,
            out_dim: int = 1
    ):
        super().__init__()
        # Explicit raises instead of asserts: asserts vanish under `python -O`.
        if input_dim != n_channels * seq_len:
            raise ValueError("input_dim must be n_channels*seq_len")
        if len(perm) != input_dim:
            # forward() views the permuted input as (B, C, T), so the
            # permutation must select exactly C*T columns.
            raise ValueError(
                f"perm must have input_dim={input_dim} entries, got {len(perm)}"
            )

        self.input_dim = input_dim
        self.n_channels = n_channels
        self.seq_len = seq_len
        self.conv_channels = conv_channels
        self.kernel_size = kernel_size
        self.dilations = dilations  # stored as given (may be None)
        self.dropout = dropout
        self.use_bn = use_bn
        self.head_hidden = head_hidden
        self.out_dim = out_dim

        # Store perm as a persistent, non-trainable buffer so it follows the
        # module across devices and is saved in the state dict.
        self.register_buffer("perm", torch.tensor(perm, dtype=torch.long), persistent=True)

        if dilations is None:
            dilations = tuple([1] * len(conv_channels))
        if len(dilations) != len(conv_channels):
            raise ValueError("dilations must have the same length as conv_channels")

        # Convolutional backbone.
        layers = []
        in_ch = n_channels
        for out_ch, dil in zip(conv_channels, dilations):
            layers.append(
                ConvBlock1D(
                    in_ch, out_ch,
                    kernel_size=kernel_size,
                    dilation=dil,
                    # NOTE(review): `dropout` is deliberately not forwarded
                    # here (it was commented out), so conv blocks use the
                    # ConvBlock1D default dropout; only the head uses `dropout`.
                    use_bn=use_bn,
                )
            )
            in_ch = out_ch
        self.backbone = nn.Sequential(*layers)

        # Regression head on the time-averaged features.
        self.head = nn.Sequential(
            nn.Linear(in_ch, head_hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(head_hidden, out_dim),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: (B, input_dim) where input_dim = C * T.

        Returns:
            The head output with a trailing size-1 axis squeezed away, i.e.
            (B,) when ``out_dim == 1`` and (B, out_dim) otherwise.

        Raises:
            ValueError: If ``x`` is not 2-dimensional.
        """

        if x.dim() != 2:
            raise ValueError(f"Expected x dim=2 (B, input_dim), got {tuple(x.shape)}")

        # Reorder columns so that each channel's time steps are contiguous.
        x = x.index_select(1, self.perm)  # type: ignore

        b = x.shape[0]
        x = x.view(b, self.n_channels, self.seq_len)  # (B, C, T)

        x = self.backbone(x)  # (B, hidden, T)

        x = x.mean(dim=-1)  # global average over time -> (B, hidden)

        y = self.head(x)
        return y.squeeze(-1)


## Dataloaders

In [139]:
def make_loader(
        X_np: np.ndarray,
        y_np: np.ndarray,
        *,
        batch_size: int,
        shuffle: bool,
        num_workers: int = 0,
        drop_last: bool = False
) -> DataLoader:
    """Wrap a feature matrix and a target vector in a float32 ``DataLoader``.

    Both arrays are copied to float32 before being turned into tensors.

    Args:
        X_np: Feature array.
        y_np: Target array, aligned with ``X_np`` on the first axis.
        batch_size: Samples per batch.
        shuffle: Whether the loader shuffles the samples.
        num_workers: Worker processes used by the loader.
        drop_last: Whether to drop a final incomplete batch.

    Returns:
        A ``DataLoader`` yielding ``(features, targets)`` tensor pairs.
    """
    features = torch.from_numpy(X_np.astype(np.float32))
    targets = torch.from_numpy(y_np.astype(np.float32))
    dataset = TensorDataset(features, targets)

    loader_options = {
        "batch_size": batch_size,
        "shuffle": shuffle,
        "num_workers": num_workers,
        "drop_last": drop_last,
    }
    return DataLoader(dataset, **loader_options)


## Train / eval loops

In [140]:
@torch.no_grad()
def predict_loader(model: nn.Module, loader: DataLoader, device: torch.device) -> Tuple[np.ndarray, np.ndarray]:
    """Run ``model`` over every batch of ``loader`` without tracking gradients.

    The model is switched to eval mode and is left in that mode.

    Args:
        model: Module called on each feature batch.
        loader: Yields ``(features, targets)`` batches.
        device: Device the feature batches are moved to before the forward pass.

    Returns:
        ``(y_true, y_pred)`` as flat NumPy arrays concatenated in loader order.
    """
    model.eval()

    targets = []
    predictions = []
    for features, labels in loader:
        outputs = model(features.to(device))
        batch_pred = outputs.detach().cpu().numpy().reshape(-1)
        targets.append(labels.numpy().reshape(-1))
        predictions.append(batch_pred)

    y_true = np.concatenate(targets, axis=0)
    y_pred = np.concatenate(predictions, axis=0)
    return y_true, y_pred

In [141]:
@dataclass
class TrainConfig:
    """Hyper-parameters read by ``train_cnn_one_run`` and by the loader-building cells."""
    # Learning rate passed to the AdamW optimizer.
    lr: float = 1e-3
    # Weight decay passed to the AdamW optimizer.
    weight_decay: float = 1e-3
    # Batch size used when building the data loaders.
    batch_size: int = 256
    # Upper bound on the number of training epochs.
    max_epochs: int = 100
    # Number of consecutive non-improving epochs tolerated before early stopping.
    patience: int = 15
    # Minimum increase of the monitored score that counts as an improvement.
    min_delta: float = 1e-4
    # Max gradient norm used for gradient clipping.
    grad_clip: float = 1.0
    # Worker count used when building the data loaders.
    num_workers: int = 0
    # Mixed precision is only enabled when this is True and the device is CUDA.
    use_amp: bool = True
    # Key looked up in the validation metrics dict; higher values are treated as better.
    monitor_key: str = "DailyRankIC_mean" 


In [142]:


def _cpu_state_snapshot(model: nn.Module) -> Dict[str, torch.Tensor]:
    """Return a detached CPU copy of every tensor in ``model.state_dict()``."""
    return {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}


def _train_one_epoch(
    *,
    model: nn.Module,
    loader: DataLoader,
    loss_fn: nn.Module,
    opt: torch.optim.Optimizer,
    scaler: Any,
    use_amp: bool,
    grad_clip: float,
    device: torch.device,
) -> float:
    """Run one optimisation pass over ``loader`` and return the sample-weighted mean loss."""
    model.train()
    running = 0.0
    nobs = 0

    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        opt.zero_grad(set_to_none=True)

        if use_amp:
            with torch.autocast('cuda'):
                pred = model(xb).reshape(-1)
                loss = loss_fn(pred, yb)

            scaler.scale(loss).backward()
            # Unscale before clipping so the threshold applies to the true gradients.
            scaler.unscale_(opt)
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            scaler.step(opt)
            scaler.update()
        else:
            pred = model(xb).reshape(-1)
            loss = loss_fn(pred, yb)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            opt.step()

        running += loss.item() * len(yb)
        nobs += len(yb)

    # max(..., 1) avoids a division by zero when the loader yields no batch.
    return running / max(nobs, 1)


def train_cnn_one_run(
    *,
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    meta_val: pd.DataFrame,
    train_cfg: TrainConfig,
    device: torch.device,
    deadzone_horizon: int = 20,
) -> Dict[str, Any]:
    """Train ``model`` with early stopping on a validation metric.

    Each epoch trains with SmoothL1 loss and AdamW, then evaluates the
    validation set with ``eval_regression_extended`` and reads
    ``train_cfg.monitor_key`` from the returned metrics. The monitored score
    is maximised: an epoch counts as an improvement when it exceeds the best
    score so far by more than ``train_cfg.min_delta``. Training stops after
    ``train_cfg.patience`` consecutive epochs without improvement. On return
    the model holds the weights of the best epoch (or the final weights if no
    epoch ever improved, e.g. when the score is NaN throughout).

    Args:
        model: Module to train; moved to ``device``.
        train_loader: Yields ``(features, targets)`` training batches.
        val_loader: Yields validation batches, evaluated after every epoch.
        meta_val: Metadata passed to ``eval_regression_extended`` together
            with the validation predictions (``"timestamp"`` and ``"symbol"``
            are named as time and group columns).
        train_cfg: Optimiser, early-stopping and AMP settings.
        device: Training device; AMP is only used on CUDA.
        deadzone_horizon: Argument passed to ``calculate_deadzone`` for the
            validation metrics. Defaults to 20, the value that used to be
            hard-coded; pass the dataset horizon to keep it in sync.

    Returns:
        Dict with ``"model_state"`` (CPU state dict that was loaded back),
        ``"best_epoch"``, ``"best_score"`` and ``"history"`` (per-epoch train
        loss, validation score and full validation metrics).

    Raises:
        KeyError: If ``train_cfg.monitor_key`` is missing from the metrics.
    """
    loss_fn = nn.SmoothL1Loss(beta=0.01)
    opt = torch.optim.AdamW(model.parameters(), lr=train_cfg.lr,
                            weight_decay=train_cfg.weight_decay)

    use_amp = train_cfg.use_amp and device.type == "cuda"
    scaler = torch.GradScaler('cuda', enabled=use_amp)

    best_score = -np.inf
    best_state = None
    best_epoch = -1

    history = {
        "train_loss": [],
        "val_score": [],
        "val_metrics": [],
        "best_epoch": None,
        "best_score": None,
    }

    model.to(device)

    bad_epochs = 0
    for epoch in range(train_cfg.max_epochs):
        train_loss = _train_one_epoch(
            model=model,
            loader=train_loader,
            loss_fn=loss_fn,
            opt=opt,
            scaler=scaler,
            use_amp=use_amp,
            grad_clip=train_cfg.grad_clip,
            device=device,
        )
        history["train_loss"].append(float(train_loss))

        # --- validation ---
        yv_true, yv_pred = predict_loader(model, val_loader, device=device)
        val_metrics = eval_regression_extended(y_true=yv_true,
                                               y_pred=yv_pred,
                                               meta=meta_val,
                                               time_col="timestamp",
                                               group_col="symbol",
                                               deadzone=calculate_deadzone(deadzone_horizon),
        )

        score = val_metrics.get(train_cfg.monitor_key, None)
        if score is None:
            raise KeyError(
                f"monitor_key not found ({train_cfg.monitor_key}) at val_metrics\n"
                f"Available keys: {list(val_metrics.keys())[:30]}"
            )
        history["val_score"].append(float(score))
        history["val_metrics"].append(val_metrics)

        # A NaN score compares False here and therefore counts as a bad epoch.
        improved = (score > best_score + train_cfg.min_delta)

        if improved:
            best_score = score
            best_epoch = epoch
            best_state = _cpu_state_snapshot(model)
            bad_epochs = 0
        else:
            bad_epochs += 1

        if bad_epochs >= train_cfg.patience:
            print("early stopping")
            break

        if epoch % 10 == 0:
            print(f'Epoch: {epoch}/{train_cfg.max_epochs} ', "val: ", val_metrics)

    history['best_epoch'] = int(best_epoch)
    history['best_score'] = float(best_score)

    # No epoch improved (or max_epochs == 0): fall back to the current weights.
    if best_state is None:
        best_state = _cpu_state_snapshot(model)

    model.load_state_dict(best_state)

    return {
        "model_state": best_state,
        "best_epoch": best_epoch,
        "best_score": best_score,
        "history": history,
    }
    


In [143]:
def metrics_matrix(metrics_by_model: dict[str, dict]) -> pd.DataFrame:
    """Arrange per-model metric dicts as a metrics-by-model table.

    Args:
        metrics_by_model: Maps a model name to its ``{metric: value}`` dict.

    Returns:
        DataFrame with one row per metric and one column per model. When a
        row named ``"N"`` exists, the headline metrics that are present are
        moved to the top in a fixed order, followed by all remaining rows in
        their original order; otherwise the row order is left untouched.
    """
    mat = pd.DataFrame(metrics_by_model)
    if "N" in mat.index:
        preferred = ["N", 'DailyRankIC_mean', 'DailyRankIC_frac_pos', 'QuantileSpread_sharpe', 'DailyIC_mean', 'HitRate(sign,deadzone)', 'AUC(Sign)', 'QuantileSpread_std', 'DailyRankIC_std']
        # Keep only the headline rows that exist: selecting a missing label
        # with .loc raises KeyError.
        head = [name for name in preferred if name in mat.index]
        placed = set(head)
        tail = [name for name in mat.index if name not in placed]
        mat = mat.loc[head + tail]
    return mat


from IPython.display import display
from matplotlib.colors import LinearSegmentedColormap

def style_metrics_by_row(
    mat: pd.DataFrame,
    exclude_rows=("N",),
    lower_is_better=(),   # e.g. ("QuantileSpread_std", "RMSE", ...)
    fmt="{:.4f}",
    row_limits: dict[str, tuple[float, float]] | None = None,
    clip: bool = True,
):
    """Return a Styler that colours each metric row from red (worst) to green (best).

    Args:
        mat: Metrics table, one row per metric and one column per model.
        exclude_rows: Row labels that are left uncoloured.
        lower_is_better: Row labels whose colour scale is inverted.
        fmt: Format string applied to every cell.
        row_limits: Optional ``{row: (vmin, vmax)}`` overriding the per-row
            min/max used for normalisation.
        clip: When manual limits are used, clip the normalised values to [0, 1].

    Returns:
        A pandas Styler over the numeric-coerced table.
    """
    numeric = mat.copy().apply(pd.to_numeric, errors="coerce")
    limits = row_limits or {}

    # Per-cell colour position in 0..1 (0 = worst/red, 1 = best/green).
    shade = pd.DataFrame(index=numeric.index, columns=numeric.columns, dtype=float)

    for label in numeric.index:
        values = numeric.loc[label]
        blank = pd.Series(np.nan, index=values.index, dtype=float)

        if label in limits:
            # Manual limits take precedence over the observed range.
            lo, hi = limits[label]
            span = hi - lo
            if span == 0:
                scaled = blank
            else:
                scaled = (values - lo) / span
                scaled = scaled.clip(0.0, 1.0) if clip else scaled
        else:
            # Otherwise normalise by the min/max across models.
            lo = values.min()
            hi = values.max()
            span = hi - lo
            if pd.isna(span) or span == 0:
                scaled = blank
            else:
                scaled = (values - lo) / span  # already within 0..1

        # Flip the scale, after normalising, when lower values are better.
        shade.loc[label] = (1.0 - scaled) if label in lower_is_better else scaled

    # Rows listed in exclude_rows are not coloured.
    coloured_rows = numeric.index.difference(list(exclude_rows))

    # Red -> white -> green colormap.
    palette = LinearSegmentedColormap.from_list(
        "red_white_green", ["#d73027", "#ffffff", "#1a9850"]
    )

    styler = numeric.style.format(fmt)
    return styler.background_gradient(
        axis=None,
        cmap=palette,
        gmap=shade,
        subset=pd.IndexSlice[coloured_rows, :],  # type: ignore
    )

# Parameter declaration

In [147]:
# Open the database connection that is later passed to build_ml_dataframe.
conn = get_connection()

# Bar timeframe and symbol universe used to build the dataset.
timeframe = "1Day"
symbols = TARGET_STOCKS

# NOTE(review): start/end are not passed to any call in the visible cells.
start = None
end = None

# Technical indicators (disabled; example names kept below).
include_indicators = False
indicator_names = []
# indicator_names = ['RSI_14', 'BBB_20_2.0', 'BBP_20_2.0', 'ATRr_14']

# Economic indicators (disabled; example names kept below).
include_economic_indicators = False
econ_indicator_names = []
# econ_indicator_names = ['CPI', 'UNEMPLOYMENT']

# FMP features (disabled).
include_fmp = False
fmp_feature_names = []
keep_fmp_asof_date = False
fmp_prefix = 'fmp'

# -----------------------
# CHOOSE THE LOOKBACK HERE
# -----------------------
lookback = TARGET_LOOKBACKS[2]  # <-- change it

# 3 baseline horizons (feel free to edit)
#horizons = [5, 20, 60]
horizon = TARGET_HORIZONS[2]

# Raw per-bar columns used as CNN channels.
base_feature_cols = ['open', 'high', 'low', 'close', 'volume', 'trade_count']

lags_by_feature = None
# NOTE(review): the dataset cell passes default_lags=None, not this variable.
default_lags = lookback


# Final channel list: base columns followed by every enabled extra feature.
feature_cols = base_feature_cols + indicator_names + econ_indicator_names + fmp_feature_names


print(f"lb={lookback}, h={horizon}")



lb=120, h=20


# Dataset construction

In [148]:
# Raw frame for the selected symbols/timeframe, with the optional feature
# families switched on or off by the flags declared earlier.
df = build_ml_dataframe(
    conn,
    symbols,
    timeframe,
    econ_indicator_names=econ_indicator_names,
    include_indicators=include_indicators,
    indicator_names=indicator_names,
    include_econ=include_economic_indicators,
    include_fmp=include_fmp,
    fmp_feature_names=fmp_feature_names,
    fmp_prefix=fmp_prefix,
    keep_fmp_asof_date=keep_fmp_asof_date,
)

# Supervised view: feature matrix, target series and per-row metadata.
X_df, y_ser, meta = build_supervised_dataset(
    df=df,
    feature_cols=feature_cols,
    lookback=lookback,
    horizon=horizon,
    price_col="close",
    group_col="symbol",
    timestamp_col="timestamp",
    lags_by_feature=lags_by_feature,
    default_lags=None,
)

# Targets as a flat float32 vector.
y = np.asarray(y_ser, dtype=np.float32).ravel()

# Work on a copy of the metadata with both time columns parsed as datetimes.
meta = meta.assign(
    timestamp=pd.to_datetime(meta["timestamp"]),
    target_timestamp=pd.to_datetime(meta["target_timestamp"]),
)

# Simple split

In [149]:
# Chronological 70% / 15% / remainder split.
train_mask, val_mask, test_mask, train_end, val_end = time_split_masks(
    meta, train_frac=0.7, val_frac=0.15,
)

# Slice features, targets and metadata with the three masks.
X_train_df, X_val_df, X_test_df = (
    X_df.loc[mask] for mask in (train_mask, val_mask, test_mask)
)
y_train, y_val, y_test = (
    y[mask] for mask in (train_mask, val_mask, test_mask)
)
meta_train, meta_val, meta_test = (
    meta.loc[mask] for mask in (train_mask, val_mask, test_mask)
)

# Fit the scaler on the training rows only, then apply it to val/test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_df).astype(np.float32)
X_val, X_test = (
    scaler.transform(part).astype(np.float32) for part in (X_val_df, X_test_df)
)

# Column permutation that turns the flat matrix into (channels, time).
feature_names = list(X_df.columns)
perm, n_channels, seq_len = infer_cnn_perm(
    feature_names,
    feature_cols=feature_cols,
    lookback=lookback,
    time_order="oldest_to_newest",
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(n_channels, seq_len)






6 120


## Trial definition

In [150]:
set_seed(3)

# Arguments shared by every candidate architecture.
_trial_common = dict(
    input_dim=X_train.shape[1], n_channels=n_channels, seq_len=seq_len,
    perm=perm, use_bn=True, out_dim=1,
)

# One entry per candidate; models are built in this order so the RNG is
# consumed in the same sequence on every run.
_trial_specs = [
    dict(conv_channels=(32, 64, 64), kernel_size=5, dilations=None,
         dropout=0.10, head_hidden=64),
    # B1 - medium baseline (more capacity)
    dict(conv_channels=(64, 128, 128), kernel_size=5, dilations=None,
         dropout=0.15, head_hidden=128),
    # B2 - deep baseline without dilation (for more local "hierarchy")
    dict(conv_channels=(64, 128, 128, 128), kernel_size=5, dilations=None,
         dropout=0.20, head_hidden=128),
    # B3 - smaller kernel (sometimes generalizes better) + more layers
    dict(conv_channels=(64, 64, 128, 128), kernel_size=3, dilations=None,
         dropout=0.20, head_hidden=128),  # 252 - 20 winner
    dict(conv_channels=(32, 32, 64, 64), kernel_size=3, dilations=None,
         dropout=0.15, head_hidden=64),
    dict(conv_channels=(32, 64, 64, 64, 128), kernel_size=5, dilations=(1, 2, 4, 8, 16),
         dropout=0.10, head_hidden=64),
]

trials: List[CNN1DRegressor] = [
    CNN1DRegressor(**_trial_common, **spec) for spec in _trial_specs
]

## Train config

In [151]:
# Settings shared by every training run below; fields that are not listed
# keep their TrainConfig defaults.
_train_overrides = {
    "lr": 3e-4,
    "weight_decay": 3e-3,
    "batch_size": 256,
    "max_epochs": 80,
    "patience": 10,
    "min_delta": 1e-4,
    # Alternative monitor: "DailyRankIC_mean"
    "monitor_key": "QuantileSpread_sharpe",
}
train_cfg = TrainConfig(**_train_overrides)



# train

In [None]:



# Loaders for the simple chronological split; only the training loader
# shuffles and drops the last incomplete batch.
train_loader = make_loader(X_train, y_train, batch_size=train_cfg.batch_size, shuffle=True, num_workers=train_cfg.num_workers, drop_last=True)
val_loader = make_loader(X_val, y_val, batch_size=train_cfg.batch_size, shuffle=False, num_workers=train_cfg.num_workers, )
test_loader = make_loader(X_test, y_test, batch_size=train_cfg.batch_size, shuffle=False, num_workers=train_cfg.num_workers)

set_seed(1)

# Per-trial results: metrics by name, plus final weights and training history by index.
trials_evaluations: Dict[str, Any] = {}
cnn_candidates_state = [{} for _ in trials]
cnn_candidates_history = [{} for _ in trials]

for i, trial in enumerate(trials):
    name = 'trial' + str(i)
    print(f"TRIAL {i}/{len(trials)} -------------------------")

    # Trains in place; on return `trial` holds the weights train_cnn_one_run loaded back.
    out = train_cnn_one_run(model=trial, train_loader=train_loader,
                            val_loader=val_loader, meta_val=meta_val,
                            train_cfg=train_cfg, device=device)

    yv_true, yv_pred = predict_loader(trial, val_loader, device)
    metrics_val = eval_regression_extended(yv_true, yv_pred, meta=meta_val, deadzone=calculate_deadzone(horizon))

    yt_true, yt_pred = predict_loader(trial, test_loader, device)
    metrics_test = eval_regression_extended(yt_true, yt_pred, meta=meta_test, deadzone=calculate_deadzone(horizon))

    print("Val metrics", metrics_val)
    # Fixed: this line used to print metrics_val under the "test" label.
    print("test metrics", metrics_test)

    trials_evaluations[name] = {
        'val': metrics_val,
        'test': metrics_test,
    }

    cnn_candidates_state[i] = {k: v.detach().cpu().clone() for k, v in trial.state_dict().items()}
    cnn_candidates_history[i] = out['history']


TRIAL 0/6 -------------------------
Epoch: 0/100  val:  {'MAE': 0.1088062971830368, 'MedianAE': 0.08332178741693497, 'RMSE': 0.15232556797001837, 'R2': -0.24774408340454102, 'HitRate(sign)': 0.5102223816355811, 'HitRate(sign,deadzone)': 0.5102223816355811, 'PearsonCorr(IC)': 0.10394364811472757, 'SpearmanCorr(RankIC)': 0.045164454557774375, 'AUC(Sign)': 0.5042709140652976, 'N': 22304, 'N_deadzone': 22304, 'DailyIC_mean': 0.09195662955880204, 'DailyIC_std': 0.12318624933182155, 'DailyIC_tstat': 12.311338276902777, 'DailyIC_frac_pos': 0.7058823529411765, 'DailyIC_N': 272, 'DailyRankIC_mean': 0.03048609166113283, 'DailyRankIC_std': 0.10294030429637019, 'DailyRankIC_tstat': 4.884282279532553, 'DailyRankIC_frac_pos': 0.5036764705882353, 'DailyRankIC_N': 272, 'QuantileSpread_mean': 0.0517484260010807, 'QuantileSpread_std': 0.06878899167894427, 'QuantileSpread_sharpe': 11.942038625328152, 'QuantileSpread_N': 272, 'Conformal_qhat(alpha=0.1)': nan, 'Conformal_coverage(alpha=0.1)': nan, 'Conform

# walk-forward

## Config

In [None]:
from machine_learning.data_collectors import make_test_mask, make_walk_forward_plan


VAL_DAYS   = 126          # 6m
STEP_DAYS  = 126          # 6m (retraining step)
TEST_DAYS  = 504          # 2y holdout
TRAIN_DAYS = 252 * 5      # rolling 5y (compare with expanding => TRAIN_DAYS=None)
EMBARGO_DAYS = 0          # optional; with purging by target_timestamp, normally 0

# Prefer a horizon-specific target timestamp column when the metadata has one.
target_col = f"target_timestamp_{horizon}" if f"target_timestamp_{horizon}" in meta.columns else "target_timestamp"

wf = make_walk_forward_plan(
    meta,
    val_days=VAL_DAYS,
    step_days=STEP_DAYS,
    test_days=TEST_DAYS,
    train_days=TRAIN_DAYS,
    embargo_days=EMBARGO_DAYS,
    min_train_days=252*3,
    timestamp_col="timestamp",
)

# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
print(f"WF folds: {len(wf['folds'])} | test: {wf['test_start'].date()} ->{wf['test_end'].date()}")

# Fresh result containers for the walk-forward experiment.
trials_evaluations: dict[str, dict] = {}

cnn_candidates_state   = [{} for _ in range(len(trials))]
cnn_candidates_history = [{} for _ in range(len(trials))]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

feature_names = list(X_df.columns)
perm, n_channels, seq_len = infer_cnn_perm(feature_names, feature_cols=feature_cols, lookback=lookback)

# Holdout rows for the final test window of the plan.
test_mask = make_test_mask(
    meta,
    test_start=wf["test_start"],
    test_end=wf["test_end"],
    target_col=target_col,
)

X_test_df = X_df.loc[test_mask]
y_test = y[test_mask]
meta_test = meta.loc[test_mask].reset_index(drop=True)



## Actual walk-forward

In [None]:
import copy

from machine_learning.data_collectors import time_split_mask_by_time_purged

# Without folds there is no model or scaler to evaluate on the test window;
# fail with a clear message instead of a later KeyError/AttributeError.
if not wf["folds"]:
    raise RuntimeError("walk-forward plan contains no folds; nothing to train")

monitor_key = train_cfg.monitor_key
# train_cnn_one_run keeps the epoch with the HIGHEST monitored value, so the
# fold comparison must maximise by default as well (the previous default,
# "min", selected the worst fold). Set train_cfg.monitor_mode to override.
monitor_mode = getattr(train_cfg, "monitor_mode", "max")


def is_better(a, b):
    """Return True when score ``a`` beats ``b`` under ``monitor_mode`` (``b`` may be None)."""
    if b is None:
        return True
    return (a < b) if monitor_mode == "min" else (a > b)


for i, model_template in enumerate(trials):

    # Every fold restarts from the template's initial weights.
    base_state = copy.deepcopy(model_template.state_dict())

    print(f"\ntrial: {i} ---------------------")

    fold_summaries = []
    fold_histories = {}

    last_fold_model = None
    last_fold_scaler = None

    best_fold_score = None
    best_fold_state = None

    for f in wf["folds"]:
        fold_id = f["fold"]

        train_start = f["train_start"]
        train_end   = f["train_end"]   # embargo cut
        val_start   = f["val_start"]   # purge cut
        val_end     = f["val_end"]

        print(f"fold: {fold_id} train[{train_start.date()} -> {train_end.date()}]  "
              f"val[{val_start.date()} -> {val_end.date()}]")

        model = copy.deepcopy(model_template)
        model.load_state_dict(base_state)
        model.to(device)

        # NOTE(review): `i * 00` is always 0, so the seed depends on the fold
        # only and is identical across trials. If per-trial seeds were
        # intended this should probably be `i * 100` - confirm before changing.
        set_seed(10_000 + i * 00 + fold_id)

        train_mask, val_mask = time_split_mask_by_time_purged(
            meta,
            train_start=train_start,
            train_end=train_end,
            val_start=val_start,
            val_end=val_end,
            timestamp_col="timestamp",
            target_col=target_col,
        )

        X_train_df = X_df.loc[train_mask]
        y_train = y[train_mask]

        X_val_df = X_df.loc[val_mask]
        y_val = y[val_mask]

        meta_val = meta.loc[val_mask].reset_index(drop=True)

        # Scaler fitted per fold, on the training rows only.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train_df).astype(np.float32)
        X_val = scaler.transform(X_val_df).astype(np.float32)

        train_loader = make_loader(
            X_train, y_train,
            batch_size=train_cfg.batch_size,
            shuffle=True,
            num_workers=train_cfg.num_workers,
            drop_last=True
        )
        val_loader = make_loader(
            X_val, y_val,
            batch_size=train_cfg.batch_size,
            shuffle=False,
            num_workers=train_cfg.num_workers,
            drop_last=False
        )

        out = train_cnn_one_run(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            meta_val=meta_val,
            train_cfg=train_cfg,
            device=device,
        )

        # Validation metrics for the weights train_cnn_one_run loaded back.
        yv_true, yv_pred = predict_loader(model, val_loader, device)
        metrics_val = eval_regression_extended(
            yv_true, yv_pred,
            meta=meta_val,
            deadzone=calculate_deadzone(horizon)
        )

        print("Val metrics", {k: metrics_val.get(k) for k in [
            monitor_key, "DailyRankIC_frac_pos", "QuantileSpread_sharpe"
        ]})

        fold_histories[str(fold_id)] = out.get("history", None)

        val_monitor = float(metrics_val[monitor_key])

        fold_summaries.append({
            "fold": fold_id,
            "train_start": str(train_start.date()),
            "train_end": str(train_end.date()),
            "val_start": str(val_start.date()),
            "val_end": str(val_end.date()),
            "monitor": monitor_key,
            "val_monitor": val_monitor,
            "val_frac_pos": float(metrics_val.get("DailyRankIC_frac_pos", np.nan)),
            "val_spread_sharpe": float(metrics_val.get("QuantileSpread_sharpe", np.nan)),
            "best_epoch": int(out.get("best_epoch", -1)),
            "n_train": int(train_mask.sum()),
            "n_val": int(val_mask.sum()),
        })

        trials_evaluations[f"trial{i}_f{fold_id}"] = {
            "fold": fold_id,
            "train_start": str(train_start.date()),
            "train_end": str(train_end.date()),
            "val_start": str(val_start.date()),
            "val_end": str(val_end.date()),
            "val": metrics_val,
            "best_epoch": int(out.get("best_epoch", -1)),
        }

        # Remember the best fold according to the monitor.
        if is_better(val_monitor, best_fold_score):
            best_fold_score = val_monitor
            best_fold_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

        # Keep the last (most recent) fold for the test evaluation.
        last_fold_model = model
        last_fold_scaler = scaler

    # Cross-validation summary.
    fold_df = pd.DataFrame(fold_summaries)
    print(fold_df)
    print("Mean monitor:", fold_df["val_monitor"].mean(), "Std:", fold_df["val_monitor"].std())

    # Test evaluation with the most recent fold's scaler and model.
    X_test = last_fold_scaler.transform(X_test_df).astype(np.float32)
    test_loader = make_loader(
        X_test, y_test,
        batch_size=train_cfg.batch_size,
        shuffle=False,
        num_workers=train_cfg.num_workers,
        drop_last=False
    )

    yt_true, yt_pred = predict_loader(last_fold_model, test_loader, device)
    metrics_test = eval_regression_extended(
        yt_true, yt_pred,
        deadzone=calculate_deadzone(horizon),
        meta=meta_test
    )

    # Per-trial aggregate, stored alongside the per-fold entries.
    trials_evaluations[f"trial{i}"] = {
        "cv": {
            "monitor_key": monitor_key,
            "mean": float(fold_df["val_monitor"].mean()),
            "std": float(fold_df["val_monitor"].std()),
            "folds": fold_summaries,
        },
        "test": metrics_test,
    }

    # State: weights of the last (most recent) fold, kept as the "deploy" candidate.
    cnn_candidates_state[i] = {k: v.detach().cpu().clone() for k, v in last_fold_model.state_dict().items()}

    # History: stored per fold (more useful than only the last one).
    cnn_candidates_history[i] = {
        "fold_histories": fold_histories,
        "fold_summaries": fold_summaries,
        "best_fold_monitor": best_fold_score,
        "best_fold_state": best_fold_state,  # optional: drop it if it is too heavy
    }

    print("Test metrics", {k: metrics_test.get(k) for k in [
        monitor_key, "DailyRankIC_frac_pos", "QuantileSpread_sharpe"
    ]})




ModuleNotFoundError: No module named 'machine_learning'

In [None]:
from machine_learning.data_collectors import make_walk_forward_boundaries, time_split_mask_by_time
from machine_learning.evaluators import calculate_deadzone

# Walk-forward evaluation: every trial is trained and validated on each fold
# returned by make_walk_forward_boundaries, then scored once on a fixed
# hold-out test window.
wf = make_walk_forward_boundaries(meta, n_folds=4, val_days=350, test_days=350)

trials_evaluations: Dict[str, Any] = {}
cnn_candidates_state = [{} for _ in trials]
cnn_candidates_history = [{} for _ in trials]

# --- Loop-invariant setup (identical for every trial and fold) -------------
# perm is the same as long as feature_names and feature_cols are the same
feature_names = list(X_df.columns)
perm, n_channels, seq_len = infer_cnn_perm(feature_names, feature_cols=feature_cols, lookback=lookback)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fixed hold-out test window; only the third mask (rows after val_end) is used.
# NOTE(review): these timestamps are tz-naive while the fold boundaries print
# as UTC-aware — confirm time_split_mask_by_time normalises both.
_, _, test_mask = time_split_mask_by_time(meta, pd.to_datetime('2024-01-01'), pd.to_datetime('2024-10-18'))

X_test = X_df.loc[test_mask]
y_test = y[test_mask]
# Reset the index so the test metadata is handled like meta_val below.
meta_test = meta.loc[test_mask].reset_index(drop=True)

for i, model in enumerate(trials):
    print(f"trial: {i} ---------------------" )

    fold_summaries = []

    # NOTE(review): the same model instance is passed to every fold, so unless
    # train_cnn_one_run re-initialises the weights, each fold continues from
    # the previous fold's weights — confirm this warm start is intended.
    for f in wf["folds"]:
        fold_id = f["fold"]
        train_end = f["train_end"]
        val_end = f["val_end"]
        print(f"fold: {fold_id} {train_end} | {val_end}")

        train_mask, val_mask, _ = time_split_mask_by_time(meta, train_end=train_end, val_end=val_end)

        X_train_df = X_df.loc[train_mask]
        y_train = y[train_mask]

        X_val_df = X_df.loc[val_mask]
        y_val = y[val_mask]

        meta_val = meta.loc[val_mask].reset_index(drop=True)

        # Scaler by fold (fitted on the training rows only)
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train_df).astype(np.float32)
        X_val = scaler.transform(X_val_df).astype(np.float32)

        set_seed(0 + fold_id)

        train_loader = make_loader(X_train, y_train, batch_size=train_cfg.batch_size, shuffle=True, num_workers=train_cfg.num_workers, drop_last=True)
        val_loader = make_loader(X_val, y_val, batch_size=train_cfg.batch_size, shuffle=False, num_workers=train_cfg.num_workers)

        out = train_cnn_one_run(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            meta_val=meta_val,
            train_cfg=train_cfg,
            device=device,
        )

        yv_true, yv_pred = predict_loader(model, val_loader, device)
        metrics_val = eval_regression_extended(yv_true, yv_pred, meta=meta_val, deadzone=calculate_deadzone(horizon))

        print("Val metrics", metrics_val)

        fold_summaries.append({
            "fold": fold_id,
            "train_end": str(train_end.date()),
            "val_end": str(val_end.date()),
            "monitor": train_cfg.monitor_key,
            "val_monitor": float(metrics_val[train_cfg.monitor_key]),
            "val_frac_pos": float(metrics_val.get("DailyRankIC_frac_pos", np.nan)),
            "val_spread_sharpe": float(metrics_val.get("QuantileSpread_sharpe", np.nan)),
            "best_epoch": int(out["best_epoch"]),
        })

        trials_evaluations[f"trial{i}_f{fold_id}"] = {
            'val': metrics_val
        }

    if not fold_summaries:
        # Without folds there is no fitted scaler or trained model to test.
        raise RuntimeError("make_walk_forward_boundaries returned no folds")

    fold_df = pd.DataFrame(fold_summaries)

    # Test uses the model and scaler as they are after the last fold.
    test_loader = make_loader(scaler.transform(X_test).astype(np.float32), y_test, batch_size=train_cfg.batch_size, shuffle=False, num_workers=train_cfg.num_workers, drop_last=False)

    yt_true, yt_pred = predict_loader(model, test_loader, device)
    metrics_test = eval_regression_extended(yt_true, yt_pred, deadzone=calculate_deadzone(horizon), meta=meta_test)

    # Create the per-trial entry explicitly: the original indexed a key that
    # was never created, which raised KeyError. "val" holds the last fold's
    # validation metrics, i.e. those of the model state evaluated on test.
    trials_evaluations[f"trial{i}"] = {
        "cv": {
            "monitor_key": train_cfg.monitor_key,
            "mean": float(fold_df["val_monitor"].mean()),
            "std": float(fold_df["val_monitor"].std()),
            "folds": fold_summaries,
        },
        "val": metrics_val,
        "test": metrics_test,
    }

    # Snapshot the final weights on CPU; history is the last fold's only.
    cnn_candidates_state[i] = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
    cnn_candidates_history[i] = out['history']

    print(fold_df)
    print("Mean monitor:", fold_df["val_monitor"].mean(), "Std:", fold_df["val_monitor"].std())

        





trial: 0 ---------------------
fold: 1 2019-10-18 04:00:00+00:00 | 2020-12-28 05:00:00+00:00
Epoch: 0/80  val:  {'MAE': 0.07790208607912064, 'MedianAE': 0.0485200434923172, 'RMSE': 0.13397896647311067, 'R2': -0.013612508773803711, 'HitRate(sign)': 0.5832921810699588, 'HitRate(sign,deadzone)': 0.590797601744186, 'PearsonCorr(IC)': 0.027753978167332532, 'SpearmanCorr(RankIC)': 0.04448301035266264, 'AUC(Sign)': 0.512146191637522, 'N': 24300, 'N_deadzone': 22016, 'DailyIC_mean': 0.01919526940381557, 'DailyIC_std': 0.17126475555368573, 'DailyIC_tstat': 1.9412740096405328, 'DailyIC_frac_pos': 0.5733333333333334, 'DailyIC_N': 300, 'DailyRankIC_mean': 0.012182738960843178, 'DailyRankIC_std': 0.19509902238227342, 'DailyRankIC_tstat': 1.081559640733827, 'DailyRankIC_frac_pos': 0.4866666666666667, 'DailyRankIC_N': 300, 'QuantileSpread_mean': 0.00821933918632567, 'QuantileSpread_std': 0.0639364574084561, 'QuantileSpread_sharpe': 2.040744355544752, 'QuantileSpread_N': 300, 'Conformal_qhat(alpha=0.1

KeyboardInterrupt: 

# compare models

In [16]:
# Build a metric-by-candidate table for one evaluation split and colour it.
comparing_by = 'val'

# Entries in trials_evaluations do not all carry the same splits (per-fold
# entries only store 'val'; per-trial summaries may store 'cv'/'test'), so
# keep only the candidates that have the requested split instead of raising
# KeyError on the first one that lacks it.
metrics_by_model = {
    candidate: d[comparing_by]
    for candidate, d in trials_evaluations.items()
    if comparing_by in d
}

metrics_mat = metrics_matrix(metrics_by_model=metrics_by_model)

# Per-row (low, high) limits handed to style_metrics_by_row together with clip=True.
row_limits = {
    "DailyRankIC_mean": (-0.02, 0.05),
    "DailyRankIC_frac_pos": (0.40, 0.60),
    "QuantileSpread_sharpe": (-2, 2.0),
    "DailyIC_mean": (-0.01, 0.02),
    "HitRate(sign,deadzone)": (0.4, 0.6),
    "AUC(Sign)": (0.4, 0.6),
}

styled = style_metrics_by_row(
    metrics_mat,
    exclude_rows=("N", "DailyIC_N", "N_deadzone", "DailyIC_tstat", "QuantileSpread_N",
                  "Conformal_avg_width(alpha=0.1)", "Conformal_avg_width(alpha=0.05)"),
    lower_is_better=("QuantileSpread_std", "DailyRankIC_std", "MAE", "MedianAE", "RMSE", "DailyIC_std"),
    row_limits=row_limits,
    clip=True,
)

display(styled)

Unnamed: 0,"trial0_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial0_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial0_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial1_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial1_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial1_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial2_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial2_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial2_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial3_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial3_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial3_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial4_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial4_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 
04:00:00+0000', tz='UTC')}","trial4_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial5_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial5_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial5_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial6_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial6_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial6_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}","trial7_f{'fold': 1, 'train_end': Timestamp('2021-10-15 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC')}","trial7_f{'fold': 2, 'train_end': Timestamp('2022-10-17 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC')}","trial7_f{'fold': 3, 'train_end': Timestamp('2023-10-18 04:00:00+0000', tz='UTC'), 'val_end': Timestamp('2024-10-18 04:00:00+0000', tz='UTC')}"
N,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0,20540.0,20664.0,20664.0
DailyRankIC_mean,0.0165,0.0586,0.1066,0.0127,0.0016,0.1168,-0.04,0.0657,0.1662,-0.0204,-0.017,0.1826,0.0027,0.0618,0.1254,0.083,0.0354,0.0436,0.0553,0.0585,0.1171,-0.0631,0.0852,0.169
DailyRankIC_frac_pos,0.5794,0.6706,0.8175,0.5119,0.496,0.8571,0.3968,0.7103,0.9405,0.4246,0.4484,0.9444,0.5159,0.6746,0.8929,0.7579,0.6032,0.6865,0.6627,0.7302,0.8413,0.3056,0.7619,0.9444
QuantileSpread_sharpe,9.8046,13.2108,11.9835,8.2297,13.9931,12.6511,-1.9159,13.1722,12.8093,7.0437,8.4174,5.2133,1.9377,11.9327,4.8114,12.5855,13.085,6.9909,8.8124,13.8146,5.7601,3.356,12.0515,12.5494
DailyIC_mean,0.031,0.0755,0.0686,0.1622,0.0455,0.0499,-0.0384,0.1388,0.1586,-0.0479,0.0854,0.0613,-0.0067,0.0969,0.0331,0.2508,0.0517,0.0174,0.0457,0.0926,0.0528,0.0064,0.0982,0.1426
"HitRate(sign,deadzone)",0.3825,0.5224,0.4602,0.4008,0.4722,0.4853,0.4015,0.5132,0.5281,0.4155,0.4942,0.4606,0.4192,0.514,0.5039,0.421,0.4976,0.524,0.378,0.5079,0.4895,0.3784,0.5332,0.4878
AUC(Sign),0.5284,0.5164,0.5376,0.547,0.4808,0.5366,0.5086,0.5177,0.5231,0.5127,0.4698,0.5764,0.5197,0.5121,0.5247,0.5807,0.5054,0.5242,0.5581,0.5166,0.5108,0.4986,0.5349,0.598
QuantileSpread_std,0.1207,0.0763,0.0832,0.2778,0.0729,0.0733,0.1011,0.0831,0.206,0.2783,0.076,0.1949,0.1704,0.0792,0.1578,0.2957,0.0728,0.0832,0.2938,0.0725,0.2033,0.2083,0.0639,0.2134
DailyRankIC_std,0.1122,0.126,0.1175,0.117,0.1233,0.113,0.1229,0.1283,0.1129,0.1259,0.1233,0.1117,0.1067,0.1242,0.1036,0.1249,0.1253,0.1037,0.1178,0.1142,0.1135,0.1177,0.1056,0.1133
MAE,0.1509,0.0937,0.1378,0.1511,0.0983,0.1374,0.157,0.0913,0.1383,0.1673,0.0952,0.1422,0.1488,0.0945,0.133,0.1447,0.0946,0.1339,0.1571,0.1038,0.1422,0.1513,0.1198,0.153


# Saving

In [19]:
# Index (into `trials`) of the candidate selected for saving.
chosen = 2
chosen_model = trials[chosen]

# Architecture hyper-parameters read back from the chosen model instance.
cnn_config = {
    "input_dim": chosen_model.input_dim,
    "n_channels": chosen_model.n_channels,
    "seq_len": chosen_model.seq_len,
    "perm": perm,  # list of ints
    "conv_channels": chosen_model.conv_channels,
    "kernel_size": chosen_model.kernel_size,
    "dilations": chosen_model.dilations,
    "dropout": chosen_model.dropout,
    "use_bn": chosen_model.use_bn,
    "head_hidden": chosen_model.head_hidden,
    "out_dim": chosen_model.out_dim,
    "time_order": "oldest_to_newest",
}

# NOTE(review): `out`, `train_end` and `val_end` are whatever the most recent
# training run left in the notebook namespace, which is not necessarily the
# run of trial `chosen` — confirm before relying on best_epoch/best_val_score.
config = {
    "model": "cnn1d_regressor",
    "timeframe": timeframe,
    "symbols": list(symbols),
    "lookback": lookback,
    "horizon": horizon,
    "base_feature_cols": base_feature_cols,
    "include_indicators": include_indicators,
    "econ_indicator_names": econ_indicator_names,
    "include_fmp": include_fmp,
    "fmp_feature_names": fmp_feature_names,
    "fmp_prefix": fmp_prefix,
    "lags_by_feature": None,
    "default_lags": None,
    "cnn_config": cnn_config,
    "best_epoch": int(out["best_epoch"]),
    "best_val_score": float(out["best_score"]),
    "train_cfg": asdict(train_cfg),
    "train_end": train_end,
    "val_end": val_end,
}

# Validation metrics of the chosen trial. The original passed a hard-coded
# *key string* as the .get() default, so a miss stored that string instead of
# a metrics dict. Fall back to the trial's last per-fold entry (keys shaped
# "trial<k>_f<fold>", in insertion order) and to None when nothing matches.
_trial_key = "trial" + str(chosen)
_val_metrics = metrics_by_model.get(_trial_key)
if _val_metrics is None:
    _fold_keys = [k for k in metrics_by_model if k.startswith(_trial_key + "_f")]
    if _fold_keys:
        _val_metrics = metrics_by_model[_fold_keys[-1]]

metrics = {
    "val": _val_metrics
    # "test": metrics_by_model["trial" + str(chosen)],
}


In [20]:
def save_cnn_artifact(run_dir: str, model: nn.Module, scaler: StandardScaler, config: dict,
                    metrics: dict, feature_names: list[str], history: dict) -> None:
    """Persist a trained CNN run to ``run_dir``.

    Files written (the directory is created if needed):

    - ``model.pt``: dict with the model ``state_dict`` (tensors moved to CPU),
      ``input_dim`` (``len(feature_names)``) and ``config["cnn_config"]``.
    - ``scaler.joblib``: the scaler, dumped with joblib.
    - ``config.json``: ``config``; non-JSON values are stringified.
    - ``metrics.json``: ``metrics``; non-JSON values are converted with ``float``.
    - ``feature_names.json``: ``feature_names``.
    - ``history``: ``history`` as JSON (file name kept without extension so
      existing readers keep working).

    Args:
        run_dir: Target directory.
        model: Model whose weights are saved.
        scaler: Scaler to persist alongside the model.
        config: Run configuration; its ``"cnn_config"`` entry is embedded in
            the model payload (``{}`` when absent).
        metrics: Evaluation metrics.
        feature_names: Input column names.
        history: Training history.
    """

    def _history_default(obj):
        # Histories can carry tensors / numpy values that json cannot encode.
        # NOTE: a full state dict inside `history` is written out as nested
        # lists, which can make the file large.
        if isinstance(obj, torch.Tensor):
            return obj.detach().cpu().tolist()
        if hasattr(obj, "tolist"):
            return obj.tolist()
        return str(obj)

    p = Path(run_dir)
    p.mkdir(parents=True, exist_ok=True)

    # Store CPU copies so the checkpoint can be loaded on machines without the
    # device the model was trained on.
    cpu_state = {k: v.detach().cpu() for k, v in model.state_dict().items()}

    model_payload = {
        "state_dict": cpu_state,
        "input_dim": len(feature_names),
        "config": config.get("cnn_config", {}),
    }

    torch.save(model_payload, p / "model.pt")

    # StandardScaler
    joblib.dump(scaler, p / "scaler.joblib")

    with open(p / "config.json", "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2, default=str)

    with open(p / "metrics.json", "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2, default=float)

    with open(p / "feature_names.json", "w", encoding="utf-8") as f:
        json.dump(feature_names, f, indent=2)

    with open(p / "history", "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2, default=_history_default)

    # Predictions (not this time)

In [None]:
# 0/1 flags encoded in the run directory name. bool() keeps int() from
# receiving a non-numeric operand (e.g. None) out of the `and` chain.
ind_flag = int(bool(include_indicators))
econ_flag = int(bool(
    include_economic_indicators
    and econ_indicator_names is not None
    and len(econ_indicator_names) > 0
))
fmp_flag = int(bool(include_fmp))

conv_chan = "-".join(map(str, trials[chosen].conv_channels))

run_dir = (
    f"runs/cnn1d_{timeframe}_lb{lookback}_h{horizon}"
    f"_indicators{ind_flag}_econ{econ_flag}_fmp{fmp_flag}"
    f"_trial{chosen}_convchan{conv_chan}"
)

# Restore the stored weights into the chosen model. load_state_dict works in
# place and returns a missing/unexpected-keys report, not the model, so its
# result must not be bound to model_to_save.
model_to_save = trials[chosen]
model_to_save.load_state_dict(cnn_candidates_state[chosen])

# NOTE(review): `scaler` and `feature_names` are the objects left in the
# notebook namespace by the last training run — confirm they belong to the
# chosen trial.
save_cnn_artifact(
    run_dir=run_dir,
    scaler=scaler,
    model=model_to_save,
    config=config,
    metrics=trials_evaluations['trial' + str(chosen)],
    feature_names=feature_names,
    history=cnn_candidates_history[chosen],
)
    

