In [1]:
import sys
import os
# Make the repository root importable. The relative path is resolved against
# the kernel's current working directory, so this assumes the notebook is
# launched from its own folder (two levels below the project root).
PROJECT_ROOT = os.path.abspath('../..')
# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# AJUSTA ESTOS IMPORTS a tu proyecto real:
from machine_learning.data_collectors import build_ml_dataframe, build_supervised_dataset
from machine_learning.evaluators import eval_regression_extended

from machine_learning.tcn.train_tcn import train_eval_tcn, TrainTCNConfig
from machine_learning.artifacts import load_model_artifact_auto

from machine_learning.data_collectors import (
    build_ml_dataframe,
    build_supervised_dataset,
    time_split_masks,
    purged_ts_cv_splits,
    TARGET_HORIZONS,
    TARGET_LOOKBACKS,
    parse_feat_lag
)
from database_tier1 import TARGET_STOCKS
from python_scripts.LLM_analysis.preprocess_store_database import get_connection

import pandas as pd

from train_walk_forward_tcn import run_walk_forward_tcn, ExperimentConfig, TrainHP
from walk_forward import WalkForwardConfig


[DB_PATH_DEFAULT] /workspace/finance-modelling/data/stock_data.db
{'foreign_keys': 1, 'journal_mode': 'wal', 'synchronous': 1}


In [2]:
# 1) Load data (long format: symbol, timestamp, OHLCV, indicators, etc.)
conn = get_connection()

# Universe and bar frequency.
timeframe = "1Day"
symbols = TARGET_STOCKS

# Optional date bounds; both are left unset here.
start, end = None, None

# Technical indicators: switched off, with an empty name list.
# Example names: ['RSI_14', 'BBB_20_2.0', 'BBP_20_2.0', 'ATRr_14']
indicator_names = []
include_indicators = False

# Economic indicators: switched off, with an empty name list.
# Example names: ['CPI', 'UNEMPLOYMENT']
econ_indicator_names = []
include_economic_indicators = False

# FMP features: switched off, with an empty name list.
fmp_feature_names = []
include_fmp = False
fmp_prefix = 'fmp'
keep_fmp_asof_date = False

# NOTE(review): the build_ml_dataframe call in the following cell passes
# literal values for start/end and the include_* arguments rather than the
# settings above -- confirm which of the two is intended.

# -----------------------
# PICK THE LOOKBACK HERE
# -----------------------
lookback = TARGET_LOOKBACKS[3]  # <-- change this index to try another window

# Single prediction horizon; a three-horizon baseline would be e.g. [5, 20, 60].
horizon = TARGET_HORIZONS[2]

# No per-feature lag overrides; default_lags mirrors the lookback value.
lags_by_feature = None
default_lags = lookback

# Raw bar columns followed by whichever optional feature names are configured above.
base_feature_cols = ['open', 'high', 'low', 'close', 'volume', 'trade_count']
feature_cols = [
    *base_feature_cols,
    *indicator_names,
    *econ_indicator_names,
    *fmp_feature_names,
]

print(f"lb={lookback}, h={horizon}")





lb=252, h=20


In [3]:

# Build the long-format modelling frame for the configured symbols.
# `timeframe` comes from the configuration cell so the bar size is defined in
# exactly one place.
# NOTE(review): start/end and the include_* arguments below are literals that
# differ from the configuration cell's values (start/end = None,
# include_indicators = False, include_economic_indicators = False) --
# confirm which set is intended before relying on either.
df = build_ml_dataframe(
    conn,
    symbols=symbols,
    timeframe=timeframe,
    start="2015-01-01",
    end="2025-12-31",
    include_indicators=True,
    include_econ=True,
    include_fmp=False,
)

# 2) feature_cols: every column of df except the identifier columns.
# This intentionally replaces the feature_cols list built in the
# configuration cell.
non_feature_cols = {"symbol", "timestamp", "timeframe"}
feature_cols = [c for c in df.columns if c not in non_feature_cols]

# 3) Explicit sanity check: classic build_supervised_dataset call with a
# fixed lookback=60 / horizon=5 and no per-feature lag overrides.
X_wide_5, y_5, meta_5 = build_supervised_dataset(
    df,
    feature_cols=feature_cols,
    lookback=60,
    horizon=5,
    price_col="close",
    group_col="symbol",
    timestamp_col="timestamp",
    lags_by_feature=None,   # passed explicitly, as requested
)
print("Sanity check horizon=5:", X_wide_5.shape, y_5.shape, meta_5.columns)
# The walk-forward splitter is configured with target_col="target_timestamp",
# so the metadata frame must expose that column.
assert "target_timestamp" in meta_5.columns, "meta debe incluir target_timestamp"
# Features and targets must be row-aligned.
assert X_wide_5.shape[0] == y_5.shape[0], "X and y must have the same number of rows"



Sanity check horizon=5: (198102, 420) (198102,) Index(['symbol', 'timestamp', 'target_timestamp'], dtype='str')


In [None]:

# 4) Walk-forward + TCN
# `lookback` and `timeframe` are taken from the configuration cell so that
# changing them there actually changes the experiment (previously they were
# re-derived here as TARGET_LOOKBACKS[3] and "1Day").
cfg = ExperimentConfig(
    lookback=lookback,
    horizons=(5, 20, 60),
    wf=WalkForwardConfig(
        target_col="target_timestamp",
        train_span=252*5,   # rolling 5 years (in target timestamps)
        # NOTE(review): the three spans below were labelled "6m" / "semiannual
        # retraining", but with the 252-sessions-per-year convention used for
        # train_span, 256 is roughly one year (six months would be ~126).
        # Values are left untouched; confirm which was intended.
        val_span=256,
        test_span=256,
        step_span=256,
        # NOTE(review): no embargo although horizons reach 60 sessions --
        # confirm the walk-forward splitter handles overlapping label windows.
        embargo_span=0,
        min_train_span=252*3,
    ),
    seed=0,
    run_base_dir="../runs",
    run_name=None,
    device="cuda",
    train_hp=TrainHP(num_workers=8, pin_memory=True),
    timeframe=timeframe
)

# (Optional) simple hyperparameter selection (example: try ic_lambda values)
hp_candidates = [
    {"loss_hp": {"ic_lambda": 0.0}},   # SmoothL1 only
    {"loss_hp": {"ic_lambda": 0.2}},   # SmoothL1 + IC loss
]

out = run_walk_forward_tcn(
    df,
    feature_cols=feature_cols,
    build_supervised_dataset_fn=build_supervised_dataset,
    eval_fn=eval_regression_extended,
    cfg=cfg,
    hp_candidates=hp_candidates,
)

# Summary of the run: output directory, final artifact verification diffs,
# and the per-fold aggregate table.
print("run_dir:", out["run_dir"])
print("final verify diffs:", out["final"]["verify"])
print("tabla folds:\n", out["agg_table"])


=== Hyperparam selection (val mean DailyIC promedio en folds) ===


  WeightNorm.apply(module, name, dim)


[fold 01][epoch 001] train_loss=0.008708 | val_mean_dailyIC=0.012250 | best=0.012250@1 | no_improve=0time_train=9.2s time_valpred=1.1s time_score=0.2s total=10.5s
[fold 01][epoch 002] train_loss=0.006032 | val_mean_dailyIC=0.024320 | best=0.024320@2 | no_improve=0time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.3s
[fold 01][epoch 003] train_loss=0.005894 | val_mean_dailyIC=-0.036021 | best=0.024320@2 | no_improve=1time_train=7.2s time_valpred=1.1s time_score=0.1s total=8.4s
[fold 01][epoch 004] train_loss=0.005881 | val_mean_dailyIC=0.022169 | best=0.024320@2 | no_improve=2time_train=7.0s time_valpred=1.0s time_score=0.1s total=8.1s
[fold 01][epoch 005] train_loss=0.005796 | val_mean_dailyIC=0.036694 | best=0.036694@5 | no_improve=0time_train=7.2s time_valpred=1.0s time_score=0.1s total=8.3s
[fold 01][epoch 006] train_loss=0.005758 | val_mean_dailyIC=-0.007090 | best=0.036694@5 | no_improve=1time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.2s
[fold 01][epoch 007] trai

  WeightNorm.apply(module, name, dim)


[fold 02][epoch 001] train_loss=0.009544 | val_mean_dailyIC=0.015425 | best=0.015425@1 | no_improve=0time_train=7.0s time_valpred=1.1s time_score=0.1s total=8.2s
[fold 02][epoch 002] train_loss=0.007667 | val_mean_dailyIC=0.013969 | best=0.015425@1 | no_improve=1time_train=7.3s time_valpred=1.0s time_score=0.1s total=8.5s
[fold 02][epoch 003] train_loss=0.007668 | val_mean_dailyIC=0.034580 | best=0.034580@3 | no_improve=0time_train=7.4s time_valpred=1.0s time_score=0.1s total=8.6s
[fold 02][epoch 004] train_loss=0.007545 | val_mean_dailyIC=0.009880 | best=0.034580@3 | no_improve=1time_train=7.4s time_valpred=1.0s time_score=0.1s total=8.6s
[fold 02][epoch 005] train_loss=0.007463 | val_mean_dailyIC=0.010865 | best=0.034580@3 | no_improve=2time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.2s
[fold 02][epoch 006] train_loss=0.007406 | val_mean_dailyIC=0.043394 | best=0.043394@6 | no_improve=0time_train=6.9s time_valpred=1.0s time_score=0.1s total=8.1s
[fold 02][epoch 007] train_l

  WeightNorm.apply(module, name, dim)


[fold 03][epoch 001] train_loss=0.010699 | val_mean_dailyIC=0.010574 | best=0.010574@1 | no_improve=0time_train=7.4s time_valpred=1.0s time_score=0.1s total=8.5s
[fold 03][epoch 002] train_loss=0.008217 | val_mean_dailyIC=-0.033856 | best=0.010574@1 | no_improve=1time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.2s
[fold 03][epoch 003] train_loss=0.008264 | val_mean_dailyIC=0.049062 | best=0.049062@3 | no_improve=0time_train=7.0s time_valpred=1.1s time_score=0.1s total=8.2s
[fold 03][epoch 004] train_loss=0.008127 | val_mean_dailyIC=-0.015066 | best=0.049062@3 | no_improve=1time_train=8.2s time_valpred=1.4s time_score=0.1s total=9.8s
[fold 03][epoch 005] train_loss=0.008064 | val_mean_dailyIC=0.008739 | best=0.049062@3 | no_improve=2time_train=7.2s time_valpred=1.1s time_score=0.1s total=8.4s
[fold 03][epoch 006] train_loss=0.008078 | val_mean_dailyIC=-0.010827 | best=0.049062@3 | no_improve=3time_train=7.9s time_valpred=1.4s time_score=0.1s total=9.4s
[fold 03][epoch 007] trai

  WeightNorm.apply(module, name, dim)


[fold 04][epoch 001] train_loss=0.010095 | val_mean_dailyIC=-0.019329 | best=-0.019329@1 | no_improve=0time_train=8.7s time_valpred=1.1s time_score=0.1s total=9.9s
[fold 04][epoch 002] train_loss=0.008413 | val_mean_dailyIC=-0.013897 | best=-0.013897@2 | no_improve=0time_train=7.0s time_valpred=1.0s time_score=0.2s total=8.3s
[fold 04][epoch 003] train_loss=0.008340 | val_mean_dailyIC=-0.040294 | best=-0.013897@2 | no_improve=1time_train=7.4s time_valpred=1.0s time_score=0.2s total=8.6s
[fold 04][epoch 004] train_loss=0.008300 | val_mean_dailyIC=-0.073138 | best=-0.013897@2 | no_improve=2time_train=7.3s time_valpred=1.1s time_score=0.1s total=8.6s
[fold 04][epoch 005] train_loss=0.008190 | val_mean_dailyIC=-0.016034 | best=-0.013897@2 | no_improve=3time_train=8.3s time_valpred=1.1s time_score=0.1s total=9.6s
[fold 04][epoch 006] train_loss=0.008128 | val_mean_dailyIC=-0.023099 | best=-0.013897@2 | no_improve=4time_train=7.8s time_valpred=1.2s time_score=0.1s total=9.2s
[fold 04][epoch 

  WeightNorm.apply(module, name, dim)


[fold 05][epoch 001] train_loss=0.009608 | val_mean_dailyIC=0.025677 | best=0.025677@1 | no_improve=0time_train=7.3s time_valpred=1.0s time_score=0.1s total=8.5s
[fold 05][epoch 002] train_loss=0.008287 | val_mean_dailyIC=-0.009342 | best=0.025677@1 | no_improve=1time_train=7.6s time_valpred=1.4s time_score=0.1s total=9.1s
[fold 05][epoch 003] train_loss=0.008224 | val_mean_dailyIC=-0.000261 | best=0.025677@1 | no_improve=2time_train=7.7s time_valpred=1.4s time_score=0.1s total=9.2s
[fold 05][epoch 004] train_loss=0.008178 | val_mean_dailyIC=-0.010039 | best=0.025677@1 | no_improve=3time_train=9.3s time_valpred=1.1s time_score=0.1s total=10.5s
[fold 05][epoch 005] train_loss=0.008073 | val_mean_dailyIC=-0.003123 | best=0.025677@1 | no_improve=4time_train=7.5s time_valpred=1.3s time_score=0.1s total=9.0s
[fold 05][epoch 006] train_loss=0.008049 | val_mean_dailyIC=0.013374 | best=0.025677@1 | no_improve=5time_train=8.2s time_valpred=1.1s time_score=0.1s total=9.4s
[fold 05][epoch 007] tr

  WeightNorm.apply(module, name, dim)


[fold 06][epoch 001] train_loss=0.011101 | val_mean_dailyIC=0.016594 | best=0.016594@1 | no_improve=0time_train=7.6s time_valpred=1.2s time_score=0.1s total=8.9s
[fold 06][epoch 002] train_loss=0.009688 | val_mean_dailyIC=0.025665 | best=0.025665@2 | no_improve=0time_train=7.5s time_valpred=1.1s time_score=0.2s total=8.7s
[fold 06][epoch 003] train_loss=0.009605 | val_mean_dailyIC=0.036169 | best=0.036169@3 | no_improve=0time_train=7.4s time_valpred=1.2s time_score=0.1s total=8.7s
[fold 06][epoch 004] train_loss=0.009499 | val_mean_dailyIC=0.031907 | best=0.036169@3 | no_improve=1time_train=7.6s time_valpred=1.1s time_score=0.2s total=8.8s
[fold 06][epoch 005] train_loss=0.009415 | val_mean_dailyIC=0.038388 | best=0.038388@5 | no_improve=0time_train=7.3s time_valpred=1.2s time_score=0.1s total=8.6s
[fold 06][epoch 006] train_loss=0.009440 | val_mean_dailyIC=0.040308 | best=0.040308@6 | no_improve=0time_train=7.3s time_valpred=1.3s time_score=0.1s total=8.7s
[fold 06][epoch 007] train_l

  WeightNorm.apply(module, name, dim)


[fold 01][epoch 001] train_loss=0.003428 | val_mean_dailyIC=-0.022537 | best=-0.022537@1 | no_improve=0time_train=7.6s time_valpred=1.1s time_score=0.1s total=8.8s
[fold 01][epoch 002] train_loss=-0.005950 | val_mean_dailyIC=0.019916 | best=0.019916@2 | no_improve=0time_train=7.2s time_valpred=1.1s time_score=0.1s total=8.5s
[fold 01][epoch 003] train_loss=-0.008295 | val_mean_dailyIC=-0.010937 | best=0.019916@2 | no_improve=1time_train=6.9s time_valpred=1.1s time_score=0.1s total=8.1s
[fold 01][epoch 004] train_loss=-0.011645 | val_mean_dailyIC=-0.043434 | best=0.019916@2 | no_improve=2time_train=7.0s time_valpred=1.1s time_score=0.1s total=8.2s
[fold 01][epoch 005] train_loss=-0.014749 | val_mean_dailyIC=0.009369 | best=0.019916@2 | no_improve=3time_train=7.2s time_valpred=1.1s time_score=0.1s total=8.4s
[fold 01][epoch 006] train_loss=-0.017371 | val_mean_dailyIC=0.002895 | best=0.019916@2 | no_improve=4time_train=7.4s time_valpred=1.1s time_score=0.1s total=8.6s
[fold 01][epoch 007

  WeightNorm.apply(module, name, dim)


[fold 02][epoch 001] train_loss=0.008276 | val_mean_dailyIC=0.020015 | best=0.020015@1 | no_improve=0time_train=8.1s time_valpred=1.1s time_score=0.2s total=9.3s
[fold 02][epoch 002] train_loss=-0.001719 | val_mean_dailyIC=0.038188 | best=0.038188@2 | no_improve=0time_train=6.9s time_valpred=1.1s time_score=0.1s total=8.1s
[fold 02][epoch 003] train_loss=-0.005315 | val_mean_dailyIC=0.012581 | best=0.038188@2 | no_improve=1time_train=7.1s time_valpred=1.1s time_score=0.1s total=8.3s
[fold 02][epoch 004] train_loss=-0.008177 | val_mean_dailyIC=-0.007007 | best=0.038188@2 | no_improve=2time_train=7.2s time_valpred=1.0s time_score=0.2s total=8.3s
[fold 02][epoch 005] train_loss=-0.011252 | val_mean_dailyIC=0.027904 | best=0.038188@2 | no_improve=3time_train=7.0s time_valpred=1.0s time_score=0.1s total=8.2s
[fold 02][epoch 006] train_loss=-0.017142 | val_mean_dailyIC=0.052474 | best=0.052474@6 | no_improve=0time_train=7.4s time_valpred=1.1s time_score=0.1s total=8.6s
[fold 02][epoch 007] t

  WeightNorm.apply(module, name, dim)


[fold 03][epoch 001] train_loss=0.005883 | val_mean_dailyIC=0.068534 | best=0.068534@1 | no_improve=0time_train=7.4s time_valpred=1.0s time_score=0.2s total=8.6s
[fold 03][epoch 002] train_loss=-0.002913 | val_mean_dailyIC=0.024217 | best=0.068534@1 | no_improve=1time_train=7.5s time_valpred=1.1s time_score=0.1s total=8.7s
[fold 03][epoch 003] train_loss=-0.007195 | val_mean_dailyIC=0.073522 | best=0.073522@3 | no_improve=0time_train=7.6s time_valpred=1.1s time_score=0.1s total=8.8s
[fold 03][epoch 004] train_loss=-0.008551 | val_mean_dailyIC=0.054289 | best=0.073522@3 | no_improve=1time_train=7.2s time_valpred=1.3s time_score=0.1s total=8.7s
[fold 03][epoch 005] train_loss=-0.010592 | val_mean_dailyIC=0.076964 | best=0.076964@5 | no_improve=0time_train=7.5s time_valpred=1.3s time_score=0.1s total=9.0s
[fold 03][epoch 006] train_loss=-0.014158 | val_mean_dailyIC=0.034764 | best=0.076964@5 | no_improve=1time_train=7.7s time_valpred=1.3s time_score=0.1s total=9.2s
[fold 03][epoch 007] tr

  WeightNorm.apply(module, name, dim)


[fold 04][epoch 001] train_loss=0.005841 | val_mean_dailyIC=-0.015578 | best=-0.015578@1 | no_improve=0time_train=7.7s time_valpred=1.0s time_score=0.1s total=8.8s
[fold 04][epoch 002] train_loss=-0.000809 | val_mean_dailyIC=0.018775 | best=0.018775@2 | no_improve=0time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.3s
[fold 04][epoch 003] train_loss=-0.005793 | val_mean_dailyIC=-0.010307 | best=0.018775@2 | no_improve=1time_train=7.0s time_valpred=1.1s time_score=0.1s total=8.2s
[fold 04][epoch 004] train_loss=-0.007140 | val_mean_dailyIC=0.055180 | best=0.055180@4 | no_improve=0time_train=7.2s time_valpred=1.0s time_score=0.1s total=8.3s
[fold 04][epoch 005] train_loss=-0.008407 | val_mean_dailyIC=0.011687 | best=0.055180@4 | no_improve=1time_train=7.5s time_valpred=1.1s time_score=0.1s total=8.7s
[fold 04][epoch 006] train_loss=-0.010811 | val_mean_dailyIC=0.005713 | best=0.055180@4 | no_improve=2time_train=7.3s time_valpred=1.0s time_score=0.1s total=8.5s
[fold 04][epoch 007]

  WeightNorm.apply(module, name, dim)


[fold 05][epoch 001] train_loss=0.006638 | val_mean_dailyIC=-0.009937 | best=-0.009937@1 | no_improve=0time_train=7.4s time_valpred=1.1s time_score=0.1s total=8.7s
[fold 05][epoch 002] train_loss=-0.005010 | val_mean_dailyIC=-0.031583 | best=-0.009937@1 | no_improve=1time_train=7.5s time_valpred=1.1s time_score=0.1s total=8.7s
[fold 05][epoch 003] train_loss=-0.008282 | val_mean_dailyIC=0.009540 | best=0.009540@3 | no_improve=0time_train=7.5s time_valpred=1.1s time_score=0.1s total=8.8s
[fold 05][epoch 004] train_loss=-0.010324 | val_mean_dailyIC=-0.015366 | best=0.009540@3 | no_improve=1time_train=7.6s time_valpred=1.0s time_score=0.1s total=8.8s
[fold 05][epoch 005] train_loss=-0.012653 | val_mean_dailyIC=0.019328 | best=0.019328@5 | no_improve=0time_train=7.1s time_valpred=1.1s time_score=0.1s total=8.4s
[fold 05][epoch 006] train_loss=-0.015254 | val_mean_dailyIC=0.008488 | best=0.019328@5 | no_improve=1time_train=7.6s time_valpred=1.0s time_score=0.2s total=8.7s
[fold 05][epoch 00

  WeightNorm.apply(module, name, dim)


[fold 06][epoch 001] train_loss=0.009289 | val_mean_dailyIC=0.006628 | best=0.006628@1 | no_improve=0time_train=7.1s time_valpred=1.0s time_score=0.1s total=8.3s
[fold 06][epoch 002] train_loss=-0.002845 | val_mean_dailyIC=0.024741 | best=0.024741@2 | no_improve=0time_train=7.1s time_valpred=1.1s time_score=0.2s total=8.3s
[fold 06][epoch 003] train_loss=-0.007287 | val_mean_dailyIC=0.069409 | best=0.069409@3 | no_improve=0time_train=8.0s time_valpred=1.1s time_score=0.1s total=9.2s
[fold 06][epoch 004] train_loss=-0.009960 | val_mean_dailyIC=0.059699 | best=0.069409@3 | no_improve=1time_train=7.4s time_valpred=1.1s time_score=0.1s total=8.6s
[fold 06][epoch 005] train_loss=-0.011490 | val_mean_dailyIC=0.035332 | best=0.069409@3 | no_improve=2time_train=7.5s time_valpred=1.0s time_score=0.1s total=8.7s
[fold 06][epoch 006] train_loss=-0.013804 | val_mean_dailyIC=0.048946 | best=0.069409@3 | no_improve=3time_train=7.3s time_valpred=1.1s time_score=0.1s total=8.5s
[fold 06][epoch 007] tr

  WeightNorm.apply(module, name, dim)


[fold 01][epoch 001] train_loss=0.004798 | val_mean_dailyIC=-0.095801 | best=-0.095801@1 | no_improve=0time_train=7.3s time_valpred=1.1s time_score=0.1s total=8.6s
[fold 01][epoch 002] train_loss=-0.004446 | val_mean_dailyIC=-0.006653 | best=-0.006653@2 | no_improve=0time_train=7.5s time_valpred=1.0s time_score=0.1s total=8.6s
[fold 01][epoch 003] train_loss=-0.007368 | val_mean_dailyIC=-0.017154 | best=-0.006653@2 | no_improve=1time_train=7.2s time_valpred=1.1s time_score=0.1s total=8.4s
[fold 01][epoch 004] train_loss=-0.009411 | val_mean_dailyIC=-0.049760 | best=-0.006653@2 | no_improve=2time_train=7.3s time_valpred=1.1s time_score=0.1s total=8.5s
[fold 01][epoch 005] train_loss=-0.010914 | val_mean_dailyIC=-0.005858 | best=-0.005858@5 | no_improve=0time_train=7.8s time_valpred=1.7s time_score=0.1s total=9.6s
[fold 01][epoch 006] train_loss=-0.012780 | val_mean_dailyIC=-0.017152 | best=-0.005858@5 | no_improve=1time_train=8.3s time_valpred=1.1s time_score=0.1s total=9.5s
[fold 01][e

  WeightNorm.apply(module, name, dim)


[fold 01] artifact verify: {'max_abs_diff': 0.0, 'mean_abs_diff': 0.0}


  WeightNorm.apply(module, name, dim)


[fold 02][epoch 001] train_loss=0.010319 | val_mean_dailyIC=0.012296 | best=0.012296@1 | no_improve=0time_train=7.8s time_valpred=1.1s time_score=0.1s total=9.1s
[fold 02][epoch 002] train_loss=-0.000573 | val_mean_dailyIC=0.048615 | best=0.048615@2 | no_improve=0time_train=7.8s time_valpred=1.1s time_score=0.2s total=9.1s
