# 1. Ingesta y Filtrado de Datos (Data Ingestion)

Este bloque carga los datos históricos de mercado (EURUSD) recopilados tick a tick. Se procesan millones de registros para filtrar únicamente los horarios de mayor liquidez (Sesiones de Londres y Nueva York, 07:00 - 18:00 UTC), garantizando que la simulación utilice precios realistas con spreads bajos y sin gaps significativos.

In [None]:
import polars as pl
from pathlib import Path

DATA_DIR = Path("/data/EURUSD")
OUT_PARQUET = Path("eurusd_5s_2021_2025_07_18utc.parquet")

files = sorted(DATA_DIR.glob("*.parquet"))
assert len(files) > 0, "No se encontraron .parquet en la carpeta."

lf = (
    pl.scan_parquet([str(f) for f in files])
    .select([
        "ts", "open", "high", "low", "close", "ticks",
        "bid_last", "ask_last", "bid_vol_sum", "ask_vol_sum"
    ])
    .filter(
        (pl.col("ts").dt.time() >= pl.time(7, 0, 0)) &
        (pl.col("ts").dt.time() <  pl.time(18, 0, 0))
    )
    .sort("ts")
)

lf.sink_parquet(str(OUT_PARQUET))

df = pl.read_parquet(str(OUT_PARQUET)).sort("ts")
df_5s = df 

row_count = df.height
min_ts = df.select(pl.col("ts").min()).item()
max_ts = df.select(pl.col("ts").max()).item()

nulls = df.select([pl.col(c).null_count().alias(c) for c in df.columns])

dup_ts = df.select(pl.col("ts").is_duplicated().sum().alias("dup_ts")).item()

df_dt = df.with_columns(
    (pl.col("ts").diff().dt.total_seconds()).alias("dt_seconds")
)

gaps_gt_5s = df_dt.select((pl.col("dt_seconds") > 5).sum().alias("gaps_gt_5s")).item()
neg_dt = df_dt.select((pl.col("dt_seconds") < 0).sum().alias("neg_dt")).item()
weird_step = df_dt.select(
    ((pl.col("dt_seconds").is_not_null()) & (pl.col("dt_seconds") % 5 != 0)).sum().alias("weird_step")
).item()

ohlc_bad = df.select(
    (
        (pl.col("high") < pl.max_horizontal(["open", "close"])) |
        (pl.col("low")  > pl.min_horizontal(["open", "close"])) |
        (pl.col("high") < pl.col("low"))
    ).sum().alias("ohlc_bad_rows")
).item()

df_spread = df.with_columns((pl.col("ask_last") - pl.col("bid_last")).alias("spread"))
spread_neg = df_spread.select((pl.col("spread") < 0).sum().alias("spread_neg")).item()
spread_p999 = df_spread.select(pl.col("spread").quantile(0.999).alias("spread_p999")).item()

ticks_neg = df.select((pl.col("ticks") < 0).sum().alias("ticks_neg")).item()
bidvol_neg = df.select((pl.col("bid_vol_sum") < 0).sum().alias("bidvol_neg")).item()
askvol_neg = df.select((pl.col("ask_vol_sum") < 0).sum().alias("askvol_neg")).item()

qc = pl.DataFrame([{
    "rows": row_count,
    "min_ts": min_ts,
    "max_ts": max_ts,
    "dup_ts": dup_ts,
    "gaps_gt_5s": gaps_gt_5s,
    "neg_dt": neg_dt,
    "weird_step_non_multiple_5s": weird_step,
    "ohlc_bad_rows": ohlc_bad,
    "spread_neg": spread_neg,
    "spread_p999": spread_p999,
    "ticks_neg": ticks_neg,
    "bidvol_neg": bidvol_neg,
    "askvol_neg": askvol_neg,
}])

qc.write_csv("qc_report_2021_2025_07_18utc.csv")

print(qc)
print("\nNull counts:")
print(nulls)

shape: (1, 13)
┌─────────┬────────────┬────────────┬────────┬───┬────────────┬───────────┬────────────┬───────────┐
│ rows    ┆ min_ts     ┆ max_ts     ┆ dup_ts ┆ … ┆ spread_p99 ┆ ticks_neg ┆ bidvol_neg ┆ askvol_ne │
│ ---     ┆ ---        ┆ ---        ┆ ---    ┆   ┆ 9          ┆ ---       ┆ ---        ┆ g         │
│ i64     ┆ datetime[μ ┆ datetime[μ ┆ i64    ┆   ┆ ---        ┆ i64       ┆ i64        ┆ ---       │
│         ┆ s]         ┆ s]         ┆        ┆   ┆ f64        ┆           ┆            ┆ i64       │
╞═════════╪════════════╪════════════╪════════╪═══╪════════════╪═══════════╪════════════╪═══════════╡
│ 8449570 ┆ 2021-01-01 ┆ 2025-11-30 ┆ 0      ┆ … ┆ 0.00014    ┆ 0         ┆ 0          ┆ 0         │
│         ┆ 07:00:00   ┆ 17:59:55   ┆        ┆   ┆            ┆           ┆            ┆           │
└─────────┴────────────┴────────────┴────────┴───┴────────────┴───────────┴────────────┴───────────┘

Null counts:
shape: (1, 10)
┌─────┬──────┬──────┬─────┬───┬──────────┬─────

# 2. Transformación de Datos (Resampling)

A partir de la data cruda de 5 segundos, se construyen estructuras de precios en temporalidades superiores (1, 3, 5 y 15 minutos). Esto permite a la estrategia analizar tendencias macro mientras ejecuta órdenes con precisión de microsegundos, manteniendo la granularidad necesaria para el análisis de microestructura.

In [None]:
import polars as pl

def resample_ohlcv_microstructure(df_5s: pl.DataFrame, every: str, ts_col: str = "ts") -> pl.DataFrame:
    df_5s = df_5s.sort(ts_col)

    return (
        df_5s
        .group_by_dynamic(
            index_column=ts_col,
            every=every,
            closed="left",
            label="left",
            start_by="window",  
        )
        .agg([
            pl.col("open").first().alias("open"),
            pl.col("high").max().alias("high"),
            pl.col("low").min().alias("low"),
            pl.col("close").last().alias("close"),

            pl.col("ticks").sum().alias("ticks"),
            pl.col("bid_vol_sum").sum().alias("bid_vol_sum"),
            pl.col("ask_vol_sum").sum().alias("ask_vol_sum"),

            pl.col("bid_last").last().alias("bid_last"),
            pl.col("ask_last").last().alias("ask_last"),

            pl.len().alias("n_bars_5s"),
        ])
        .sort(ts_col)
    )

df_5s = pl.read_parquet("eurusd_5s_2021_2025_07_18utc.parquet") 
outputs = {tf: resample_ohlcv_microstructure(df_5s, tf) for tf in ["1m", "3m", "5m", "15m"]}

# 3. Control de Calidad (Quality Assurance)

Se ejecutan pruebas automáticas para verificar la integridad matemática de los datos transformados. El sistema valida que no existan discrepancias entre los precios de 5 segundos y los gráficos de minutos, y asegura que el volumen de negociación sea consistente en todos los marcos temporales.

In [3]:
expected = {"1m": 12, "3m": 36, "5m": 60, "15m": 180}

for tf, df_tf in outputs.items():
    exp = expected[tf]
    qc_tf = (
        df_tf.select([
            pl.len().alias("n_buckets"),
            (pl.col("n_bars_5s") == exp).sum().alias("buckets_full"),
            (pl.col("n_bars_5s") < exp).sum().alias("buckets_with_gaps"),
            (pl.col("n_bars_5s").min()).alias("min_n_bars_5s"),
        ])
        .with_columns(
            (pl.col("buckets_full") / pl.col("n_buckets")).alias("pct_full")
        )
    )
    print(tf)
    print(qc_tf)

1m
shape: (1, 5)
┌───────────┬──────────────┬───────────────────┬───────────────┬──────────┐
│ n_buckets ┆ buckets_full ┆ buckets_with_gaps ┆ min_n_bars_5s ┆ pct_full │
│ ---       ┆ ---          ┆ ---               ┆ ---           ┆ ---      │
│ u32       ┆ u32          ┆ u32               ┆ u32           ┆ f64      │
╞═══════════╪══════════════╪═══════════════════╪═══════════════╪══════════╡
│ 770030    ┆ 420862       ┆ 349168            ┆ 1             ┆ 0.546553 │
└───────────┴──────────────┴───────────────────┴───────────────┴──────────┘
3m
shape: (1, 5)
┌───────────┬──────────────┬───────────────────┬───────────────┬──────────┐
│ n_buckets ┆ buckets_full ┆ buckets_with_gaps ┆ min_n_bars_5s ┆ pct_full │
│ ---       ┆ ---          ┆ ---               ┆ ---           ┆ ---      │
│ u32       ┆ u32          ┆ u32               ┆ u32           ┆ f64      │
╞═══════════╪══════════════╪═══════════════════╪═══════════════╪══════════╡
│ 256757    ┆ 84070        ┆ 172687            ┆ 1    

In [4]:
import polars as pl
df_m3=outputs["3m"]
checks_align = df_m3.select([
    (pl.col("ts").dt.second() == 0).all().alias("sec_is_0"),
    ((pl.col("ts").dt.minute() % 3) == 0).all().alias("minute_multiple_of_3"),
])

print("Alineación M3:")
print(checks_align)

checks_ohlc = df_m3.select([
    (pl.col("high") >= pl.max_horizontal(["open","close"])).all().alias("high_ok"),
    (pl.col("low")  <= pl.min_horizontal(["open","close"])).all().alias("low_ok"),
    (pl.col("high") >= pl.col("low")).all().alias("range_ok"),
])

print("\nCoherencia OHLC:")
print(checks_ohlc)

stats_m3 = df_m3.select([
    pl.col("n_bars_5s").count().alias("count"),
    pl.col("n_bars_5s").min().alias("min"),
    pl.col("n_bars_5s").quantile(0.25).alias("q25"),
    pl.col("n_bars_5s").median().alias("median"),
    pl.col("n_bars_5s").mean().alias("mean"),
    pl.col("n_bars_5s").quantile(0.75).alias("q75"),
    pl.col("n_bars_5s").max().alias("max"),
])
print(stats_m3)

Alineación M3:
shape: (1, 2)
┌──────────┬──────────────────────┐
│ sec_is_0 ┆ minute_multiple_of_3 │
│ ---      ┆ ---                  │
│ bool     ┆ bool                 │
╞══════════╪══════════════════════╡
│ true     ┆ true                 │
└──────────┴──────────────────────┘

Coherencia OHLC:
shape: (1, 3)
┌─────────┬────────┬──────────┐
│ high_ok ┆ low_ok ┆ range_ok │
│ ---     ┆ ---    ┆ ---      │
│ bool    ┆ bool   ┆ bool     │
╞═════════╪════════╪══════════╡
│ true    ┆ true   ┆ true     │
└─────────┴────────┴──────────┘
shape: (1, 7)
┌────────┬─────┬──────┬────────┬──────────┬──────┬─────┐
│ count  ┆ min ┆ q25  ┆ median ┆ mean     ┆ q75  ┆ max │
│ ---    ┆ --- ┆ ---  ┆ ---    ┆ ---      ┆ ---  ┆ --- │
│ u32    ┆ u32 ┆ f64  ┆ f64    ┆ f64      ┆ f64  ┆ u32 │
╞════════╪═════╪══════╪════════╪══════════╪══════╪═════╡
│ 256757 ┆ 1   ┆ 31.0 ┆ 34.0   ┆ 32.90882 ┆ 36.0 ┆ 36  │
└────────┴─────┴──────┴────────┴──────────┴──────┴─────┘


In [5]:
import random

df_5s = df_5s.sort("ts")
df_m3 = df_m3.sort("ts")

sample_idx = random.sample(range(df_m3.height), 5)
sample = df_m3[sample_idx].select(["ts","open","high","low","close","ticks","bid_vol_sum","ask_vol_sum","bid_last","ask_last","n_bars_5s"])

print("Muestras M3 a verificar:")
print(sample)

for row in sample.iter_rows(named=True):
    t0 = row["ts"]
    t1 = t0 + pl.duration(minutes=3)

    chunk = df_5s.filter((pl.col("ts") >= t0) & (pl.col("ts") < t1))

    recomputed = chunk.select([
        pl.col("open").first().alias("open"),
        pl.col("high").max().alias("high"),
        pl.col("low").min().alias("low"),
        pl.col("close").last().alias("close"),
        pl.col("ticks").sum().alias("ticks"),
        pl.col("bid_vol_sum").sum().alias("bid_vol_sum"),
        pl.col("ask_vol_sum").sum().alias("ask_vol_sum"),
        pl.col("bid_last").last().alias("bid_last"),
        pl.col("ask_last").last().alias("ask_last"),
        pl.len().alias("n_bars_5s")
    ]).to_dicts()[0]

    ok = True
    for k in recomputed.keys():
        if row[k] != recomputed[k]:
            ok = False
            print(f"\n❌ Mismatch en ventana {t0} clave={k}: M3={row[k]} vs recomputed={recomputed[k]}")
    if ok:
        print(f"\n✅ Ventana {t0} OK (coincide con recompute desde 5s)")

Muestras M3 a verificar:
shape: (5, 11)
┌─────────────┬──────────┬──────────┬──────────┬───┬─────────────┬──────────┬──────────┬───────────┐
│ ts          ┆ open     ┆ high     ┆ low      ┆ … ┆ ask_vol_sum ┆ bid_last ┆ ask_last ┆ n_bars_5s │
│ ---         ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---         ┆ ---      ┆ ---      ┆ ---       │
│ datetime[ms ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64         ┆ f64      ┆ f64      ┆ u32       │
│ ]           ┆          ┆          ┆          ┆   ┆             ┆          ┆          ┆           │
╞═════════════╪══════════╪══════════╪══════════╪═══╪═════════════╪══════════╪══════════╪═══════════╡
│ 2025-07-19  ┆ 1.16757  ┆ 1.16759  ┆ 1.1673   ┆ … ┆ 83.669999   ┆ 1.16735  ┆ 1.16737  ┆ 24        │
│ 09:15:00    ┆          ┆          ┆          ┆   ┆             ┆          ┆          ┆           │
│ 2024-06-15  ┆ 1.091235 ┆ 1.091715 ┆ 1.09033  ┆ … ┆ 2106.769985 ┆ 1.09032  ┆ 1.09034  ┆ 36        │
│ 16:42:00    ┆          ┆          ┆          ┆   

# 4. Gestión de Eventos Económicos (News Filter)

Se integra un calendario económico para identificar noticias de alto impacto (ej. decisiones de tipos de interés). El sistema marca estas ventanas de tiempo para evitar operar durante periodos de volatilidad extrema e impredecible, protegiendo el capital de movimientos erráticos (whipsaws).

In [6]:
import pandas as pd
import polars as pl
from datasets import load_dataset
from pathlib import Path

CURRENCIES = {"EUR", "USD"}
PRE_MIN, POST_MIN = 15, 15 
OUT_EVENTS = Path("news_events_high_EURUSD.parquet")

ds = load_dataset("Ehsanrs2/Forex_Factory_Calendar", split="train")
news = ds.to_pandas()

news = news[
    news["Impact"].astype(str).str.contains("High", case=False, na=False) &
    news["Currency"].isin(CURRENCIES)
].copy()

news["event_ts"] = pd.to_datetime(news["DateTime"], utc=True, errors="coerce")
news = news.dropna(subset=["event_ts"]).sort_values("event_ts")
news["event_ts"] = news["event_ts"].dt.tz_convert("UTC").dt.tz_localize(None)

events = pl.from_pandas(news[["event_ts", "Currency", "Impact"]])

events = (
    events
    .with_columns(pl.col("event_ts").cast(pl.Datetime("ms"), strict=False))
    .unique(subset=["event_ts", "Currency", "Impact"], keep="last")
    .sort("event_ts")
)

events.write_parquet(str(OUT_EVENTS))
print(f"Saved events -> {OUT_EVENTS}")


Saved events -> news_events_high_EURUSD.parquet


In [None]:
import polars as pl

events = pl.read_parquet("news_events_high_EURUSD.parquet").sort("event_ts")

def add_news_flag(
    df_5s: pl.DataFrame,
    events: pl.DataFrame,
    pre_min: int = 15,
    post_min: int = 15,
) -> pl.DataFrame:
    df = df_5s.sort("ts")

    ev = (
        events
        .select(pl.col("event_ts").cast(pl.Datetime("ms"), strict=False).alias("event_ts"))
        .sort("event_ts")
        .unique(subset=["event_ts"], keep="last")
    )

    prev = (
        df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="backward")
          .rename({"event_ts": "prev_event_ts"})
    )
    nxt = (
        df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="forward")
          .rename({"event_ts": "next_event_ts"})
    )

    out = prev.with_columns(nxt.get_column("next_event_ts"))

    pre_s = pre_min * 60
    post_s = post_min * 60
    BIG = 10**12  

    out = out.with_columns([
        pl.when(pl.col("prev_event_ts").is_null())
          .then(BIG)
          .otherwise((pl.col("ts") - pl.col("prev_event_ts")).dt.total_seconds())
          .alias("secs_since_prev_event"),

        pl.when(pl.col("next_event_ts").is_null())
          .then(BIG)
          .otherwise((pl.col("next_event_ts") - pl.col("ts")).dt.total_seconds())
          .alias("secs_until_next_event"),
    ]).with_columns([
        (
            (pl.col("secs_since_prev_event") <= post_s) |
            (pl.col("secs_until_next_event") <= pre_s)
        ).alias("in_news_window")
    ])

    return out

In [10]:
df_5s_news = add_news_flag(df_5s, events, pre_min=15, post_min=15)

print(df_5s_news.select([
    pl.len().alias("rows"),
    pl.col("in_news_window").sum().alias("bars_in_news_window"),
    (pl.col("in_news_window").mean() * 100).alias("pct_in_news_window"),
]))

shape: (1, 3)
┌─────────┬─────────────────────┬────────────────────┐
│ rows    ┆ bars_in_news_window ┆ pct_in_news_window │
│ ---     ┆ ---                 ┆ ---                │
│ u32     ┆ u32                 ┆ f64                │
╞═════════╪═════════════════════╪════════════════════╡
│ 8449570 ┆ 100560              ┆ 1.19012            │
└─────────┴─────────────────────┴────────────────────┘


In [11]:
import polars as pl

PRE_MIN, POST_MIN = 15, 15 

min_ts = df_5s_news.select(pl.col("ts").min()).item()
max_ts = df_5s_news.select(pl.col("ts").max()).item()

one_event = (
    events
    .filter((pl.col("event_ts") >= min_ts) & (pl.col("event_ts") <= max_ts))
    .select("event_ts")
    .sort("event_ts")
    .head(1)
    .item()
)

print("Evento dentro del rango:", one_event)

window = (
    df_5s_news
    .filter(
        (pl.col("ts") >= one_event - pl.duration(minutes=20)) &
        (pl.col("ts") <= one_event + pl.duration(minutes=20))
    )
    .select(["ts", "in_news_window"])
)
print(window.head(10))
print("...")
print(window.tail(10))

check = (
    df_5s_news
    .filter(
        (pl.col("ts") >= one_event - pl.duration(minutes=20)) &
        (pl.col("ts") <= one_event + pl.duration(minutes=20))
    )
    .with_columns([
        (
            ((pl.col("ts") >= one_event - pl.duration(minutes=PRE_MIN)) & (pl.col("ts") <= one_event)) |
            ((pl.col("ts") >= one_event) & (pl.col("ts") <= one_event + pl.duration(minutes=POST_MIN)))
        ).alias("should_be_true")
    ])
    .select([
        (pl.col("in_news_window") == pl.col("should_be_true")).all().alias("window_matches_exactly")
    ])
)

print(check)

Evento dentro del rango: 2021-01-06 19:00:00
shape: (0, 2)
┌──────────────┬────────────────┐
│ ts           ┆ in_news_window │
│ ---          ┆ ---            │
│ datetime[ms] ┆ bool           │
╞══════════════╪════════════════╡
└──────────────┴────────────────┘
...
shape: (0, 2)
┌──────────────┬────────────────┐
│ ts           ┆ in_news_window │
│ ---          ┆ ---            │
│ datetime[ms] ┆ bool           │
╞══════════════╪════════════════╡
└──────────────┴────────────────┘
shape: (1, 1)
┌────────────────────────┐
│ window_matches_exactly │
│ ---                    │
│ bool                   │
╞════════════════════════╡
│ true                   │
└────────────────────────┘


# 5. Lógica de la Estrategia (Signal Generation)

Implementación del algoritmo de decisión. Se generan señales de entrada basadas en patrones de velas Japonesas 'Contrarian' en gráficos de 15 minutos. La lógica incluye filtros de tendencia y validación cruzada entre temporalidades para aumentar la probabilidad de éxito de cada operación.

In [12]:
import polars as pl

def add_news_flag(
    df_5s: pl.DataFrame,
    events: pl.DataFrame,
    pre_min: int = 15,
    post_min: int = 15,
) -> pl.DataFrame:
    df = df_5s.sort("ts")

    ev = (
        events
        .select(pl.col("event_ts").cast(pl.Datetime("ms"), strict=False).alias("event_ts"))
        .sort("event_ts")
        .unique(subset=["event_ts"], keep="last")
    )

    prev = (
        df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="backward")
          .rename({"event_ts": "prev_event_ts"})
    )
    nxt = (
        df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="forward")
          .rename({"event_ts": "next_event_ts"})
    )

    out = prev.with_columns(nxt.get_column("next_event_ts"))

    pre_s = pre_min * 60
    post_s = post_min * 60
    BIG = 10**12 

    out = out.with_columns([
        pl.when(pl.col("prev_event_ts").is_null())
          .then(BIG)
          .otherwise((pl.col("ts") - pl.col("prev_event_ts")).dt.total_seconds())
          .alias("secs_since_prev_event"),

        pl.when(pl.col("next_event_ts").is_null())
          .then(BIG)
          .otherwise((pl.col("next_event_ts") - pl.col("ts")).dt.total_seconds())
          .alias("secs_until_next_event"),
    ]).with_columns([
        (
            (pl.col("secs_since_prev_event") <= post_s) |
            (pl.col("secs_until_next_event") <= pre_s)
        ).alias("in_news_window")
    ])

    return out


def compute_signals_tf(df_tf: pl.DataFrame) -> pl.DataFrame:
    df = df_tf.sort("ts")

    df = df.with_columns([
        pl.when(pl.col("close") > pl.col("open")).then(1)
          .when(pl.col("close") < pl.col("open")).then(-1)
          .otherwise(None)
          .alias("candle_dir")
    ])

    df = df.with_columns([
        pl.col("open").shift(1).alias("open_1"),
        pl.col("high").shift(1).alias("high_1"),
        pl.col("low").shift(1).alias("low_1"),
        pl.col("close").shift(1).alias("close_1"),
        pl.col("candle_dir").shift(1).alias("dir_1"),
    ])

    outside = (pl.col("high") > pl.col("high_1")) & (pl.col("low") < pl.col("low_1"))

    long_sig = (
        (pl.col("dir_1") == -1) &
        (pl.col("candle_dir") == -1) &
        outside &
        (pl.col("close") < pl.col("low_1"))
    )

    short_sig = (
        (pl.col("dir_1") == 1) &
        (pl.col("candle_dir") == 1) &
        outside &
        (pl.col("close") > pl.col("high_1"))
    )

    df = df.with_columns([
        pl.when(long_sig).then(1)
          .when(short_sig).then(-1)
          .otherwise(0)
          .alias("signal")
    ])

    return df


def map_tf_signals_to_5s(
    df_5s: pl.DataFrame,
    df_tf_signals: pl.DataFrame,
    tf_minutes: int
) -> pl.DataFrame:
    df5 = df_5s.sort("ts")

    sig = (
        df_tf_signals
        .select([
            pl.col("ts").alias("signal_ts"),
            pl.col("signal")
        ])
        .filter(pl.col("signal") != 0)
        .with_columns([
            (pl.col("signal_ts") + pl.duration(minutes=tf_minutes)).alias("entry_ts_ideal")
        ])
        .sort("entry_ts_ideal")
    )

    mapped = (
        sig.join_asof(
            df5.select(["ts"]),
            left_on="entry_ts_ideal",
            right_on="ts",
            strategy="forward"
        )
        .rename({"ts": "entry_ts"})
        .drop_nulls(["entry_ts"])
        .select(["signal_ts", "entry_ts_ideal", "entry_ts", "signal"])
    )

    out = (
        df5.join(mapped.select(["entry_ts", "signal"]), left_on="ts", right_on="entry_ts", how="left")
           .with_columns(pl.col("signal").fill_null(0).alias("signal_tf"))
           .drop(["signal"])
    )

    return out


def add_time_filters(
    df_5s: pl.DataFrame,
    last_entry_hour_utc: int = 16
) -> pl.DataFrame:
    return df_5s.with_columns([
        (pl.col("ts").dt.hour() >= last_entry_hour_utc).alias("after_last_entry_hour")
    ])


def add_entry_allowed(df_5s: pl.DataFrame) -> pl.DataFrame:
    return df_5s.with_columns([
        (
            (pl.col("signal_tf") != 0) &
            (pl.col("in_news_window") == False) &
            (pl.col("after_last_entry_hour") == False)
        ).alias("entry_allowed")
    ])


import polars as pl

def add_trade_constraints(
    df_5s_with_entries: pl.DataFrame,
    session_end_hour_utc: int = 18,
    max_hold_minutes: int = 60
) -> pl.DataFrame:

    out = df_5s_with_entries.with_columns([
        (pl.col("ts").dt.date().cast(pl.Datetime("ms")) + pl.duration(hours=session_end_hour_utc)).alias("session_close_ts"),
        (pl.col("ts") + pl.duration(minutes=max_hold_minutes)).alias("max_exit_ts_by_duration"),
    ])

    out = out.with_columns([
        pl.min_horizontal([
            pl.col("session_close_ts"),
            pl.col("max_exit_ts_by_duration")
        ]).alias("hard_exit_ts")
    ])

    return out


# Ejemplo de ejecucion

In [13]:
import polars as pl


PATH_5S    = "eurusd_5s_2021_2025_07_18utc.parquet"
PATH_NEWS  = "news_events_high_EURUSD.parquet"   
PATH_M3    = None 

df_5s  = pl.read_parquet(PATH_5S).sort("ts")
events = pl.read_parquet(PATH_NEWS).sort("event_ts")

df_5s_news = add_news_flag(df_5s, events, pre_min=15, post_min=15)

df_m3 = outputs["3m"].sort("ts")


df_m3_sig = compute_signals_tf(df_m3)

df_5s_sig = map_tf_signals_to_5s(df_5s_news, df_m3_sig, tf_minutes=3)

df_5s_sig = add_time_filters(df_5s_sig, last_entry_hour_utc=16)

df_5s_sig = add_entry_allowed(df_5s_sig)

df_5s_sig = add_trade_constraints(df_5s_sig, session_end_hour_utc=18, max_hold_minutes=60)

print(df_5s_sig.select([
    pl.len().alias("rows"),
    (pl.col("signal_tf") != 0).sum().alias("n_entries_mapped"),
    pl.col("entry_allowed").sum().alias("n_entries_allowed"),
    pl.col("in_news_window").sum().alias("bars_in_news_window"),
]).head(1))

shape: (1, 4)
┌─────────┬──────────────────┬───────────────────┬─────────────────────┐
│ rows    ┆ n_entries_mapped ┆ n_entries_allowed ┆ bars_in_news_window │
│ ---     ┆ ---              ┆ ---               ┆ ---                 │
│ u32     ┆ u32              ┆ u32               ┆ u32                 │
╞═════════╪══════════════════╪═══════════════════╪═════════════════════╡
│ 8449570 ┆ 4350             ┆ 3566              ┆ 100560              │
└─────────┴──────────────────┴───────────────────┴─────────────────────┘


In [14]:
print(
    df_m3_sig.select([
        pl.len().alias("n_m3_bars"),
        (pl.col("signal") == 1).sum().alias("n_long_signals"),
        (pl.col("signal") == -1).sum().alias("n_short_signals"),
        (pl.col("signal") != 0).sum().alias("n_total_signals"),
    ])
)

print(
    df_5s_sig.select([
        (pl.col("signal_tf") != 0).sum().alias("n_entries_mapped_5s"),
    ])
)
print(
    df_5s_sig.select([
        pl.col("entry_allowed").sum().alias("n_entries_allowed"),
        ((pl.col("signal_tf") != 0) & (pl.col("in_news_window") == True)).sum().alias("blocked_by_news"),
        ((pl.col("signal_tf") != 0) & (pl.col("after_last_entry_hour") == True)).sum().alias("blocked_by_time"),
    ])
)

mapped_entries = (
    df_5s_sig
    .filter(pl.col("signal_tf") != 0)
    .select(["ts", "signal_tf"]) 
    .head(10)
)

print(mapped_entries)

shape: (1, 4)
┌───────────┬────────────────┬─────────────────┬─────────────────┐
│ n_m3_bars ┆ n_long_signals ┆ n_short_signals ┆ n_total_signals │
│ ---       ┆ ---            ┆ ---             ┆ ---             │
│ u32       ┆ u32            ┆ u32             ┆ u32             │
╞═══════════╪════════════════╪═════════════════╪═════════════════╡
│ 256757    ┆ 2108           ┆ 2242            ┆ 4350            │
└───────────┴────────────────┴─────────────────┴─────────────────┘
shape: (1, 1)
┌─────────────────────┐
│ n_entries_mapped_5s │
│ ---                 │
│ u32                 │
╞═════════════════════╡
│ 4350                │
└─────────────────────┘
shape: (1, 3)
┌───────────────────┬─────────────────┬─────────────────┐
│ n_entries_allowed ┆ blocked_by_news ┆ blocked_by_time │
│ ---               ┆ ---             ┆ ---             │
│ u32               ┆ u32             ┆ u32             │
╞═══════════════════╪═════════════════╪═════════════════╡
│ 3566              ┆ 61       

In [15]:
mapped_audit = (
    df_m3_sig
    .select([pl.col("ts").alias("signal_ts"), pl.col("signal")])
    .filter(pl.col("signal") != 0)
    .with_columns([
        (pl.col("signal_ts") + pl.duration(minutes=3)).alias("entry_ts_ideal")
    ])
    .sort("entry_ts_ideal")
    .join_asof(
        df_5s_sig.select([pl.col("ts").alias("entry_ts")]).sort("entry_ts"),
        left_on="entry_ts_ideal",
        right_on="entry_ts",
        strategy="forward"
    )
    .select(["signal_ts","entry_ts_ideal","entry_ts","signal"])
    .head(20)
)

print(mapped_audit)

shape: (20, 4)
┌─────────────────────┬─────────────────────┬─────────────────────┬────────┐
│ signal_ts           ┆ entry_ts_ideal      ┆ entry_ts            ┆ signal │
│ ---                 ┆ ---                 ┆ ---                 ┆ ---    │
│ datetime[ms]        ┆ datetime[ms]        ┆ datetime[ms]        ┆ i32    │
╞═════════════════════╪═════════════════════╪═════════════════════╪════════╡
│ 2021-01-01 09:51:00 ┆ 2021-01-01 09:54:00 ┆ 2021-01-01 09:54:00 ┆ -1     │
│ 2021-01-01 10:36:00 ┆ 2021-01-01 10:39:00 ┆ 2021-01-01 10:39:00 ┆ 1      │
│ 2021-01-01 13:57:00 ┆ 2021-01-01 14:00:00 ┆ 2021-01-01 14:00:00 ┆ 1      │
│ 2021-01-02 09:21:00 ┆ 2021-01-02 09:24:00 ┆ 2021-01-02 09:24:00 ┆ 1      │
│ 2021-01-02 12:45:00 ┆ 2021-01-02 12:48:00 ┆ 2021-01-02 12:48:05 ┆ 1      │
│ …                   ┆ …                   ┆ …                   ┆ …      │
│ 2021-01-04 12:00:00 ┆ 2021-01-04 12:03:00 ┆ 2021-01-04 12:03:00 ┆ 1      │
│ 2021-01-04 12:54:00 ┆ 2021-01-04 12:57:00 ┆ 2021-01-04 12:5

In [16]:
import polars as pl

TF_MIN = 3  # M3

mapped_events = (
    df_m3_sig
    .select([pl.col("ts").alias("signal_ts"), pl.col("signal")])
    .filter(pl.col("signal") != 0)
    .with_columns([
        (pl.col("signal_ts") + pl.duration(minutes=TF_MIN)).alias("entry_ts_ideal")
    ])
    .sort("entry_ts_ideal")
    .join_asof(
        df_5s_sig.select([pl.col("ts").alias("entry_ts")]).sort("entry_ts"),
        left_on="entry_ts_ideal",
        right_on="entry_ts",
        strategy="forward"
    )
    .select(["signal_ts", "entry_ts_ideal", "entry_ts", "signal"])
)

print(mapped_events.head(5))

shape: (5, 4)
┌─────────────────────┬─────────────────────┬─────────────────────┬────────┐
│ signal_ts           ┆ entry_ts_ideal      ┆ entry_ts            ┆ signal │
│ ---                 ┆ ---                 ┆ ---                 ┆ ---    │
│ datetime[ms]        ┆ datetime[ms]        ┆ datetime[ms]        ┆ i32    │
╞═════════════════════╪═════════════════════╪═════════════════════╪════════╡
│ 2021-01-01 09:51:00 ┆ 2021-01-01 09:54:00 ┆ 2021-01-01 09:54:00 ┆ -1     │
│ 2021-01-01 10:36:00 ┆ 2021-01-01 10:39:00 ┆ 2021-01-01 10:39:00 ┆ 1      │
│ 2021-01-01 13:57:00 ┆ 2021-01-01 14:00:00 ┆ 2021-01-01 14:00:00 ┆ 1      │
│ 2021-01-02 09:21:00 ┆ 2021-01-02 09:24:00 ┆ 2021-01-02 09:24:00 ┆ 1      │
│ 2021-01-02 12:45:00 ┆ 2021-01-02 12:48:00 ┆ 2021-01-02 12:48:05 ┆ 1      │
└─────────────────────┴─────────────────────┴─────────────────────┴────────┘


In [17]:
import polars as pl
import plotly.graph_objects as go

TF_MIN = 3
N = 20

lastN = mapped_events.sort("signal_ts").tail(N).sort("signal_ts")

t0 = lastN.select(pl.col("signal_ts").min()).item() - pl.duration(minutes=240)
t1 = lastN.select(pl.col("signal_ts").max()).item() + pl.duration(minutes=60)

m3_window = (
    df_m3.filter((pl.col("ts") >= t0) & (pl.col("ts") <= t1))
         .select(["ts","open","high","low","close"])
         .sort("ts")
         .to_pandas()
)

sig_pts = (
    lastN.join(
        df_m3.select([pl.col("ts").alias("signal_ts"), pl.col("close").alias("signal_price")]),
        on="signal_ts",
        how="left"
    )
)

entry_pts = (
    lastN.join(
        df_5s.select([pl.col("ts").alias("entry_ts"), pl.col("open").alias("entry_price_5s")]),
        on="entry_ts",
        how="left"
    )
)

sig_pd   = sig_pts.to_pandas()
entry_pd = entry_pts.to_pandas()

longs  = sig_pd[sig_pd["signal"] == 1]
shorts = sig_pd[sig_pd["signal"] == -1]

fig = go.Figure()

fig.add_trace(go.Candlestick(
    x=m3_window["ts"],
    open=m3_window["open"],
    high=m3_window["high"],
    low=m3_window["low"],
    close=m3_window["close"],
    name="M3"
))

if len(longs):
    fig.add_trace(go.Scatter(
        x=longs["signal_ts"],
        y=longs["signal_price"],
        mode="markers",
        name="Signal LONG",
        marker=dict(symbol="triangle-up", size=10),
        hovertemplate="LONG SIGNAL<br>%{x}<br>close(M3)=%{y}<extra></extra>"
    ))

if len(shorts):
    fig.add_trace(go.Scatter(
        x=shorts["signal_ts"],
        y=shorts["signal_price"],
        mode="markers",
        name="Signal SHORT",
        marker=dict(symbol="triangle-down", size=10),
        hovertemplate="SHORT SIGNAL<br>%{x}<br>close(M3)=%{y}<extra></extra>"
    ))

fig.add_trace(go.Scatter(
    x=entry_pd["entry_ts"],
    y=entry_pd["entry_price_5s"],
    mode="markers",
    name="ENTRY (5s open)",
    marker=dict(symbol="circle", size=6, opacity=0.7),
    hovertemplate="ENTRY<br>%{x}<br>open(5s)=%{y}<extra></extra>"
))

fig.update_layout(
    title=f"M3 Candles + últimas {N} señales (triángulos) + entradas reales 5s (puntos)",
    xaxis_title="Time (UTC naive)",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
    template="plotly_dark",
    height=700,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
)

fig.show()

In [18]:
import polars as pl

TF_MIN = 3

audit = (
    mapped_events
    .sort("signal_ts")
    .tail(10)
    .with_columns([
        (pl.col("signal_ts") + pl.duration(minutes=TF_MIN)).alias("next_m3_open_ideal")
    ])
    .select([
        "signal_ts",
        "next_m3_open_ideal",
        "entry_ts",
        (pl.col("entry_ts") == pl.col("next_m3_open_ideal")).alias("entry_is_exact_open"),
        (pl.col("entry_ts") - pl.col("next_m3_open_ideal")).dt.total_seconds().alias("entry_delay_s"),
    ])
)

print(audit)

shape: (10, 5)
┌──────────────┬──────────────────────┬──────────────────────┬─────────────────────┬───────────────┐
│ signal_ts    ┆ next_m3_open_ideal   ┆ entry_ts             ┆ entry_is_exact_open ┆ entry_delay_s │
│ ---          ┆ ---                  ┆ ---                  ┆ ---                 ┆ ---           │
│ datetime[ms] ┆ datetime[ms]         ┆ datetime[ms]         ┆ bool                ┆ i64           │
╞══════════════╪══════════════════════╪══════════════════════╪═════════════════════╪═══════════════╡
│ 2025-11-26   ┆ 2025-11-26 09:33:00  ┆ 2025-11-26 09:33:05  ┆ false               ┆ 5             │
│ 09:30:00     ┆                      ┆                      ┆                     ┆               │
│ 2025-11-26   ┆ 2025-11-26 10:51:00  ┆ 2025-11-26 10:51:05  ┆ false               ┆ 5             │
│ 10:48:00     ┆                      ┆                      ┆                     ┆               │
│ 2025-11-26   ┆ 2025-11-26 15:03:00  ┆ 2025-11-26 15:03:00  ┆ true         

# 6. Motor de Simulación (Backtesting Engine)

El núcleo del análisis. Este motor recorre año tras año de datos históricos, simulando la ejecución de cada operación con parámetros reales (comisiones, spread dinámico). Calcula el resultado financiero de cada decisión teórica, permitiendo una evaluación precisa del rendimiento esperado.

In [19]:
import numpy as np
import polars as pl

def backtest_5s_engine_mid_triggers(
    df_5s: pl.DataFrame,
    sl_pips: float,
    rr: float,
    pip_value: float = 0.0001,                 # EURUSD
    commission_roundturn_pips: float = 0.8,    # conservadora
) -> tuple[pl.DataFrame, dict]:
    """
    Motor 5s (controlado):
    - Triggers SL/TP intrabar con OHLC mid (open/high/low/close)
    - PnL con ejecutables bid/ask (spread real) + comisión
    - SL-first si SL y TP en misma barra
    - Forced close por:
        (1) News: in_news_window
        (2) Tiempo/No overnight: ts >= hard_exit_ts (hard_exit_ts=min(18:00, entry+2h))
    """

    df = df_5s.sort("ts")

    ts = df["ts"].to_numpy()
    o  = df["open"].to_numpy()
    h  = df["high"].to_numpy()
    l  = df["low"].to_numpy()

    bid_last = df["bid_last"].to_numpy()
    ask_last = df["ask_last"].to_numpy()

    in_news = df["in_news_window"].to_numpy()

    signal_tf = df["signal_tf"].to_numpy()
    entry_allowed = df["entry_allowed"].to_numpy()

    hard_exit_ts_arr = df["hard_exit_ts"].to_numpy()

    sl_dist = sl_pips * pip_value
    tp_dist = sl_dist * rr

    trades = []

    in_pos = False
    side = 0

    entry_i = None
    entry_ts = None
    entry_price_exec = None  # ask (long) / bid (short)
    entry_price_mid = None   # open mid (para niveles SL/TP)
    sl_mid = None
    tp_mid = None
    hard_exit_ts = None

    i = 0
    n = len(ts)

    while i < n:
        if not in_pos:
            if entry_allowed[i] and signal_tf[i] != 0:
                side = int(signal_tf[i])
                entry_i = i
                entry_ts = ts[i]

                if side == 1:
                    entry_price_exec = float(ask_last[i])
                else:
                    entry_price_exec = float(bid_last[i])

                entry_price_mid = float(o[i])

                if side == 1:
                    sl_mid = entry_price_mid - sl_dist
                    tp_mid = entry_price_mid + tp_dist
                else:
                    sl_mid = entry_price_mid + sl_dist
                    tp_mid = entry_price_mid - tp_dist

                hard_exit_ts = hard_exit_ts_arr[i]

                in_pos = True

            i += 1
            continue

        exit_reason = None
        forced_close_news = 0
        forced_close_time = 0

        exit_price_mid = None
        exit_price_exec = None

        if in_news[i]:
            forced_close_news = 1
            exit_reason = "FORCED_NEWS"
            exit_price_exec = float(bid_last[i]) if side == 1 else float(ask_last[i])

        elif ts[i] >= hard_exit_ts:
            forced_close_time = 1
            exit_reason = "FORCED_TIME"
            exit_price_exec = float(bid_last[i]) if side == 1 else float(ask_last[i])

        else:
            if side == 1:
                hit_sl = float(l[i]) <= sl_mid
                hit_tp = float(h[i]) >= tp_mid

                if hit_sl and hit_tp:
                    exit_reason = "SL"   # SL-first
                    exit_price_mid = float(sl_mid)
                elif hit_sl:
                    exit_reason = "SL"
                    exit_price_mid = float(sl_mid)
                elif hit_tp:
                    exit_reason = "TP"
                    exit_price_mid = float(tp_mid)
            else:
                hit_sl = float(h[i]) >= sl_mid
                hit_tp = float(l[i]) <= tp_mid

                if hit_sl and hit_tp:
                    exit_reason = "SL"
                    exit_price_mid = float(sl_mid)
                elif hit_sl:
                    exit_reason = "SL"
                    exit_price_mid = float(sl_mid)
                elif hit_tp:
                    exit_reason = "TP"
                    exit_price_mid = float(tp_mid)

            if exit_reason in ("SL", "TP"):
                spr = float(ask_last[i] - bid_last[i])
                if side == 1:
                    exit_price_exec = exit_price_mid - spr / 2.0  # LONG sale al bid
                else:
                    exit_price_exec = exit_price_mid + spr / 2.0  # SHORT sale al ask

        if exit_reason is None:
            i += 1
            continue

        exit_ts = ts[i]
        duration_s = (exit_ts - entry_ts) / np.timedelta64(1, "s")

        if side == 1:
            pnl_pips_gross = (exit_price_exec - entry_price_exec) / pip_value
        else:
            pnl_pips_gross = (entry_price_exec - exit_price_exec) / pip_value

        pnl_pips_net = pnl_pips_gross - commission_roundturn_pips
        R = pnl_pips_net / sl_pips

        trades.append({
            "entry_ts": entry_ts,
            "exit_ts": exit_ts,
            "side": side,
            "sl_pips": float(sl_pips),
            "rr": float(rr),
            "exit_reason": exit_reason,
            "entry_price_exec": float(entry_price_exec),
            "exit_price_exec": float(exit_price_exec),
            "pnl_pips_gross": float(pnl_pips_gross),
            "pnl_pips_net": float(pnl_pips_net),
            "R": float(R),
            "duration_s": float(duration_s),
            "forced_close_news": int(forced_close_news),
            "forced_close_time": int(forced_close_time),
        })

        in_pos = False
        side = 0
        entry_i = None
        entry_ts = None
        entry_price_exec = None
        entry_price_mid = None
        sl_mid = None
        tp_mid = None
        hard_exit_ts = None

        i += 1

    trades_df = pl.DataFrame(trades) if trades else pl.DataFrame(
        schema={
            "entry_ts": pl.Datetime,
            "exit_ts": pl.Datetime,
            "side": pl.Int32,
            "sl_pips": pl.Float64,
            "rr": pl.Float64,
            "exit_reason": pl.Utf8,
            "entry_price_exec": pl.Float64,
            "exit_price_exec": pl.Float64,
            "pnl_pips_gross": pl.Float64,
            "pnl_pips_net": pl.Float64,
            "R": pl.Float64,
            "duration_s": pl.Float64,
            "forced_close_news": pl.Int8,
            "forced_close_time": pl.Int8,
        }
    )

    if trades_df.height == 0:
        summary = {
            "n_trades": 0,
            "win_rate": 0.0,
            "expectancy_R": 0.0,
            "total_R": 0.0,
            "max_drawdown_R": 0.0,
            "forced_close_news_count": 0,
            "forced_close_time_count": 0,
        }
        return trades_df, summary

    R_arr = trades_df["R"].to_numpy()
    equity = np.cumsum(R_arr)
    running_max = np.maximum.accumulate(equity)
    dd = running_max - equity
    max_dd = float(np.max(dd)) if len(dd) else 0.0

    summary = {
        "n_trades": int(trades_df.height),
        "win_rate": float(np.mean(R_arr > 0)),
        "expectancy_R": float(np.mean(R_arr)),
        "total_R": float(np.sum(R_arr)),
        "max_drawdown_R": max_dd,
        "forced_close_news_count": int(trades_df["forced_close_news"].sum()),
        "forced_close_time_count": int(trades_df["forced_close_time"].sum()),
    }
    return trades_df, summary

# 7. Optimización de Parámetros (Grid Search)

Ejecución de miles de simulaciones paralelas variando sistemáticamente los parámetros de riesgo (Stop Loss, Ratio Riesgo/Beneficio) y temporalidades. El objetivo es encontrar la configuración que maximiza el retorno ajustado al riesgo, evitando el sobreajuste (overfitting).

In [20]:
import polars as pl

TF_CONFIG = {"1m": 1, "3m": 3, "5m": 5, "15m": 15}
SL_GRID = [4, 5, 6, 7, 8, 10]
RR_GRID = [1.0, 1.2, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0]


def ensure_in_news_window(df_5s: pl.DataFrame) -> pl.DataFrame:
    if "in_news_window" not in df_5s.columns:
        df_5s = df_5s.with_columns(pl.lit(False).alias("in_news_window"))
    return df_5s

def add_time_filters(df_5s: pl.DataFrame, last_entry_hour_utc: int = 16) -> pl.DataFrame:
    return df_5s.with_columns([
        (pl.col("ts").dt.hour() >= last_entry_hour_utc).alias("after_last_entry_hour")
    ])

def add_entry_allowed(df_5s: pl.DataFrame) -> pl.DataFrame:
    return df_5s.with_columns([
        (
            (pl.col("signal_tf") != 0) &
            (pl.col("in_news_window") == False) &
            (pl.col("after_last_entry_hour") == False)
        ).alias("entry_allowed")
    ])

def compute_signals_tf(df_tf: pl.DataFrame) -> pl.DataFrame:
    df = df_tf.sort("ts")

    df = df.with_columns([
        pl.when(pl.col("close") > pl.col("open")).then(1)
          .when(pl.col("close") < pl.col("open")).then(-1)
          .otherwise(None)  # doji ignorado
          .alias("candle_dir")
    ]).with_columns([
        pl.col("high").shift(1).alias("high_1"),
        pl.col("low").shift(1).alias("low_1"),
        pl.col("candle_dir").shift(1).alias("dir_1"),
    ])

    outside = (pl.col("high") > pl.col("high_1")) & (pl.col("low") < pl.col("low_1"))

    long_sig = (
        (pl.col("dir_1") == -1) &
        (pl.col("candle_dir") == -1) &
        outside &
        (pl.col("close") < pl.col("low_1"))
    )

    short_sig = (
        (pl.col("dir_1") == 1) &
        (pl.col("candle_dir") == 1) &
        outside &
        (pl.col("close") > pl.col("high_1"))
    )

    return df.with_columns([
        pl.when(long_sig).then(1)
          .when(short_sig).then(-1)
          .otherwise(0)
          .alias("signal")
    ])

import polars as pl

def map_tf_signals_to_5s(df_5s: pl.DataFrame, df_tf_signals: pl.DataFrame, tf_minutes: int) -> pl.DataFrame:
    """
    Mapeo robusto (gaps OK):
    - signal_ts: cierre TF
    - entry_ts_ideal = signal_ts + tf_minutes
    - entry_ts_real  = primer 5s existente >= entry_ts_ideal (asof forward)
    - Se pega signal_tf SOLO en entry_ts_real, usando join(on="ts") (sin keys raras)
    """
    df5 = df_5s.sort("ts")

    sig = (
        df_tf_signals
        .select([pl.col("ts").alias("signal_ts"), pl.col("signal")])
        .filter(pl.col("signal") != 0)
        .with_columns([
            (pl.col("signal_ts") + pl.duration(minutes=tf_minutes)).alias("entry_ts_ideal")
        ])
        .sort("entry_ts_ideal")
    )

    mapped = (
        sig.join_asof(
            df5.select(["ts"]).sort("ts"),
            left_on="entry_ts_ideal",
            right_on="ts",
            strategy="forward"
        )
        .drop_nulls(["ts"])                
        .select(["ts", "signal"])          
    )

    out = (
        df5
        .join(mapped, on="ts", how="left") 
        .with_columns(
            pl.col("signal").fill_null(0).cast(pl.Int8).alias("signal_tf")
        )
        .drop("signal")
    )

    return out

def add_trade_constraints(df_5s: pl.DataFrame, session_end_hour_utc: int = 18, max_hold_minutes: int = 120) -> pl.DataFrame:
    out = df_5s.with_columns([
        (pl.col("ts").dt.date().cast(pl.Datetime("ms")) + pl.duration(hours=session_end_hour_utc)).alias("session_close_ts"),
        (pl.col("ts") + pl.duration(minutes=max_hold_minutes)).alias("max_exit_ts_by_duration"),
    ])
    out = out.with_columns([
        pl.min_horizontal([pl.col("session_close_ts"), pl.col("max_exit_ts_by_duration")]).alias("hard_exit_ts")
    ])
    return out

def run_grid_search(
    df_5s_base: pl.DataFrame,
    pip_value: float = 0.0001,
    commission_roundturn_pips: float = 0.8,
    n_min_trades: int = 300,
    last_entry_hour_utc: int = 16,
    session_end_hour_utc: int = 18,
    max_hold_minutes: int = 120,
):
    df_5s_base = ensure_in_news_window(df_5s_base)

    df_5s_base = add_time_filters(df_5s_base, last_entry_hour_utc=last_entry_hour_utc)

    results = []

    for tf_str, tf_min in TF_CONFIG.items():
        df_tf = resample_ohlcv_microstructure(df_5s_base, every=tf_str)

        df_tf_sig = compute_signals_tf(df_tf)

        df_5s_exec = map_tf_signals_to_5s(df_5s_base, df_tf_sig, tf_minutes=tf_min)

        df_5s_exec = add_entry_allowed(df_5s_exec)

        df_5s_exec = add_trade_constraints(
            df_5s_exec,
            session_end_hour_utc=session_end_hour_utc,
            max_hold_minutes=max_hold_minutes
        )

        for sl in SL_GRID:
            for rr in RR_GRID:
                trades_df, summary = backtest_5s_engine_mid_triggers(
                    df_5s_exec,
                    sl_pips=sl,
                    rr=rr,
                    pip_value=pip_value,
                    commission_roundturn_pips=commission_roundturn_pips,
                )

                results.append({
                    "tf": tf_str,
                    "sl_pips": int(sl),
                    "rr": float(rr),
                    **summary
                })

    results_df = pl.DataFrame(results)

    feasible_df = results_df.filter(
        (pl.col("n_trades") >= n_min_trades) &
        (pl.col("expectancy_R") > 0)
    )

    ranked_df = feasible_df.sort(
        by=["max_drawdown_R", "expectancy_R", "total_R"],
        descending=[False, True, True]
    )

    return results_df, feasible_df, ranked_df


results_df, feasible_df, ranked_df = run_grid_search(df_5s_news)

ranked_df.head(20)

tf,sl_pips,rr,n_trades,win_rate,expectancy_R,total_R,max_drawdown_R,forced_close_news_count,forced_close_time_count
str,i64,f64,i64,f64,f64,f64,f64,i64,i64


# 8. Análisis de Resultados e Informe Final

Generación de métricas de rendimiento y rankings. Se seleccionan las mejores configuraciones basándose en la Esperanza Matemática (Expectancy) y la estabilidad de la curva de capital (Drawdown), presentando los resultados finales en tablas clasificadas para la toma de decisiones.

In [21]:
top5_por_tf = (
    results_df
    .with_columns([
        pl.col("max_drawdown_R").rank("dense").over("tf").alias("rk_dd"),
        pl.col("expectancy_R").rank("dense", descending=True).over("tf").alias("rk_exp"),
        pl.col("total_R").rank("dense", descending=True).over("tf").alias("rk_total"),
    ])
    .with_columns([
        (pl.col("rk_dd") * 1_000_000 + pl.col("rk_exp") * 1_000 + pl.col("rk_total")).alias("rk_combo")
    ])
    .sort(by=["tf", "rk_combo"], descending=[False, False])
    .group_by("tf", maintain_order=True)
    .head(10)
    .drop(["rk_dd", "rk_exp", "rk_total", "rk_combo"])
)

top5_por_tf

tf,sl_pips,rr,n_trades,win_rate,expectancy_R,total_R,max_drawdown_R,forced_close_news_count,forced_close_time_count
str,i64,f64,i64,f64,f64,f64,f64,i64,i64
"""15m""",10,1.0,688,0.530523,-0.044375,-30.53,46.85,12,98
"""15m""",10,1.2,685,0.494891,-0.041226,-28.24,46.865,17,124
"""15m""",10,3.0,671,0.400894,-0.040961,-27.485,55.38,24,261
"""15m""",8,4.0,679,0.359352,-0.047137,-32.00625,58.76875,19,223
"""15m""",10,5.0,668,0.398204,-0.051826,-34.62,59.505,25,308
…,…,…,…,…,…,…,…,…,…
"""5m""",10,4.0,1735,0.382133,-0.070193,-121.785,157.68,59,764
"""5m""",10,5.0,1733,0.380842,-0.070992,-123.03,158.48,60,794
"""5m""",10,2.0,1785,0.405602,-0.071549,-127.715,160.205,49,548
"""5m""",10,1.2,1856,0.470366,-0.073526,-136.465,161.965,35,335


In [22]:
top10_m3 = (
    results_df
    .filter(pl.col("tf") == "3m")
    .sort(
        by=["max_drawdown_R", "expectancy_R", "total_R"],
        descending=[False, True, True]
    )
    .head(40)
)

top10_m3

tf,sl_pips,rr,n_trades,win_rate,expectancy_R,total_R,max_drawdown_R,forced_close_news_count,forced_close_time_count
str,i64,f64,i64,f64,f64,f64,f64,i64,i64
"""3m""",10,5.0,2584,0.391641,-0.036505,-94.33,147.685,64,1191
"""3m""",10,4.0,2597,0.391606,-0.044935,-116.695,167.92,63,1137
"""3m""",10,3.5,2607,0.392789,-0.049536,-129.14,178.68,58,1099
"""3m""",10,3.0,2618,0.394958,-0.053113,-139.05,183.655,57,1033
"""3m""",10,2.5,2651,0.400981,-0.06281,-166.51,198.845,56,958
…,…,…,…,…,…,…,…,…,…
"""3m""",5,4.0,2975,0.278655,-0.157782,-469.4,489.3,31,457
"""3m""",5,3.5,2997,0.286954,-0.167554,-502.16,524.17,30,395
"""3m""",6,1.2,3186,0.472065,-0.162576,-517.966667,527.091667,20,83
"""3m""",5,3.0,3031,0.30287,-0.16839,-510.39,534.65,29,314
