# Final Backtesting Engine (EURUSD)
Este notebook implementa el motor de backtesting paso a paso (Bloques 1 a 10).

In [None]:
import polars as pl
from pathlib import Path
import sys
import datetime
import numpy as np

# ==========================================
# CONFIG & PATHS
# ==========================================
BASE_PATH = Path("ml_master_project/src")
DATA_FILE = BASE_PATH / "eurusd_5s_2021_2025_07_18utc.parquet"
NEWS_FILE = BASE_PATH / "news_events_high_EURUSD.parquet"
OUT_TRAIN = BASE_PATH / "train_results_M3_final.parquet"
OUT_TEST = BASE_PATH / "test_results_M3_final.parquet"

## Bloque 1: Carga de Datos 5s

In [2]:
def block_1_load_data():
    print(">>> [BLOQUE 1] Carga de Datos 5s...")
    if not DATA_FILE.exists(): raise FileNotFoundError(f"No se encuentra {DATA_FILE}")
    df = pl.scan_parquet(str(DATA_FILE)).filter((pl.col("ts").dt.time() >= pl.time(7,0,0)) & (pl.col("ts").dt.time() <= pl.time(18,0,0))).sort("ts").collect()
    print(f"Dataset shape: {df.shape}")
    print("✅ [BLOQUE 1] PASS")
    return df

df_5s = block_1_load_data()

>>> [BLOQUE 1] Carga de Datos 5s...
Dataset shape: (8449570, 10)
✅ [BLOQUE 1] PASS


## Bloque 2: Split Train/Test

In [3]:
def block_2_split_train_test(df_5s: pl.DataFrame):
    print("\n>>> [BLOQUE 2] Split Train/Test...")
    split_date = datetime.date(2025, 7, 1)
    df_train = df_5s.filter(pl.col("ts").dt.date() < split_date)
    df_test = df_5s.filter(pl.col("ts").dt.date() >= split_date)
    print(f"Train Shape: {df_train.shape}, Test Shape: {df_test.shape}")
    if df_train.height == 0 or df_test.height == 0: raise ValueError("Empty split")
    print("✅ [BLOQUE 2] PASS")
    return df_train, df_test

df_train, df_test = block_2_split_train_test(df_5s)


>>> [BLOQUE 2] Split Train/Test...
Train Shape: (7718101, 10), Test Shape: (731469, 10)
✅ [BLOQUE 2] PASS


## Bloque 3: Resample M3

In [4]:
def resample_ohlcv_microstructure(df_5s, every, ts_col="ts"):
    df_5s = df_5s.sort(ts_col)
    return df_5s.group_by_dynamic(ts_col, every=every, closed="left", label="left", start_by="window").agg([
        pl.col("open").first().alias("open"), pl.col("high").max().alias("high"), pl.col("low").min().alias("low"), pl.col("close").last().alias("close"),
        pl.col("ticks").sum().alias("ticks"), pl.col("bid_vol_sum").sum().alias("bid_vol_sum"), pl.col("ask_vol_sum").sum().alias("ask_vol_sum"),
        pl.col("bid_last").last().alias("bid_last"), pl.col("ask_last").last().alias("ask_last"),
        pl.len().alias("n_bars_5s")
    ]).sort(ts_col)

def block_3_resample(df_train, df_test):
    print("\n>>> [BLOQUE 3] Resample M3...")
    df_train_m3 = resample_ohlcv_microstructure(df_train, "3m")
    df_test_m3 = resample_ohlcv_microstructure(df_test, "3m")
    print(f"Train M3: {df_train_m3.shape}, Test M3: {df_test_m3.shape}")
    print("✅ [BLOQUE 3] PASS")
    return df_train_m3, df_test_m3

df_train_m3, df_test_m3 = block_3_resample(df_train, df_test)


>>> [BLOQUE 3] Resample M3...
Train M3: (233418, 11), Test M3: (23339, 11)
✅ [BLOQUE 3] PASS


## Bloque 4: Señales M3

In [5]:
def compute_signals_tf(df_tf):
    df = df_tf.sort("ts")
    df = df.with_columns([
        pl.when(pl.col("close") > pl.col("open")).then(1).when(pl.col("close") < pl.col("open")).then(-1).otherwise(None).alias("candle_dir")
    ])
    df = df.with_columns([
        pl.col("open").shift(1).alias("open_1"), pl.col("high").shift(1).alias("high_1"),
        pl.col("low").shift(1).alias("low_1"), pl.col("close").shift(1).alias("close_1"),
        pl.col("candle_dir").shift(1).alias("dir_1"),
    ])
    outside = (pl.col("high") > pl.col("high_1")) & (pl.col("low") < pl.col("low_1"))
    long_sig = (pl.col("dir_1") == -1) & (pl.col("candle_dir") == -1) & outside & (pl.col("close") < pl.col("low_1"))
    short_sig = (pl.col("dir_1") == 1) & (pl.col("candle_dir") == 1) & outside & (pl.col("close") > pl.col("high_1"))
    return df.with_columns([
        pl.when(long_sig).then(1).when(short_sig).then(-1).otherwise(0).alias("signal")
    ])

def block_4_signals(df_train_m3, df_test_m3):
    print("\n>>> [BLOQUE 4] Signals...")
    train_sig = compute_signals_tf(df_train_m3)
    test_sig = compute_signals_tf(df_test_m3)
    print(f"Signals Train: {train_sig.filter(pl.col('signal')!=0).height}")
    print("✅ [BLOQUE 4] PASS")
    return train_sig, test_sig

train_sig, test_sig = block_4_signals(df_train_m3, df_test_m3)


>>> [BLOQUE 4] Signals...
Signals Train: 3961
✅ [BLOQUE 4] PASS


## Bloque 5: Mapping Entry (Asof Forward)

In [6]:
def block_5_map_entry(df_5s, df_signals_m3):
    print("\n>>> [BLOQUE 5] Mapping...")
    signals_active = df_signals_m3.filter(pl.col("signal") != 0)
    signals_active = signals_active.with_columns((pl.col("ts") + pl.duration(minutes=3)).alias("entry_ts_ideal"))
    
    mapped = signals_active.sort("entry_ts_ideal").join_asof(
        df_5s.sort("ts"), left_on="entry_ts_ideal", right_on="ts", strategy="forward"
    )
    if "ts_right" in mapped.columns: mapped = mapped.rename({"ts_right": "entry_ts_real"})
    
    mapped = mapped.with_columns((pl.col("entry_ts_real") - pl.col("entry_ts_ideal")).dt.total_seconds().alias("entry_delay_s"))
    mapped = mapped.filter(pl.col("entry_ts_real").is_not_null())
    
    print(f"Mapped entries: {mapped.height}")
    print("✅ [BLOQUE 5] PASS")
    return mapped

train_map = block_5_map_entry(df_train, train_sig)
test_map = block_5_map_entry(df_test, test_sig)


>>> [BLOQUE 5] Mapping...
Mapped entries: 3961
✅ [BLOQUE 5] PASS

>>> [BLOQUE 5] Mapping...
Mapped entries: 389
✅ [BLOQUE 5] PASS


## Bloque 6: News Flag

In [7]:
def add_news_flag(df_5s, events, pre_min=15, post_min=15):
    df = df_5s.sort("ts")
    ev = events.select(pl.col("event_ts").cast(pl.Datetime("ms"), strict=False)).sort("event_ts").unique(subset=["event_ts"], keep="last")
    
    prev = df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="backward").rename({"event_ts": "prev_event_ts"})
    nxt = df.join_asof(ev, left_on="ts", right_on="event_ts", strategy="forward").rename({"event_ts": "next_event_ts"})
    
    out = prev.with_columns(nxt.get_column("next_event_ts"))
    
    pre_s = pre_min * 60; post_s = post_min * 60; BIG = 10**12
    out = out.with_columns([
        pl.when(pl.col("prev_event_ts").is_null()).then(BIG).otherwise((pl.col("ts") - pl.col("prev_event_ts")).dt.total_seconds()).alias("secs_since_prev_event"),
        pl.when(pl.col("next_event_ts").is_null()).then(BIG).otherwise((pl.col("next_event_ts") - pl.col("ts")).dt.total_seconds()).alias("secs_until_next_event"),
    ]).with_columns([
        ((pl.col("secs_since_prev_event") <= post_s) | (pl.col("secs_until_next_event") <= pre_s)).alias("in_news_window")
    ])
    return out

def block_6_news(df_train_5s, df_test_5s):
    print("\n>>> [BLOQUE 6] News Flag...")
    if not NEWS_FILE.exists(): raise FileNotFoundError("News file missing")
    events = pl.read_parquet(str(NEWS_FILE))
    
    print("Flagging Train 5s...")
    df_train_news = add_news_flag(df_train_5s, events)
    print("Flagging Test 5s...")
    df_test_news = add_news_flag(df_test_5s, events)
    
    print(f"Train in_news: {df_train_news['in_news_window'].sum()}")
    print("✅ [BLOQUE 6] PASS")
    return df_train_news, df_test_news

train_news, test_news = block_6_news(df_train, df_test)


>>> [BLOQUE 6] News Flag...
Flagging Train 5s...
Flagging Test 5s...
Train in_news: 100560
✅ [BLOQUE 6] PASS


## Bloque 7: Filtros (Time & News)

In [8]:
def block_7_filters(df_mapped, df_5s_news):
    print("\n>>> [BLOQUE 7] Filters...")
    joined = df_mapped.join(df_5s_news.select(["ts", "in_news_window"]), left_on="entry_ts_real", right_on="ts", how="left")
    
    joined = joined.with_columns([
        (pl.col("entry_ts_real").dt.hour() < 16).alias("good_time"),
        (~pl.col("in_news_window")).alias("good_news"),
    ])
    joined = joined.with_columns(
        (pl.col("good_time") & pl.col("good_news")).alias("entry_allowed")
    )
    joined = joined.with_columns([
        (~pl.col("good_time")).alias("blocked_by_time"),
        (pl.col("in_news_window")).alias("blocked_by_news")
    ])
    
    session_close = pl.col("entry_ts_real").dt.truncate("1d") + pl.duration(hours=18)
    max_dur = pl.col("entry_ts_real") + pl.duration(hours=2)
    joined = joined.with_columns([
        pl.min_horizontal([session_close, max_dur]).alias("hard_exit_ts")
    ])
    
    print(f"Total: {joined.height}, Allowed: {joined.filter(pl.col('entry_allowed')).height}")
    print("✅ [BLOQUE 7] PASS")
    return joined

train_cands = block_7_filters(train_map, train_news)
test_cands = block_7_filters(test_map, test_news)


>>> [BLOQUE 7] Filters...
Total: 3961, Allowed: 3242
✅ [BLOQUE 7] PASS

>>> [BLOQUE 7] Filters...
Total: 389, Allowed: 324
✅ [BLOQUE 7] PASS


## Bloque 8: Motor 5s (Core)

In [9]:
def block_8_engine(df_5s: pl.DataFrame, df_trades: pl.DataFrame):
    print("\n>>> [BLOQUE 8] Engine 5s running...")
    
    df_5s = df_5s.sort("ts")
    
    SL_PIPS = 10 * 0.0001; TP_PIPS = 10 * 0.0001; TOUCH_15 = 15 * 0.0001; COMM_PIPS = 0.5 * 0.0001
    
    trades = df_trades.filter(pl.col("entry_allowed")).clone()
    n_trades = trades.height
    
    times = df_5s["ts"].cast(pl.Int64).to_numpy()
    opens = df_5s["open"].to_numpy()
    highs = df_5s["high"].to_numpy()
    lows = df_5s["low"].to_numpy()
    bids = df_5s["bid_last"].to_numpy()
    asks = df_5s["ask_last"].to_numpy()
    
    exit_ts_int = np.zeros(n_trades, dtype=np.int64)
    exit_reason = np.array(["" for _ in range(n_trades)], dtype=object)
    exit_price = np.zeros(n_trades, dtype=np.float64)
    pnl_gross = np.zeros(n_trades, dtype=np.float64)
    mae_pips = np.full(n_trades, np.nan)
    mfe_pips = np.full(n_trades, np.nan)
    first_touch = np.array(["NONE" for _ in range(n_trades)], dtype=object)
    spread_entry = np.full(n_trades, np.nan)
    spread_exit = np.full(n_trades, np.nan)
    
    entry_ts_arr = trades["entry_ts_real"].cast(pl.Int64).to_numpy()
    hard_exit_arr = trades["hard_exit_ts"].cast(pl.Int64).to_numpy()
    side_arr = trades["signal"].to_numpy()
    
    count = 0
    for i in range(n_trades):
        t_entry = entry_ts_arr[i]
        t_hard = hard_exit_arr[i]
        side = side_arr[i]
        
        idx_start = np.searchsorted(times, t_entry)
        idx_end = np.searchsorted(times, t_hard, side='right')
        
        if idx_start >= len(times): continue
            
        _high = highs[idx_start:idx_end]
        _low = lows[idx_start:idx_end]
        _bid = bids[idx_start:idx_end]
        _ask = asks[idx_start:idx_end]
        _open = opens[idx_start:idx_end]
        
        if len(_high) == 0: continue
            
        # Entry
        sp_entry_val = (_ask[0] - _bid[0]) / 0.0001
        spread_entry[i] = sp_entry_val
        
        px_entry_exec = _ask[0] if side == 1 else _bid[0]
        px_entry_mid = _open[0]
        
        # Target
        sl_price = px_entry_mid - SL_PIPS if side == 1 else px_entry_mid + SL_PIPS
        tp_price = px_entry_mid + TP_PIPS if side == 1 else px_entry_mid - TP_PIPS
        touch_mae = px_entry_mid - TOUCH_15 if side == 1 else px_entry_mid + TOUCH_15
        touch_mfe = px_entry_mid + TOUCH_15 if side == 1 else px_entry_mid - TOUCH_15
        
        # Sim
        hit_sl = (_low <= sl_price) if side == 1 else (_high >= sl_price)
        hit_tp = (_high >= tp_price) if side == 1 else (_low <= tp_price)
        
        idx_sl = np.argmax(hit_sl) if np.any(hit_sl) else 999999
        idx_tp = np.argmax(hit_tp) if np.any(hit_tp) else 999999
        
        final_idx = -1
        reason = "TIME"
        
        if idx_sl < idx_tp:
            final_idx = idx_sl; reason = "SL"
        elif idx_tp < idx_sl:
            final_idx = idx_tp; reason = "TP"
        elif idx_sl == idx_tp and idx_sl != 999999:
            reason = "SL"; final_idx = idx_sl
        else:
            final_idx = len(_high) - 1; reason = "TIME"
            
        px_exit_exec = 0.0
        if reason == "SL": px_exit_exec = sl_price
        elif reason == "TP": px_exit_exec = tp_price
        else: px_exit_exec = _bid[final_idx] if side == 1 else _ask[final_idx]

        sp_exit_val = (_ask[final_idx] - _bid[final_idx]) / 0.0001
        spread_exit[i] = sp_exit_val

        pnl = (px_exit_exec - px_entry_exec) if side == 1 else (px_entry_exec - px_exit_exec)
        pnl_val = pnl / 0.0001
        
        m_high = np.max(_high); m_low = np.min(_low)
        if side == 1:
            _mfe = (m_high - px_entry_mid) / 0.0001
            _mae = (px_entry_mid - m_low) / 0.0001
        else:
            _mfe = (px_entry_mid - m_low) / 0.0001
            _mae = (m_high - px_entry_mid) / 0.0001
        
        cross_mae = (_low <= touch_mae) if side == 1 else (_high >= touch_mae)
        cross_mfe = (_high >= touch_mfe) if side == 1 else (_low <= touch_mfe)
        i_mae = np.argmax(cross_mae) if np.any(cross_mae) else 999999
        i_mfe = np.argmax(cross_mfe) if np.any(cross_mfe) else 999999
        
        ft_label = "NONE"
        if i_mae < i_mfe: ft_label = "MAE"
        elif i_mfe < i_mae: ft_label = "MFE"
        elif i_mae == i_mfe and i_mae != 999999: ft_label = "MAE"
            
        exit_ts_int[i] = times[idx_start + final_idx]
        exit_reason[i] = reason
        exit_price[i] = px_exit_exec; pnl_gross[i] = pnl_val; mae_pips[i] = _mae; mfe_pips[i] = _mfe; first_touch[i] = ft_label
        count += 1
        
    print(f"Engine finished {count} trades.")
    
    time_unit = trades["entry_ts_real"].dtype.time_unit
    if time_unit is None: time_unit = 'us'
    
    out_trades = trades.with_columns([
        pl.Series(exit_ts_int).cast(pl.Datetime(time_unit)).alias("exit_price_ts"),
        pl.Series(exit_reason).alias("exit_reason"),
        pl.Series(exit_price).alias("exit_price_exec"),
        pl.Series(pnl_gross).alias("pnl_pips_gross"),
        pl.Series(mae_pips).alias("mae_pips"),
        pl.Series(mfe_pips).alias("mfe_pips"),
        pl.Series(first_touch).alias("first_touch_15p"),
        pl.Series(spread_entry).alias("spread_entry"),
        pl.Series(spread_exit).alias("spread_exit"),
    ])
    return out_trades.with_columns((pl.col("pnl_pips_gross") - COMM_PIPS/0.0001).alias("pnl_pips_net"))

train_res = block_8_engine(df_train, train_cands)
test_res = block_8_engine(df_test, test_cands)



>>> [BLOQUE 8] Engine 5s running...
Engine finished 3242 trades.

>>> [BLOQUE 8] Engine 5s running...
Engine finished 324 trades.


## Bloque 9: Dataset Final

In [10]:
def block_9_construct_dataset(df_candidates, df_results):
    print("\n>>> [BLOQUE 9] Final Dataset Construction & ML Targets...")
    
    if "ts" in df_candidates.columns:
        df_candidates = df_candidates.rename({"ts": "signal_ts"})
    
    result_cols = ["entry_ts_real", "exit_reason", "exit_price_exec", "pnl_pips_gross", "pnl_pips_net", 
                   "mae_pips", "mfe_pips", "first_touch_15p", "spread_entry", "spread_exit", "exit_price_ts"]
    
    final_df = df_candidates.join(
        df_results.select(result_cols), on="entry_ts_real", how="left"
    )
    
    # --- ML TARGETS---
    # MFE/MAE hasta 2 horas
    # target_X:  MFE alcanzo X pips antes que MAE?
    
    final_df = final_df.with_columns(
        (pl.col("exit_price_ts") - pl.col("entry_ts_real")).dt.total_seconds().alias("duration_s")
    ).with_columns([

        (pl.col("mfe_pips") / (pl.col("mae_pips") + 0.1)).alias("ratio_mfe_mae"),
        
        # MFE >= 15
        (pl.col("mfe_pips") >= 15).cast(pl.Int8).alias("target_15p"),
        # MFE >= 30
        (pl.col("mfe_pips") >= 30).cast(pl.Int8).alias("target_30p"),
        # MFE >= 50
        (pl.col("mfe_pips") >= 50).cast(pl.Int8).alias("target_50p")
    ])
    
    print(f"Final with ML Targets: {final_df.height}")
    assert final_df.height == df_candidates.height
    print("\u2705 [BLOQUE 9] PASS")
    return final_df

train_final = block_9_construct_dataset(train_cands, train_res)
test_final = block_9_construct_dataset(test_cands, test_res)


>>> [BLOQUE 9] Final Dataset Construction & ML Targets...
Final with ML Targets: 3961
✅ [BLOQUE 9] PASS

>>> [BLOQUE 9] Final Dataset Construction & ML Targets...
Final with ML Targets: 389
✅ [BLOQUE 9] PASS


## Bloque 10: Export & Metrics

In [11]:
def block_10_export(df_train_final, df_test_final):
    print("\n>>> [BLOQUE 10] Export & Metrics...")
    print(f"Saving Train to {OUT_TRAIN}...")
    df_train_final.write_parquet(str(OUT_TRAIN))
    print(f"Saving Test to {OUT_TEST}...")
    df_test_final.write_parquet(str(OUT_TEST))
    
    for name, df in [("TRAIN", df_train_final), ("TEST", df_test_final)]:
        executed = df.filter(pl.col("pnl_pips_net").is_not_null())
        n_exec = executed.height
        if n_exec > 0:
            win = executed.filter(pl.col("pnl_pips_net") > 0).height
            wr = (win/n_exec)*100
            avg_pnl = executed["pnl_pips_net"].mean()
            print(f"{name}: Executed={n_exec}, WR={wr:.2f}%, Avg PnL={avg_pnl:.2f} pips")
        else:
            print(f"{name}: No executed trades.")
            
    print("✅ [BLOQUE 10] PASS")

train_final = train_final.join(df_train_m3.select(["ts", "n_bars_5s"]), left_on="signal_ts", right_on="ts", how="left").rename({"n_bars_5s": "n_bars_5s_signal"})
test_final = test_final.join(df_test_m3.select(["ts", "n_bars_5s"]), left_on="signal_ts", right_on="ts", how="left").rename({"n_bars_5s": "n_bars_5s_signal"})

block_10_export(train_final, test_final)


>>> [BLOQUE 10] Export & Metrics...
Saving Train to /Users/rjsaavedra/Documents/Projects/ml_master_project/src/train_results_M3_final.parquet...
Saving Test to /Users/rjsaavedra/Documents/Projects/ml_master_project/src/test_results_M3_final.parquet...
TRAIN: Executed=3242, WR=51.33%, Avg PnL=-0.35 pips
TEST: Executed=324, WR=53.70%, Avg PnL=0.06 pips
✅ [BLOQUE 10] PASS


In [12]:
train_final

signal_ts,open,high,low,close,ticks,bid_vol_sum,ask_vol_sum,bid_last,ask_last,n_bars_5s_signal,candle_dir,open_1,high_1,low_1,close_1,dir_1,signal,entry_ts_ideal,entry_ts_real,open_right,high_right,low_right,close_right,ticks_right,bid_last_right,ask_last_right,bid_vol_sum_right,ask_vol_sum_right,entry_delay_s,in_news_window,good_time,good_news,entry_allowed,blocked_by_time,blocked_by_news,hard_exit_ts,exit_reason,exit_price_exec,pnl_pips_gross,pnl_pips_net,mae_pips,mfe_pips,first_touch_15p,spread_entry,spread_exit,exit_price_ts,duration_s,ratio_mfe_mae,target_15p,target_30p,target_50p,n_bars_5s_right
datetime[ms],f64,f64,f64,f64,u32,f64,f64,f64,f64,u32,i32,f64,f64,f64,f64,i32,i32,datetime[ms],datetime[ms],f64,f64,f64,f64,u32,f64,f64,f64,f64,i64,bool,bool,bool,bool,bool,bool,datetime[ms],str,f64,f64,f64,f64,f64,str,f64,f64,datetime[ms],i64,f64,i8,i8,i8,u32
2021-01-01 09:51:00,1.20946,1.2098,1.20931,1.2098,246,395.540001,396.480001,1.20978,1.20982,35,1,1.209455,1.209715,1.20936,1.209465,1,-1,2021-01-01 09:54:00,2021-01-01 09:54:00,1.209795,1.2098,1.209535,1.209535,20,1.20952,1.20955,93.87,18.71,0,false,true,true,true,false,false,2021-01-01 11:54:00,"""TP""",1.208795,7.25,6.75,0.05,28.5,"""MFE""",0.3,0.5,2021-01-01 10:15:00,1260,190.0,1,0,0,35
2021-01-01 10:36:00,1.20804,1.208355,1.20697,1.20734,506,756.040001,951.200003,1.20732,1.20736,36,-1,1.20818,1.20819,1.207855,1.208045,-1,1,2021-01-01 10:39:00,2021-01-01 10:39:00,1.20731,1.20737,1.20731,1.20737,13,1.20736,1.20738,17.25,16.51,0,false,true,true,true,false,false,2021-01-01 12:39:00,"""TP""",1.20831,9.3,8.8,3.65,15.6,"""MFE""",0.2,0.3,2021-01-01 11:52:05,4385,4.16,1,0,0,36
2021-01-01 13:57:00,1.208735,1.208975,1.20835,1.208485,371,774.230006,364.77,1.20848,1.20849,36,-1,1.20891,1.208965,1.208705,1.20876,-1,1,2021-01-01 14:00:00,2021-01-01 14:00:00,1.20849,1.2086,1.208475,1.20848,25,1.20847,1.20849,68.389999,16.42,0,false,true,true,true,false,false,2021-01-01 16:00:00,"""TP""",1.20949,10.0,9.5,5.0,14.65,"""NONE""",0.2,0.4,2021-01-01 14:19:35,1175,2.872549,0,0,0,36
2021-01-02 09:21:00,1.207835,1.20796,1.207455,1.207605,238,403.030002,400.520005,1.20759,1.20762,35,-1,1.207855,1.20792,1.207685,1.20783,-1,1,2021-01-02 09:24:00,2021-01-02 09:24:00,1.20761,1.207645,1.20761,1.207645,4,1.20763,1.20766,3.75,22.26,0,false,true,true,true,false,false,2021-01-02 11:24:00,"""SL""",1.20661,-10.5,-11.0,44.0,3.05,"""MAE""",0.3,0.4,2021-01-02 09:39:25,925,0.069161,0,0,0,35
2021-01-02 12:45:00,1.203255,1.20334,1.203,1.203095,169,304.870003,278.789999,1.20308,1.20311,33,-1,1.20328,1.203325,1.20311,1.203265,-1,1,2021-01-02 12:48:00,2021-01-02 12:48:05,1.203105,1.203115,1.203105,1.203115,3,1.2031,1.20313,1.74,4.39,5,false,true,true,true,false,false,2021-01-02 14:48:05,"""TP""",1.204105,9.75,9.25,7.1,18.75,"""MFE""",0.3,0.3,2021-01-02 13:00:35,750,2.604167,1,0,0,33
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-06-29 13:30:00,1.152815,1.15299,1.152495,1.152535,197,264.429997,238.019996,1.15252,1.15255,31,-1,1.15296,1.15297,1.15264,1.152835,-1,1,2025-06-29 13:33:00,2025-06-29 13:33:00,1.152525,1.152535,1.152525,1.152535,4,1.15252,1.15255,7.2,4.5,0,false,true,true,true,false,false,2025-06-29 15:33:00,"""TP""",1.153525,9.75,9.25,5.8,26.35,"""MFE""",0.3,0.4,2025-06-29 13:47:40,880,4.466102,1,0,0,31
2025-06-30 08:00:00,1.155345,1.155595,1.155135,1.15528,261,311.539997,380.859996,1.15525,1.15531,35,-1,1.155395,1.15555,1.155285,1.15535,-1,1,2025-06-30 08:03:00,2025-06-30 08:03:00,1.15527,1.155295,1.15527,1.155285,3,1.15526,1.15531,4.5,4.5,0,false,true,true,true,false,false,2025-06-30 10:03:00,"""TIME""",1.15535,0.4,-0.1,7.7,8.0,"""NONE""",0.5,0.4,2025-06-30 10:03:00,7200,1.025641,0,0,0,35
2025-06-30 12:06:00,1.153335,1.15338,1.15297,1.15297,191,526.830001,374.399996,1.15296,1.15298,30,-1,1.15335,1.153375,1.15316,1.15334,-1,1,2025-06-30 12:09:00,2025-06-30 12:09:00,1.152975,1.152975,1.15297,1.15297,2,1.15295,1.15299,9.48,3.69,0,false,true,true,true,false,false,2025-06-30 14:09:00,"""SL""",1.151975,-10.15,-10.65,71.55,0.0,"""MAE""",0.4,0.4,2025-06-30 12:14:40,340,0.0,0,0,0,30
2025-06-30 16:18:00,1.146855,1.1474,1.14671,1.1474,151,185.069998,209.359997,1.14738,1.14742,31,1,1.14679,1.146995,1.14677,1.14686,1,-1,2025-06-30 16:21:00,2025-06-30 16:21:00,1.147395,1.1474,1.14739,1.147395,4,1.14736,1.14743,8.1,5.4,0,false,false,true,false,true,false,2025-06-30 18:00:00,,,,,,,,,,,,,,,,31


## Bloque 11: Baseline Metrics

In [None]:
def calculate_metrics(df, name=""):
    df_exec = df.filter(pl.col("pnl_pips_net").is_not_null())
    if df_exec.height == 0: return
    
    n_trades = df_exec.height
    n_win = df_exec.filter(pl.col("pnl_pips_net") > 0).height
    win_rate = (n_win / n_trades) * 100
    
    total_return_pips = df_exec["pnl_pips_net"].sum()
    avg_return_pips = df_exec["pnl_pips_net"].mean() # Expectancy
    
    equity = df_exec["pnl_pips_net"].to_numpy().cumsum()
    high_water_mark = np.maximum.accumulate(equity)
    drawdowns = high_water_mark - equity
    max_dd_pips = np.max(drawdowns) if len(drawdowns) > 0 else 0
    
    print(f"--- {name} Baseline Metrics ---")
    print(f"Trades: {n_trades}")
    print(f"Win Rate: {win_rate:.2f}%")
    print(f"Expectancy (Avg PnL): {avg_return_pips:.2f} pips")
    print(f"Total Return: {total_return_pips:.2f} pips")
    print(f"Max Drawdown: {max_dd_pips:.2f} pips")
    
    return {
        "trades": n_trades,
        "win_rate": win_rate,
        "expectancy": avg_return_pips,
        "total_return": total_return_pips,
        "max_dd": max_dd_pips
    }

print("Calculating Metrics...")
block_11_metrics = calculate_metrics(train_final, "TRAIN")
calculate_metrics(test_final, "TEST")

Calculating Metrics...
--- TRAIN Baseline Metrics ---
Trades: 3242
Win Rate: 51.33%
Expectancy (Avg PnL): -0.35 pips
Total Return: -1124.65 pips
Max Drawdown: 1411.40 pips
--- TEST Baseline Metrics ---
Trades: 324
Win Rate: 53.70%
Expectancy (Avg PnL): 0.06 pips
Total Return: 18.20 pips
Max Drawdown: 168.95 pips


{'trades': 324,
 'win_rate': 53.70370370370371,
 'expectancy': 0.05617283950595113,
 'total_return': 18.199999999928167,
 'max_dd': np.float64(168.95000000002796)}

In [14]:
train_final.columns

['signal_ts',
 'open',
 'high',
 'low',
 'close',
 'ticks',
 'bid_vol_sum',
 'ask_vol_sum',
 'bid_last',
 'ask_last',
 'n_bars_5s_signal',
 'candle_dir',
 'open_1',
 'high_1',
 'low_1',
 'close_1',
 'dir_1',
 'signal',
 'entry_ts_ideal',
 'entry_ts_real',
 'open_right',
 'high_right',
 'low_right',
 'close_right',
 'ticks_right',
 'bid_last_right',
 'ask_last_right',
 'bid_vol_sum_right',
 'ask_vol_sum_right',
 'entry_delay_s',
 'in_news_window',
 'good_time',
 'good_news',
 'entry_allowed',
 'blocked_by_time',
 'blocked_by_news',
 'hard_exit_ts',
 'exit_reason',
 'exit_price_exec',
 'pnl_pips_gross',
 'pnl_pips_net',
 'mae_pips',
 'mfe_pips',
 'first_touch_15p',
 'spread_entry',
 'spread_exit',
 'exit_price_ts',
 'duration_s',
 'ratio_mfe_mae',
 'target_15p',
 'target_30p',
 'target_50p',
 'n_bars_5s_right']