### # 03_purge_and_label.ipynb

In [None]:
# 03_purge_and_label.ipynb

import pandas as pd
import numpy as np
import os

# --- CONFIG ---
DATA_DIR = "../data"
SYMBOL = "EURUSD"
INPUT_FILE = os.path.join(DATA_DIR, f"{SYMBOL}_H1_Engineered.parquet")
OUTPUT_TRAIN_BUY = os.path.join(DATA_DIR, "Train_Buy_Sniper.parquet")
OUTPUT_TRAIN_SELL = os.path.join(DATA_DIR, "Train_Sell_Sniper.parquet")
# Hours of day (UTC) kept by the session filter: the London/NY overlap.
OVERLAP_HOURS_UTC = [13, 14, 15, 16]

# Load Data
df = pd.read_parquet(INPUT_FILE)

# The session filter needs bar timestamps; fail with a clear message instead of
# an opaque AttributeError on `.hour` when the index is not datetime-like.
if not isinstance(df.index, pd.DatetimeIndex):
    raise TypeError(
        f"{INPUT_FILE} must be indexed by bar timestamps (DatetimeIndex), "
        f"got {type(df.index).__name__}"
    )

# --- STEP 1: THE PURGE (Session Filter) ---
# Filter for London/NY Overlap: 13, 14, 15, 16 UTC
# A tz-aware index is converted to UTC so the hour numbers below mean what they say.
# NOTE: a tz-naive index is taken as-is and ASSUMED to be UTC. If the MT5 export
# is in Broker Time (UTC+2/3), localize it upstream or adjust OVERLAP_HOURS_UTC.
bar_times = df.index if df.index.tz is None else df.index.tz_convert("UTC")
df['hour'] = bar_times.hour
overlap_mask = df['hour'].isin(OVERLAP_HOURS_UTC)
df_sniper = df[overlap_mask].copy()

print(f"📉 Purged Data: Reduced from {len(df)} to {len(df_sniper)} institutional candles.")

# --- STEP 2: TRIPLE BARRIER LABELING ---
# We use the next 4 hours as the horizon (since we trade H1)

def get_labels(row, horizon=4, profit_factor=2.0, loss_factor=1.0):
    """Placeholder for a per-row triple-barrier labeler; currently a no-op.

    None of the arguments are used and the function always returns ``None``.
    The labeling this notebook actually performs is done by the vectorized
    look-ahead code that follows, not by this function.

    Args:
        row: Unused.
        horizon: Unused.
        profit_factor: Unused.
        loss_factor: Unused.

    Returns:
        None.
    """
    # Not implemented: kept only as a stub for a future row-wise version.
    return None

# Vectorized Lookahead (Faster than looping)
# Look 1 to HORIZON_BARS steps ahead of each signal bar.
HORIZON_BARS = 4        # look-ahead length, in rows of the full H1 frame
TARGET_ATR_MULT = 1.5   # take-profit distance, in ATRs
STOP_ATR_MULT = 1.0     # stop-loss distance, in ATRs

indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=HORIZON_BARS)

# The window is evaluated on the FULL frame `df`, not on the session-filtered
# `df_sniper`: rolling over the filtered rows would make "the next 4 rows" jump
# across the bars removed by the purge (e.g. from 16:00 to the next day's 13:00).
# shift(-1) drops the signal bar itself from the window, because a forward
# window starts at the current row; the entry price is that bar's close.
# min_periods=HORIZON_BARS leaves NaN where fewer than HORIZON_BARS future bars
# exist (the tail of the data) instead of scoring a truncated horizon.
fwd_high_all = (
    df['high'].shift(-1).rolling(window=indexer, min_periods=HORIZON_BARS).max()
)
fwd_low_all = (
    df['low'].shift(-1).rolling(window=indexer, min_periods=HORIZON_BARS).min()
)

# df_sniper holds exactly the rows of df selected by overlap_mask, in the same
# order, so a positional copy is safe even if timestamps are duplicated.
session_rows = overlap_mask.to_numpy()
df_sniper['fwd_high'] = fwd_high_all.to_numpy()[session_rows]
df_sniper['fwd_low'] = fwd_low_all.to_numpy()[session_rows]

# Dynamic Targets based on ATR (Volatility Adjusted)
# If ATR is 10 pips, Target is 15 pips (1.5), Stop is 10 pips (1.0)
# NOTE(review): an earlier comment and get_labels' defaults mention a 2.0 target;
# the 1.5 the code has always used is kept here. Confirm which one is intended.
entry_price = df_sniper['close']
atr = df_sniper['ATR']
df_sniper['target_up'] = entry_price + (atr * TARGET_ATR_MULT)
df_sniper['stop_down'] = entry_price - (atr * STOP_ATR_MULT)
df_sniper['target_down'] = entry_price - (atr * TARGET_ATR_MULT)
df_sniper['stop_up'] = entry_price + (atr * STOP_ATR_MULT)

# --- STEP 3: CLASS ASSIGNMENT ---
# Both barriers are checked against the look-ahead extremes (fwd_high / fwd_low).
# The stop check must not use the signal bar's own low/high: that bar is already
# closed at entry, so it says nothing about whether the stop is hit afterwards.
#
# Note: H1 extremes cannot tell which barrier was touched first. When both the
# target and the stop fall inside the window the row is labeled 0 (conservative).
# This is imperfect without M1, but good for V1.
# Rows whose look-ahead values are NaN compare as False and therefore get 0.

# Buy Logic: Did it hit Target Up without touching Stop Down?
buy_target_hit = df_sniper['fwd_high'] >= df_sniper['target_up']
buy_stop_untouched = df_sniper['fwd_low'] > df_sniper['stop_down']
df_sniper['LABEL_BUY'] = np.where(buy_target_hit & buy_stop_untouched, 1, 0)

# Sell Logic: Did it hit Target Down without touching Stop Up?
sell_target_hit = df_sniper['fwd_low'] <= df_sniper['target_down']
sell_stop_untouched = df_sniper['fwd_high'] < df_sniper['stop_up']
df_sniper['LABEL_SELL'] = np.where(sell_target_hit & sell_stop_untouched, 1, 0)

# --- STEP 4: REGRESSION TARGETS ---
# Log return from the bar's close to the look-ahead extreme, one column per side.
_entry_close = df_sniper['close']
for _target_col, _extreme_col in (
    ('REG_TARGET_HIGH', 'fwd_high'),
    ('REG_TARGET_LOW', 'fwd_low'),
):
    df_sniper[_target_col] = np.log(df_sniper[_extreme_col] / _entry_close)

# --- STEP 5: SPLIT BY REGIME (The "Hail Mary") ---

# Rows without a complete look-ahead window have NaN forward extremes: their
# regression targets are NaN and their class labels were forced to 0 without
# the outcome being known. Keep them out of the training sets.
df_labeled = df_sniper.dropna(subset=['fwd_high', 'fwd_low'])

# Rows where close equals EMA_200, or where EMA_200 is NaN, match neither
# comparison below and so land in neither dataset.
# Dataset A: Bull Market (Only for Buy Model)
df_bull = df_labeled[df_labeled['close'] > df_labeled['EMA_200']].copy()
# Dataset B: Bear Market (Only for Sell Model)
df_bear = df_labeled[df_labeled['close'] < df_labeled['EMA_200']].copy()

# Save
df_bull.to_parquet(OUTPUT_TRAIN_BUY)
df_bear.to_parquet(OUTPUT_TRAIN_SELL)

print(f"✅ Saved Bull Data: {len(df_bull)} rows")
print(f"✅ Saved Bear Data: {len(df_bear)} rows")