### Focus: The "Uncensored" Truth.

### 1: Imports & Load Data

In [None]:
# Cell 1: Imports & Load
import pandas as pd
import numpy as np
import os

# Input: raw price bars (EURUSD H1, going by the file name). The path is
# relative to the directory the notebook is executed from.
RAW_FILE = "../data/EURUSD_H1_Raw.parquet"
# Output: destination for the frame once the label column has been added.
LABELED_FILE = "../data/EURUSD_H1_Labeled.parquet"

# Load the whole dataset into memory; the following cells read its
# 'close', 'high' and 'low' columns.
df = pd.read_parquet(RAW_FILE)
print(f"ðŸ”„ Loaded {len(df)} rows.")

### 2: Dynamic Barriers (The Triple Barrier)

In [None]:
# Cell 2: Dynamic Barriers (The Triple Barrier)

# --- Barrier parameters ---------------------------------------------------
# Vertical barrier: each row is given at most this many subsequent bars to
# reach one of the horizontal barriers (24 hours, assuming H1 bars).
T_BARRIER = 24
# Horizontal barriers, expressed as multiples of the rolling volatility.
PT_MULTIPLIER = 2.0  # take-profit distance = 2 x volatility
SL_MULTIPLIER = 2.0  # stop-loss distance = 2 x volatility

# --- Volatility estimate --------------------------------------------------
# Rolling standard deviation of bar-to-bar percentage changes of 'close',
# taken over the last VOL_WINDOW rows. Leading rows stay NaN until a full
# window of returns is available.
VOL_WINDOW = 24
_close_returns = df['close'].pct_change()
df['returns'] = _close_returns
df['volatility'] = _close_returns.rolling(window=VOL_WINDOW).std()

### 3: The Labeling Loop (Uncensored)

In [None]:
# Cell 3: The Labeling Loop (Uncensored)
# No "hours_to_close" filter. If it's a win, it's a win.


def _triple_barrier_labels(close, high, low, vol, horizon, pt_mult, sl_mult):
    """Label every row by which barrier price touches first.

    For row ``i`` the barriers are ``close[i] * (1 +/- vol[i] * mult)`` and
    the look-ahead window is the next ``horizon`` rows (``i+1 .. i+horizon``).

    Args:
        close, high, low, vol: 1-D numpy arrays of equal length.
        horizon: number of future rows inspected for each row.
        pt_mult: take-profit distance as a multiple of ``vol``.
        sl_mult: stop-loss distance as a multiple of ``vol``.

    Returns:
        Float array with one entry per input row:
          1   -> upper barrier touched first (Buy Win)
         -1   -> lower barrier touched first (Sell Win)
          0   -> neither touched inside the window, or both first touched on
                 the same bar (the order cannot be told apart from bar data)
          NaN -> ``vol`` is NaN, or the row has no complete look-ahead window
    """
    n_rows = len(close)
    out = np.full(n_rows, np.nan)

    # Only rows with a full look-ahead window are labeled. Clamping at zero
    # keeps the result the same length as the input even when the data set
    # is shorter than the horizon (previously a length-mismatch error).
    n_labeled = max(n_rows - horizon, 0)

    for i in range(n_labeled):
        sigma = vol[i]
        if np.isnan(sigma):
            continue  # no volatility estimate yet -> leave as NaN

        upper = close[i] * (1 + sigma * pt_mult)
        lower = close[i] * (1 - sigma * sl_mult)

        ahead = slice(i + 1, i + 1 + horizon)
        tp_hits = np.flatnonzero(high[ahead] >= upper)
        sl_hits = np.flatnonzero(low[ahead] <= lower)

        # `horizon` is one past the last valid offset, so it works as a
        # "never touched" sentinel for the comparisons below.
        first_tp = tp_hits[0] if tp_hits.size else horizon
        first_sl = sl_hits[0] if sl_hits.size else horizon

        if first_tp < first_sl:
            out[i] = 1   # Buy Win
        elif first_sl < first_tp:
            out[i] = -1  # Sell Win
        else:
            out[i] = 0   # Time Expired (Hold), or same-bar tie

    return out


# Pre-convert to numpy for speed
close_arr = df['close'].values
high_arr = df['high'].values
low_arr = df['low'].values
vol_arr = df['volatility'].values

print("ðŸ§ª labeling...")

labels = _triple_barrier_labels(
    close_arr, high_arr, low_arr, vol_arr,
    T_BARRIER, PT_MULTIPLIER, SL_MULTIPLIER,
)
df['label'] = labels

# Cleanup: drop rows without a label (volatility warm-up and the tail that
# lacks a full look-ahead window). The explicit copy makes the filtered frame
# independent, so the column assignment below cannot trigger a
# SettingWithCopyWarning.
df = df.dropna(subset=['label']).copy()
df['label'] = df['label'].astype(int)
print(f"ðŸ“Š Labels: {df['label'].value_counts()}")

### Visualization (NEW)

### 4: Save

In [None]:
# Cell 4: Save
# Write the labeled frame to LABELED_FILE in parquet format.
# NOTE(review): nothing in this notebook creates the target directory —
# confirm "../data" exists before running.
df.to_parquet(LABELED_FILE)
print("ðŸ’¾ Saved Labeled Data.")