### # 02_resample_and_engineer.ipynb

In [None]:


import pandas as pd
import pandas_ta as ta  # RECOMMEND: pip install pandas_ta
import numpy as np
import os

# --- CONFIG ---
# Directory (relative to the working directory) that holds both the raw input
# and the engineered output parquet files.
DATA_DIR = "../data"
# Instrument symbol; used only to build the two file names below.
SYMBOL = "EURUSD"
# Raw 1-minute bars, read by the LOAD step.
INPUT_FILE = os.path.join(DATA_DIR, f"{SYMBOL}_M1_Raw.parquet")
# Hourly bars plus engineered features, written at the end of the script.
OUTPUT_FILE = os.path.join(DATA_DIR, f"{SYMBOL}_H1_Engineered.parquet")

# --- LOAD ---
# Read the raw M1 bars. The resample step below requires a datetime-like index
# and the five columns named in `ohlc_dict`.
print("⏳ Loading M1 Data...")
df_m1 = pd.read_parquet(INPUT_FILE)

# --- STEP 1: RESAMPLE TO H1 ---
# We aggregate M1 into H1 bars to get the "clean" OHLC
ohlc_dict = {
    'open': 'first',        # first M1 open inside the hour
    'high': 'max',          # highest M1 high inside the hour
    'low': 'min',           # lowest M1 low inside the hour
    'close': 'last',        # last M1 close inside the hour
    'tick_volume': 'sum'    # total ticks over the hour
}
# Use the lowercase 'h' hour alias: the uppercase 'H' spelling is deprecated
# since pandas 2.2 and emits a FutureWarning.
# Hours that contain no M1 rows come out with NaN OHLC values, so dropna()
# removes them and only hours with actual data remain.
df_h1 = df_m1.resample('1h').agg(ohlc_dict).dropna()
print(f"✅ Resampled to {len(df_h1)} H1 candles.")

# --- STEP 2: FEATURE ENGINEERING (CONTINUOUS) ---
# Every indicator is computed here, on the full resampled frame and before any
# hours are filtered out, so the recursive / rolling windows run over
# consecutive bars.

# 1. Trend filters: slow (200) and fast (50) EMAs of the close.
for ema_len in (200, 50):
    df_h1[f'EMA_{ema_len}'] = ta.ema(df_h1['close'], length=ema_len)

# 2. The "Sniper" features (velocity & extension)
# Slope: bar-to-bar change of the 50 EMA, as a plain first difference.
df_h1['EMA_50_Slope'] = df_h1['EMA_50'] - df_h1['EMA_50'].shift(1)
# Distance: signed gap between close and the 200 EMA, as a fraction of the
# 200 EMA (mean-reversion pressure).
df_h1['Dist_from_200'] = (
    df_h1['close'].sub(df_h1['EMA_200']).div(df_h1['EMA_200'])
)

# 3. Oscillators (both with a 14-bar lookback)
df_h1['RSI'] = ta.rsi(close=df_h1['close'], length=14)
df_h1['ATR'] = ta.atr(
    high=df_h1['high'],
    low=df_h1['low'],
    close=df_h1['close'],
    length=14,
)

# 4. Volatility context: this bar's tick volume relative to the mean of the
# last 20 bars (current bar included).
df_h1['Rel_Vol'] = df_h1['tick_volume'].div(
    df_h1['tick_volume'].rolling(window=20).mean()
)

# --- STEP 3: CLEANUP ---
# Remove, in place, every row that still has a NaN in any column
# (for example the first rows where the rolling windows are not yet filled).
df_h1.dropna(axis=0, how='any', inplace=True)

# Persist the engineered frame, then echo the last few rows as a sanity check.
df_h1.to_parquet(path=OUTPUT_FILE)
print(f"✅ Features Engineered. Saved to {OUTPUT_FILE}")
preview = df_h1.loc[:, ['close', 'EMA_200', 'EMA_50_Slope', 'RSI']].tail()
print(preview)