In [None]:
!pip install ta

In [None]:
import sys
import os

# Add the root directory "chaoticX" to the system path
sys.path.append(os.path.abspath(".."))  # one level up from Notebooks/


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ta
from Data.timeFrames import timeFrame
from Data.binanceAPI import BinanceAPI
from Core.zone_detection import ZoneDetector
from Core.zone_merge import ZoneMerger
from Core.zone_reactions import ZoneReactor


In [None]:
from google.colab import drive
drive.mount('/content/drive')


In [None]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/OHLC.csv')

In [None]:
df

In [None]:
import requests

def get_orderbook_price(symbol, target_price):
    """Look up the resting bid/ask quantity at one exact price level on Binance.

    Fetches the order book for ``symbol`` from the public ``/api/v3/depth``
    endpoint and searches both sides for a level whose price equals
    ``target_price``.

    Args:
        symbol: Trading pair such as ``"BTCUSDT"``; upper-cased before the request.
        target_price: Price level to look up; anything ``float()`` accepts.

    Returns:
        dict with three keys: ``"bid_quantity"`` and ``"ask_quantity"`` hold the
        quantity reported by the API at that price (``None`` when that side has
        no such level), and ``"response"`` holds the decoded JSON payload.

    Raises:
        requests.HTTPError: when the API answers with an error status.
    """
    url = "https://api.binance.com/api/v3/depth"
    params = {"symbol": symbol.upper(), "limit": 10000}

    # A timeout keeps a stalled connection from hanging the kernel forever, and
    # raise_for_status surfaces API errors instead of a KeyError on "bids" below.
    reply = requests.get(url, params=params, timeout=10)
    reply.raise_for_status()
    res = reply.json()

    # Match prices numerically. The API reports prices as fixed-decimal strings
    # (e.g. "109000.00000000"), so a textual key such as str(109000.0) ==
    # "109000.0" would never be found.
    target = float(target_price)

    bids = {float(price): qty for price, qty in res["bids"]}
    asks = {float(price): qty for price, qty in res["asks"]}

    return {
        "bid_quantity": bids.get(target),
        "ask_quantity": asks.get(target),
        "response": res,
    }

# Example usage: show only the matched quantities. The "response" entry holds
# the full order-book payload and would flood the cell output if printed.
orderbook_hit = get_orderbook_price("BTCUSDT", 109000.00)
print({side: orderbook_hit[side] for side in ("bid_quantity", "ask_quantity")})


In [None]:
df.to_csv('OHLC.csv')

In [None]:
api = BinanceAPI()
df_4h = api.get_ohlcv(interval='4h')
df_1d = api.get_ohlcv(interval='1D')
df_1h = api.get_ohlcv()

In [None]:
df_4h.tail(5)

In [None]:
detector1h = ZoneDetector(df_1h)
zone_1h = detector1h.get_zones()
detector4h = ZoneDetector(df_4h,timeframe= "4h")
zone_4h = detector4h.get_zones()
detector1d = ZoneDetector(df_1d,timeframe="1D")
zone_1d = detector1d.get_zones()


In [None]:
zone_1d

In [None]:
merger = ZoneMerger(zone_1h+zone_4h+zone_1d)
zones = merger.merge()
#zones = merger.add_liq_confluence(zones)

SMC Structure Detection (FVG)

In [None]:
def detect_fvg(df,threshold = 300,timeframe = '1h'):
    """
    Detect Fair Value Gaps (FVGs) in an OHLCV frame.

    For every candle ``i`` (from position 5 up to the second-to-last row) the
    candle before it and the candle after it are compared:

    * bullish gap - ``low[i + 1] > high[i - 1]``
    * bearish gap - ``high[i + 1] < low[i - 1]``

    A gap is recorded only when its size is at least ``threshold`` (expressed in
    the same units as the price columns).

    Args:
        df: frame providing the columns ``high``, ``low``, ``open``, ``close``,
            ``volume``, ``ema20``, ``ema50``, ``atr``, ``rsi`` and ``atr_mean``.
        threshold: minimum gap size for a gap to be reported.
        timeframe: label copied verbatim into each record's ``time_frame``.

    Returns:
        list of dicts in ascending candle order. Each record carries the candle
        position (``index``), the gap ``type``, the zone bounds and width,
        indicator values read at candle ``i``, candle-shape features, and
        ``touch_index`` - the first later candle (from ``i + 2``) that opens
        beyond the far edge of the gap and trades back through it, or ``None``.
    """
    high_arr = df['high'].values
    low_arr = df['low'].values
    open_arr = df['open'].values
    close_arr = df['close'].values
    volume_arr = df['volume'].values
    ema20_arr = df['ema20'].values
    ema50_arr = df['ema50'].values
    atr_arr = df['atr'].values
    rsi_arr = df['rsi'].values
    atr_mean_arr = df['atr_mean'].values

    n_rows = len(df)

    # NOTE(review): these rolling windows end at candle i (inclusive) and use
    # pandas' sample std, whereas detect_order_blocks takes the five candles
    # *before* i with numpy's population std - confirm which is intended.
    vol_mean_5 = df['volume'].rolling(window=5).mean().values
    close_std_5 = df['close'].rolling(window=5).std().values
    # np.roll wraps around for the first five rows, but the scan starts at 5.
    close_delta_5 = close_arr - np.roll(close_arr, 5)

    def first_touch(start, level, from_above):
        """Return the first position >= start whose candle crosses ``level``."""
        for j in range(start, n_rows):
            if from_above:
                hit = open_arr[j] > level and low_arr[j] < level
            else:
                hit = open_arr[j] < level and high_arr[j] > level
            if hit:
                return j
        return None

    def build_record(i, kind, top, bottom, width, touch):
        """Assemble the feature dict for the gap centred on candle ``i``."""
        body = abs(open_arr[i] - close_arr[i])
        span = high_arr[i] - low_arr[i]
        body_share = body / span if span != 0 else 0
        return {
            'index': i,
            'type': kind,
            'ema 20': ema20_arr[i],
            'ema 50': ema50_arr[i],
            'atr': atr_arr[i],
            'rsi': rsi_arr[i],
            'atr_mean': atr_mean_arr[i],
            'zone_high': top,
            'zone_low': bottom,
            'zone_width': width,
            'body_size': body,
            'wick_ratio': 1 - body_share,
            'volume_on_creation': volume_arr[i],
            'avg_volume_past_5': vol_mean_5[i],
            'prev_volatility_5': close_std_5[i],
            'momentum_5': close_delta_5[i],
            'touch_index': touch,
            'time_frame': timeframe,
        }

    gaps = []
    for i in range(5, n_rows - 1):
        before_high, before_low = high_arr[i - 1], low_arr[i - 1]
        after_high, after_low = high_arr[i + 1], low_arr[i + 1]

        if after_low > before_high:
            width = after_low - before_high
            if width >= threshold:
                touch = first_touch(i + 2, after_low, True)
                gaps.append(build_record(i, 'Bullish FVG', after_low, before_high, width, touch))
        elif after_high < before_low:
            width = before_low - after_high
            if width >= threshold:
                touch = first_touch(i + 2, after_high, False)
                gaps.append(build_record(i, 'Bearish FVG', before_low, after_high, width, touch))

    return gaps

In [None]:
def check_fvg_reactions(df, fvg_list, lookahead=24, N=24):
    """Classify how price behaved after each Fair Value Gap was formed.

    For every gap the candles from ``index + 2`` (at most ``lookahead`` of
    them) are scanned. A candle "enters" when its high/low range overlaps the
    zone. An entering candle that closes beyond the zone in the gap's direction
    is a bounce; one that closes beyond the opposite edge is a violation. After
    a bounce, up to ``N - 1`` further candles are checked for a close through
    the far edge ('bounce + break'); otherwise the bounce is a 'clean bounce'.
    The scan stops at the first bounce or violation.

    Args:
        df: OHLCV frame with ``high``, ``low``, ``close`` and ``volume`` columns.
        fvg_list: records as produced by ``detect_fvg``.
        lookahead: number of candles scanned for an entry.
        N: window (counted from the bounce candle) used for the follow-up
            break check and for ``max_price_move``.

    Returns:
        list of dicts: each input record extended with ``entered``,
        ``partial_mitigated``, ``bounced``, ``violated``, ``time_to_entry``,
        ``price_retrace_ratio``, ``volume_on_entry``, ``bounce_after_candles``,
        ``violated_after_candles``, ``max_price_move`` and ``reaction_type``.
    """
    n_rows = len(df)
    results = []

    for fvg in fvg_list:
        idx = fvg['index']
        high = fvg['zone_high']
        low = fvg['zone_low']
        direction = fvg['type']
        is_bullish = direction == 'Bullish FVG'
        is_bearish = direction == 'Bearish FVG'

        entered = bounced = violated = partial_mitigated = False
        time_to_entry = price_retrace_ratio = volume_on_entry = None
        bounce_candles = violated_candles = max_price_move = None
        reaction_type = 'no entry'
        close_inside = None

        for j in range(idx + 2, min(idx + 2 + lookahead, n_rows)):
            candle = df.iloc[j]
            close = candle['close']
            close_inside = low <= close <= high

            overlaps_zone = candle['low'] <= high and candle['high'] >= low
            if not overlaps_zone:
                continue

            entered = True
            # NOTE(review): retrace ratio and entry volume are refreshed on every
            # entering candle, while time_to_entry is pinned to the first one.
            if is_bullish:
                price_retrace_ratio = (candle['low'] - low) / (high - low)
            else:
                price_retrace_ratio = (high - candle['high']) / (high - low)
            volume_on_entry = candle['volume']
            if time_to_entry is None:
                time_to_entry = j - idx

            # NOTE(review): for the two known directions a close outside the zone
            # always resolves below as bounce/violation, so this label is
            # overwritten before it can be reported.
            if not close_inside:
                partial_mitigated = True
                reaction_type = 'partial mitigate'

            if is_bullish:
                favourable, adverse = close > high, close < low
            elif is_bearish:
                favourable, adverse = close < low, close > high
            else:
                continue

            if favourable:
                bounced = True
                bounce_candles = j - idx - time_to_entry

                # Did a later close break through the far edge of the zone?
                for k in range(j + 1, min(j + N, n_rows)):
                    later_close = df.iloc[k]['close']
                    broke = later_close < low if is_bullish else later_close > high
                    if broke:
                        violated = True
                        violated_candles = k - j
                        window = df.iloc[j : k]
                        reaction_type = 'bounce + break'
                        break
                else:
                    window = df.iloc[j : j + N]
                    reaction_type = 'clean bounce'

                # Largest excursion away from the zone inside that window.
                if is_bullish:
                    max_price_move = window['high'].max() - high
                else:
                    max_price_move = low - window['low'].min()
                break

            if adverse:
                violated = True
                violated_candles = j - time_to_entry - idx
                reaction_type = 'violation'
                break

        results.append({
            **fvg,
            'entered': entered,
            'partial_mitigated': partial_mitigated and not close_inside,
            'bounced': bounced,
            'violated': violated,
            'time_to_entry' : time_to_entry,
            'price_retrace_ratio' : price_retrace_ratio,
            'volume_on_entry' : volume_on_entry,
            'bounce_after_candles': bounce_candles,
            'violated_after_candles' : violated_candles,
            'max_price_move': max_price_move,
            'reaction_type' : reaction_type
        })

    return results


In [None]:
fvg = detect_fvg(df)


In [None]:
fvg

SMC Structure Detection (OB)

In [None]:
def detect_order_blocks(df, min_body_ratio=0.3, timeframe='1h'):
    """
    Detect bullish and bearish Order Blocks (OB).

    A candle ``i`` (from position 5, leaving two candles after it) qualifies
    when its body is at least ``min_body_ratio`` of its high-low range and:

    * bullish OB - the candle closes below its open, the previous close is
      above its low, the next close is above its high and the close after
      that is higher still;
    * bearish OB - the mirror image of the above.

    Args:
        df: frame providing ``high``, ``low``, ``open``, ``close``, ``volume``,
            ``ema20``, ``ema50``, ``atr``, ``rsi`` and ``atr_mean`` columns.
        min_body_ratio: minimum body/range ratio of the candidate candle.
        timeframe: label copied verbatim into each record's ``time_frame``.

    Returns:
        list of dicts in ascending candle order. The zone spans the candle's
        full high-low range; ``touch_index`` is the first candle from ``i + 3``
        that opens beyond the zone's near edge and trades back through it
        (``None`` if there is none).
    """
    high_arr = df['high'].values
    low_arr = df['low'].values
    open_arr = df['open'].values
    close_arr = df['close'].values
    volume_arr = df['volume'].values
    ema20_arr = df['ema20'].values
    ema50_arr = df['ema50'].values
    atr_arr = df['atr'].values
    rsi_arr = df['rsi'].values
    atr_mean_arr = df['atr_mean'].values

    n_rows = len(df)

    def first_retest(start, level, from_above):
        """Return the first position >= start whose candle crosses ``level``."""
        for j in range(start, n_rows):
            if from_above:
                hit = open_arr[j] > level and low_arr[j] < level
            else:
                hit = open_arr[j] < level and high_arr[j] > level
            if hit:
                return j
        return None

    blocks = []
    for i in range(5, n_rows - 2):
        candle_open, candle_close = open_arr[i], close_arr[i]
        top, bottom = high_arr[i], low_arr[i]

        span = top - bottom
        if span == 0:
            continue

        body = abs(candle_open - candle_close)
        body_share = body / span
        if body_share < min_body_ratio:
            continue

        close_before = close_arr[i - 1]
        close_after = close_arr[i + 1]
        close_after2 = close_arr[i + 2]

        if candle_close < candle_open:
            # Down candle followed by two rising closes above its high.
            confirmed = (close_before > bottom and
                         close_after > top and
                         close_after2 > close_after)
            kind, level, from_above = 'Bullish OB', top, True
        elif candle_close > candle_open:
            # Up candle followed by two falling closes below its low.
            confirmed = (close_before < top and
                         close_after < bottom and
                         close_after2 < close_after)
            kind, level, from_above = 'Bearish OB', bottom, False
        else:
            continue

        if not confirmed:
            continue

        blocks.append({
            'index': i,
            'type': kind,
            'ema 20': ema20_arr[i],
            'ema 50': ema50_arr[i],
            'atr': atr_arr[i],
            'rsi': rsi_arr[i],
            'atr_mean': atr_mean_arr[i],
            'zone_high': top,
            'zone_low': bottom,
            'zone_width': top - bottom,
            'body_size': body,
            'wick_ratio': 1 - body_share,
            'volume_on_creation': volume_arr[i],
            # Context features use the five candles strictly before candle i.
            'avg_volume_past_5': volume_arr[i - 5:i].mean(),
            'prev_volatility_5': close_arr[i - 5:i].std(),
            'momentum_5': candle_close - close_arr[i - 5],
            'touch_index': first_retest(i + 3, level, from_above),
            'time_frame': timeframe,
        })

    return blocks


In [None]:
def check_ob_reactions(df, ob_list, lookahead=24, N=24):
    """Classify how price behaved after each Order Block was formed.

    For every block the candles from ``index + 2`` (at most ``lookahead`` of
    them) are scanned. A candle "enters" when its high/low range overlaps the
    zone. An entering candle that closes beyond the zone in the block's
    direction is a bounce; one that closes beyond the opposite edge is a
    violation. After a bounce, up to ``N - 1`` further candles are checked for
    a close through the far edge ('bounce + break'); otherwise the bounce is a
    'clean bounce'. The scan stops at the first bounce or violation.

    Args:
        df: OHLCV frame with ``high``, ``low``, ``close`` and ``volume`` columns.
        ob_list: records as produced by ``detect_order_blocks``.
        lookahead: number of candles scanned for an entry.
        N: window (counted from the bounce candle) used for the follow-up
            break check and for ``max_price_move``.

    Returns:
        list of dicts: each input record extended with ``entered``,
        ``partial_mitigated``, ``bounced``, ``violated``, ``time_to_entry``,
        ``price_retrace_ratio``, ``volume_on_entry``, ``bounce_after_candles``,
        ``violated_after_candle``, ``max_price_move`` and ``reaction_type``.
    """
    n_rows = len(df)
    results = []

    for ob in ob_list:
        idx = ob['index']
        high = ob['zone_high']
        low = ob['zone_low']
        direction = ob['type']
        is_bullish = direction == 'Bullish OB'
        is_bearish = direction == 'Bearish OB'

        entered = bounced = violated = partial_mitigated = False
        time_to_entry = price_retrace_ratio = volume_on_entry = None
        bounce_candles = violated_candles = max_price_move = None
        reaction_type = 'no entry'
        close_inside = None

        # NOTE(review): the scan starts at idx + 2, which is one of the two
        # confirmation candles detect_order_blocks relies on (its own touch
        # search starts at idx + 3) - confirm this start offset is intended.
        for j in range(idx + 2, min(idx + 2 + lookahead, n_rows)):
            candle = df.iloc[j]
            close = candle['close']
            close_inside = low <= close <= high

            overlaps_zone = candle['low'] <= high and candle['high'] >= low
            if not overlaps_zone:
                continue

            entered = True
            # NOTE(review): retrace ratio and entry volume are refreshed on every
            # entering candle, while time_to_entry is pinned to the first one.
            if is_bullish:
                price_retrace_ratio = (candle['low'] - low) / (high - low)
            else:
                price_retrace_ratio = (high - candle['high']) / (high - low)
            volume_on_entry = candle['volume']
            if time_to_entry is None:
                time_to_entry = j - idx

            # NOTE(review): for the two known directions a close outside the zone
            # always resolves below as bounce/violation, so this label is
            # overwritten before it can be reported.
            if not close_inside:
                partial_mitigated = True
                reaction_type = 'partial mitigate'

            if is_bullish:
                favourable, adverse = close > high, close < low
            elif is_bearish:
                favourable, adverse = close < low, close > high
            else:
                continue

            if favourable:
                bounced = True
                bounce_candles = j - time_to_entry - idx

                # Did a later close break through the far edge of the zone?
                for k in range(j + 1, min(j + N, n_rows)):
                    later_close = df.iloc[k]['close']
                    broke = later_close < low if is_bullish else later_close > high
                    if broke:
                        violated = True
                        violated_candles = k - j
                        window = df.iloc[j : k]
                        reaction_type = 'bounce + break'
                        break
                else:
                    window = df.iloc[j : j + N]
                    reaction_type = 'clean bounce'

                # Largest excursion away from the zone inside that window.
                if is_bullish:
                    max_price_move = window['high'].max() - high
                else:
                    max_price_move = low - window['low'].min()
                break

            if adverse:
                violated = True
                violated_candles = j - time_to_entry - idx
                reaction_type = 'violation'
                break

        results.append({
            **ob,
            'entered': entered,
            'partial_mitigated': partial_mitigated and not close_inside,
            'bounced': bounced,
            'violated': violated,
            'time_to_entry' : time_to_entry,
            'price_retrace_ratio' : price_retrace_ratio,
            'volume_on_entry' : volume_on_entry,
            'bounce_after_candles': bounce_candles,
            'violated_after_candle' : violated_candles,
            'max_price_move': max_price_move,
            'reaction_type' : reaction_type
        })

    return results

In [None]:
obs = detect_order_blocks(df)
obs

In [None]:
ob_res = check_ob_reactions(df,obs)
ob_res

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
def analysis_on_structure(result):
    """Print summary statistics for a list of zone-reaction records and plot them.

    Prints the count of each ``reaction_type``, the per-reaction mean of the
    numeric reaction metrics, the share of zones that bounced ('clean bounce'
    or 'bounce + break'), overall averages, and finally draws a box plot of
    ``max_price_move`` per reaction type.

    Args:
        result: list of dicts as returned by ``check_ob_reactions`` /
            ``check_fvg_reactions``. Must contain ``reaction_type``,
            ``bounced``, ``violated`` and the five metric keys listed below.

    Returns:
        None. Output is printed and plotted.
    """
    stats = pd.DataFrame(result)
    if stats.empty:
        # Nothing to group or divide by; avoid KeyError / ZeroDivisionError.
        print("No structures to analyse.")
        return

    metric_cols = [
        'price_retrace_ratio', 'volume_on_entry',
        'time_to_entry', 'bounce_after_candles', 'max_price_move'
    ]
    # A metric that is None for every record arrives as an object column,
    # which groupby().mean() cannot aggregate; coerce to float (None -> NaN).
    stats[metric_cols] = stats[metric_cols].apply(pd.to_numeric, errors='coerce')

    print(stats['reaction_type'].value_counts())
    mean_by_reaction = stats.groupby('reaction_type')[metric_cols].mean()
    print(mean_by_reaction)

    success = stats['reaction_type'].isin(['clean bounce', 'bounce + break'])
    success_rate = success.sum() / len(stats)

    print(f"Success rate: {success_rate:.2%}")
    print("Average time to entry:", stats['time_to_entry'].mean())
    print("Average price retrace ratio:", stats['price_retrace_ratio'].mean())

    bounce_moves = stats.loc[stats['bounced'].eq(True), 'max_price_move'].mean()
    violation_count = int(stats['violated'].eq(True).sum())

    print("Average max price move after bounce:", bounce_moves)
    print("Number of violated :", violation_count)

    sns.boxplot(x='reaction_type', y='max_price_move', data=stats)
    plt.xticks(rotation=45)
    plt.title("Max Price Move by Reaction Type")
    plt.show()


In [None]:
analysis_on_structure(ob_res)

In [None]:
# fvg_res is never assigned in the cells above; build it from the detected
# gaps the same way ob_res is built from the order blocks.
fvg_res = check_fvg_reactions(df, fvg)
analysis_on_structure(fvg_res)

In [None]:
df.iloc[4533]

SMC Structure Detection (BOS/CHOCH)

In [None]:
def detect_swings(df, window=20):
    """Find swing highs and swing lows with a centred look-around window.

    Candle ``i`` is a swing high when its high equals the maximum high of the
    candles ``i - window .. i + window`` (the window is clipped at both ends of
    the frame), and a swing low when its low equals the minimum low of the same
    span. A candle that satisfies both is reported as a swing high only.

    Earlier revisions looked only *forward* for the first ``window`` candles,
    so every candle of an opening downtrend was flagged as a swing high; the
    window is now clipped symmetrically, as it already was at the end.

    Args:
        df: frame with ``high``, ``low``, ``ema20``, ``ema50``, ``atr``, ``rsi``
            and ``atr_mean`` columns.
        window: number of candles inspected on each side of the candidate.

    Returns:
        list of dicts in ascending candle order with keys ``index`` (row
        position), ``Type`` ('Swing High' / 'Swing Low'), ``Price``,
        ``swing_strength`` (the ``window`` used) and the indicator values at
        that candle (``ema 20``, ``ema 50``, ``atr``, ``rsi``, ``atr_mean``).
    """
    if len(df) == 0:
        return []

    span = 2 * window + 1
    highs = df['high'].to_numpy()
    lows = df['low'].to_numpy()

    # center=True with an odd span covers i-window..i+window; min_periods=1
    # lets the window shrink at the edges instead of producing NaN.
    window_max = df['high'].rolling(span, center=True, min_periods=1).max().to_numpy()
    window_min = df['low'].rolling(span, center=True, min_periods=1).min().to_numpy()

    is_swing_high = highs == window_max
    is_swing_low = lows == window_min

    ema20 = df['ema20'].to_numpy()
    ema50 = df['ema50'].to_numpy()
    atr = df['atr'].to_numpy()
    rsi = df['rsi'].to_numpy()
    atr_mean = df['atr_mean'].to_numpy()

    swings = []
    for pos in np.flatnonzero(is_swing_high | is_swing_low):
        i = int(pos)  # keep plain ints so downstream index arithmetic is unchanged
        if is_swing_high[i]:
            kind, price = 'Swing High', highs[i]
        else:
            kind, price = 'Swing Low', lows[i]
        swings.append({'index': i, 'Type': kind, 'Price': price, 'swing_strength': window,
                       'ema 20': ema20[i],
                       'ema 50': ema50[i],
                       'atr': atr[i],
                       'rsi': rsi[i],
                       'atr_mean': atr_mean[i]})

    return swings


def label_structure_from_swings(swings):
    """Tag each swing as HH/LH (highs) or HL/LL (lows) and track the trend.

    A swing high is 'HH' when it is the first high seen or strictly above the
    previous swing high, otherwise 'LH'. A swing low is 'HL' when it is the
    first low seen or strictly above the previous swing low, otherwise 'LL'.
    The running trend flips to 'Bullish' on an 'HH', to 'Bearish' on an 'LL',
    and is otherwise carried over unchanged (``None`` until the first flip).

    Args:
        swings: list of dicts with ``index``, ``Type`` and ``Price`` keys.

    Returns:
        list of dicts with ``index``, ``swing_type``, ``price``,
        ``structure_label`` and ``trend``; other input keys are not copied.
    """
    trend_by_label = {'HH': 'Bullish', 'LL': 'Bearish'}
    last_seen = {'high': None, 'low': None}
    running_trend = None
    labeled = []

    for swing in swings:
        position = swing['index']
        kind = swing['Type']
        level = swing['Price']

        is_high = kind == 'Swing High'
        is_low = (not is_high) and kind == 'Swing Low'

        tag = None
        if is_high or is_low:
            side = 'high' if is_high else 'low'
            reference = last_seen[side]
            above_previous = reference is None or level > reference
            if is_high:
                tag = 'HH' if above_previous else 'LH'
            else:
                tag = 'HL' if above_previous else 'LL'
            last_seen[side] = level

        running_trend = trend_by_label.get(tag, running_trend)

        labeled.append({
            'index': position,
            'swing_type': kind,
            'price': level,
            'structure_label': tag,
            'trend': running_trend
        })

    return labeled


def lookahead_bos_choch(labeled_swings, df, lookahead=20):
    """Look ahead from each labelled swing for a BOS or CHOCH trigger.

    Up to ``lookahead`` closes after the swing are compared with the swing
    price; the first close that crosses it decides the signal:

    * 'HH' crossed upward   -> 'BOS',   trend becomes 'Bullish'
    * 'LH' crossed upward   -> 'CHOCH', trend becomes 'Bullish'
    * 'LL' crossed downward -> 'BOS',   trend becomes 'Bearish'
    * 'HL' crossed downward -> 'CHOCH', trend becomes 'Bearish'

    Args:
        labeled_swings: dicts with ``index``, ``structure_label`` and ``price``.
        df: frame with a ``close`` column, addressed by row position.
        lookahead: maximum number of candles inspected after each swing.

    Returns:
        list holding the *same* dict objects, each updated in place with
        ``signal``, ``trigger_candle`` (row position or ``None``) and
        ``trend_after`` (the trend carried over from the latest trigger).
    """
    # label -> (close must exceed the level?, signal, resulting trend)
    rules = {
        'HH': (True, 'BOS', 'Bullish'),
        'LH': (True, 'CHOCH', 'Bullish'),
        'LL': (False, 'BOS', 'Bearish'),
        'HL': (False, 'CHOCH', 'Bearish'),
    }

    results = []
    current_trend = None

    for swing in labeled_swings:
        anchor = swing['index']
        rule = rules.get(swing['structure_label'])
        level = swing['price']

        signal = None
        trigger_candle = None

        step = 1
        while step <= lookahead and anchor + step < len(df):
            pos = anchor + step
            close_price = df['close'].iloc[pos]

            if rule is not None:
                wants_higher, rule_signal, rule_trend = rule
                crossed = close_price > level if wants_higher else close_price < level
                if crossed:
                    signal = rule_signal
                    current_trend = rule_trend
                    trigger_candle = pos
                    break

            step += 1

        swing['signal'] = signal
        swing['trigger_candle'] = trigger_candle
        swing['trend_after'] = current_trend
        results.append(swing)

    return results


def extract_structure_features(df, window=20, lookahead=20):
    """Run the swing -> label -> BOS/CHOCH pipeline and derive per-swing features.

    Args:
        df: OHLC frame passed through to ``detect_swings`` and
            ``lookahead_bos_choch``.
        window: look-around size handed to ``detect_swings``.
        lookahead: look-ahead size handed to ``lookahead_bos_choch``.

    Returns:
        list of dicts, one per structure event (a copy of the event) extended
        with:
        ``distance_to_prev_swing`` / ``price_change_from_last`` - difference in
        row position / price to the previous event (``None`` for the first);
        ``swing_strength`` - distance of the price from the running highest
        high (for 'HH'/'LH') or running lowest low (for 'LL'/'HL') seen
        *before* this event, ``None`` for unlabelled events;
        ``is_extreme`` - whether the event sits on the running extreme that
        matches its trend.
    """
    swings = detect_swings(df, window)
    labeled_swings = label_structure_from_swings(swings)
    structure_events = lookahead_bos_choch(labeled_swings, df, lookahead)

    features = []
    prev = None
    highest = None
    lowest = None

    for s in structure_events:
        entry = s.copy()
        price = s['price']
        label = s['structure_label']

        has_prev = prev is not None
        entry['distance_to_prev_swing'] = s['index'] - prev['index'] if has_prev else None
        entry['price_change_from_last'] = price - prev['price'] if has_prev else None

        # The running extremes are compared against None explicitly: a plain
        # `highest or price` would treat an extreme of exactly 0 as "unset".
        if label in ['HH', 'LH']:
            entry['swing_strength'] = abs(price - (highest if highest is not None else price))
            highest = price if highest is None else max(highest, price)
        elif label in ['LL', 'HL']:
            entry['swing_strength'] = abs(price - (lowest if lowest is not None else price))
            lowest = price if lowest is None else min(lowest, price)
        else:
            entry['swing_strength'] = None

        entry['is_extreme'] = (
            (s['trend'] == 'Bullish' and price == highest) or
            (s['trend'] == 'Bearish' and price == lowest)
        )

        features.append(entry)
        prev = s

    return features


In [None]:
structure_feature = extract_structure_features(df,20,50)
structure_feature_df = pd.DataFrame(structure_feature)

In [None]:
print(structure_feature_df['signal'].value_counts())
print(structure_feature_df.groupby('signal')[['swing_strength', 'distance_to_prev_swing', 'price_change_from_last']].mean()
)
print(pd.crosstab(structure_feature_df['signal'], structure_feature_df['trend'])
)
structure_feature_df['bos_delay'] = structure_feature_df['trigger_candle'] - structure_feature_df['index']
print(structure_feature_df.groupby('signal')['bos_delay'].mean())
extreme_stats = structure_feature_df[structure_feature_df['signal'] == 'BOS'].groupby('is_extreme')['swing_strength'].mean()
print(extreme_stats)



SMC Structure Detection (Liquidity Zone)

In [None]:
def detect_liquidity_zones(df, swings, range_pct=0.01,timeframe = '1h'):
    """
    Cluster repeated swing lows / swing highs into liquidity zones.

    Swings of the same kind whose price lies within a tolerance band around an
    earlier swing are grouped together; groups of two or more become a zone.
    Swing lows produce 'Buy-Side Liq' zones and swing highs 'Sell-Side Liq'
    zones; buy-side zones are listed first.

    Args:
        df (pd.DataFrame): OHLCV frame with 'high', 'low' and 'volume' columns.
        swings (list): output of detect_swings().
        range_pct (float): fraction of the frame's total high-low span used as
            the clustering tolerance (and as the zone's half height).
        timeframe: label copied verbatim into each zone's 'time_frame'.

    Returns:
        List of zone dicts with type, average level, zone bounds, member
        indices, first/last touch index, the index of the first later candle
        that swept the band (or None), and averaged swing/indicator features.
    """
    total_rows = len(df)

    swing_highs = [s for s in swings if s['Type'] == 'Swing High']
    swing_lows = [s for s in swings if s['Type'] == 'Swing Low']

    tolerance = (df['high'].max() - df['low'].min()) * range_pct
    high_arr = df['high'].values
    low_arr = df['low'].values

    def collect(members, key):
        """Values of ``key`` from the members that carry it."""
        return [member[key] for member in members if key in member]

    def process_zone(candidates, direction):
        zones = []
        claimed = set()  # indices already absorbed into an earlier cluster

        for pos, anchor in enumerate(candidates):
            if anchor['index'] in claimed:
                continue

            band_low = anchor['Price'] - tolerance
            band_high = anchor['Price'] + tolerance

            cluster = [anchor]
            for later in candidates[pos + 1:]:
                if later['index'] in claimed:
                    continue
                if band_low <= later['Price'] <= band_high:
                    cluster.append(later)
                    claimed.add(later['index'])

            if len(cluster) < 2:
                continue  # a single swing is not a liquidity pool

            levels = [member['Price'] for member in cluster]
            first_idx = cluster[0]['index']
            last_idx = cluster[-1]['index']

            mean_level = sum(levels) / len(levels)
            top = mean_level + tolerance
            bottom = mean_level - tolerance

            strengths = collect(cluster, 'swing_strength')
            touch_volumes = [df.iloc[member['index']]['volume']
                             for member in cluster if member['index'] < len(df)]

            # First candle after the last touch that trades through the band.
            # NOTE(review): the sweep test uses the band around the first swing
            # of the cluster, not the averaged zone bounds reported below.
            swept_at = None
            scan_from = last_idx + 1
            if scan_from < total_rows:
                if direction == 'Sell-Side':
                    breached = high_arr[scan_from:] >= band_high
                else:
                    breached = low_arr[scan_from:] <= band_low

                if np.any(breached):
                    swept_at = scan_from + int(np.argmax(breached))

            zones.append({
                'type': f'{direction} Liq',
                'level': mean_level,
                'zone_high': top,
                'zone_low': bottom,
                'count': len(cluster),
                'index' : first_idx,
                'indexs': [member['index'] for member in cluster],
                'end_index': last_idx,
                'swept_index': swept_at,
                'liquidity_height': top - bottom,
                'equal_level_deviation': np.std(levels),
                'avg_volume_around_zone': np.mean(touch_volumes) if touch_volumes else None,
                'duration_between_first_last_touch': last_idx - first_idx,
                'avg_swing_strength': np.mean(strengths) if strengths else None,
                'avg_ema_20' : np.mean(collect(cluster, 'ema 20')),
                'avg_ema_50' : np.mean(collect(cluster, 'ema 50')),
                'avg_rsi' : np.mean(collect(cluster, 'rsi')),
                'avg_atr' : np.mean(collect(cluster, 'atr')),
                'avg_atr_mean' : np.mean(collect(cluster, 'atr_mean')),
                'time_frame' : timeframe,
            })

        return zones

    return process_zone(swing_lows, 'Buy-Side') + process_zone(swing_highs, 'Sell-Side')


In [None]:
def get_liq_touches(df, liquidity_zones):
    """Attach to each liquidity zone the candles that touched it while it formed.

    Only candles from ``zone['index'] + 2`` up to (but excluding)
    ``zone['end_index'] - 2`` are inspected. A candle counts as a touch when it
    opens below the zone and its high reaches ``zone_low``, or opens above the
    zone and its low reaches ``zone_high``.

    Args:
        df: frame with ``open``, ``high`` and ``low`` columns.
        liquidity_zones: dicts with ``zone_high``, ``zone_low``, ``index`` and
            ``end_index`` keys.

    Returns:
        list of new dicts: each zone plus ``touch_indexs``, the ascending list
        of touching row positions.
    """
    low_arr = df['low'].values
    high_arr = df['high'].values
    open_arr = df['open'].values
    n_rows = len(df)

    def touches(pos, top, bottom):
        """True when candle ``pos`` reaches the zone from outside."""
        from_below = high_arr[pos] >= bottom and open_arr[pos] < bottom
        from_above = open_arr[pos] > top and low_arr[pos] <= top
        return from_below or from_above

    annotated = []
    for zone in liquidity_zones:
        top = zone['zone_high']
        bottom = zone['zone_low']
        first = zone['index'] + 2
        stop = min(zone['end_index'] - 2, n_rows)

        hits = [pos for pos in range(first, stop) if touches(pos, top, bottom)]
        annotated.append({**zone, 'touch_indexs': hits})

    return annotated


In [None]:
swings = detect_swings(df, 15)
liquidity = detect_liquidity_zones(df,swings)
liquidity

In [None]:
def check_liquidity_reactions_on_touches(df, liquidity_zones, lookahead=24, max_touches=5):
    """
    Find every candle that touches a liquidity zone and classify what price did next.

    A candle touches a zone when it opens below `zone_low` and its high reaches
    `zone_low`, or opens above `zone_high` and its low reaches `zone_high`.
    Scanning starts on the candle after the zone's anchor position and runs to
    the end of `df`.

    Args:
        df: frame with 'open', 'high', 'low', 'close' and 'volume' columns,
            addressed by position.
        liquidity_zones: iterable of dicts with 'type', 'zone_high', 'zone_low'
            and an anchor position stored under 'start_index' or, when that key
            is absent or None, under 'index'.
        lookahead: number of candles after a touch used to classify the
            reaction. A touch with `touch_index + lookahead >= len(df)` keeps
            the default 'neutral' reaction and a None price move.
        max_touches: accepted for backward compatibility; currently unused.

    Returns:
        A list with one dict per zone: the zone's own items plus 'touch_count'
        and 'touches' (a list of per-touch result dicts).
    """
    opens = df['open'].values
    highs = df['high'].values
    lows = df['low'].values
    closes = df['close'].values
    volumes = df['volume'].values
    n_candles = len(df)

    results = []

    for zone in liquidity_zones:
        zone_high = zone['zone_high']
        zone_low = zone['zone_low']
        ltype = zone['type']

        # Zones may be anchored by 'start_index' or by 'index'; accept either
        # instead of raising KeyError on the latter.
        anchor = zone.get('start_index')
        if anchor is None:
            anchor = zone['index']

        touches = []
        start = anchor  # anchor first, then the position of the previous touch

        for i in range(anchor + 1, n_candles):
            touched_from_below = highs[i] >= zone_low and opens[i] < zone_low
            touched_from_above = opens[i] > zone_high and lows[i] <= zone_high
            if not (touched_from_below or touched_from_above):
                continue

            touch_result = {
                'touch_index': i,
                'touch_volume': volumes[i],
                # NOTE(review): `start - i` is never positive because `start`
                # precedes `i`; kept as-is, confirm the intended sign.
                'time_to_entry': start - i,
                'reaction_type': 'neutral',
                'bounced': False,
                'violated': False,
                'price_move_after_touch': None
            }

            if i + lookahead < n_candles:
                future = df.iloc[i + 1 : i + 1 + lookahead]

                if ltype == 'Buy-Side Liq':
                    if (future['close'] < zone_low).any():
                        touch_result['reaction_type'] = 'violation'
                        touch_result['violated'] = True
                    elif (future['close'] > closes[i]).any():
                        touch_result['reaction_type'] = 'bounce'
                        touch_result['bounced'] = True
                    touch_result['price_move_after_touch'] = future['high'].max() - highs[i]

                elif ltype == 'Sell-Side Liq':
                    if (future['close'] > zone_high).any():
                        touch_result['reaction_type'] = 'violation'
                        touch_result['violated'] = True
                    elif (future['close'] < closes[i]).any():
                        touch_result['reaction_type'] = 'bounce'
                        touch_result['bounced'] = True
                    touch_result['price_move_after_touch'] = lows[i] - future['low'].min()

            touches.append(touch_result)
            start = i

        results.append({
            **zone,
            'touch_count': len(touches),
            'touches': touches
        })

    return results


In [None]:
liq_res = check_liquidity_reactions_on_touches(df,liquidity)
liq_res

In [None]:
def add_distance_to_nearest_zones_above_below(zones, threshold=0.1):
    """
    Annotate zones with the nearest other zone above and below, by mid-price.

    Liquidity zones ('Sell-Side Liq' / 'Buy-Side Liq') produce one output row
    per entry of their 'touch_indexs' list (none if the list is empty); that
    touch position is the reference index and is stored as 'liq_touch_idx'.
    Every other zone produces exactly one row, using `zone.get('index', 0)` as
    the reference index.

    A candidate neighbour is considered only if, relative to the reference
    index, it has no 'touch_index', or its 'touch_index' is later, or its
    'swept_index' (default 0) is later. Neighbours whose mid-price is further
    than `this_mid * threshold` away are ignored.

    Args:
        zones: list of dicts with 'zone_high', 'zone_low' and 'type';
            liquidity zones also need 'touch_indexs'.
        threshold: maximum mid-price distance, as a fraction of the current
            zone's mid-price.

    Returns:
        A list of shallow copies of the input zones with
        'distance_to_nearest_zone_above', 'nearest_zone_above',
        'distance_to_nearest_zone_below' and 'nearest_zone_below' added.
        Distances are `inf` and neighbours None when nothing qualifies.
    """
    liquidity_types = ('Sell-Side Liq', 'Buy-Side Liq')

    def _is_candidate(other, ref_index):
        # Keep a zone that was never touched, or touched / swept after ref_index.
        touched_at = other.get('touch_index')
        return (
            touched_at is None
            or touched_at > ref_index
            or other.get('swept_index', 0) > ref_index
        )

    def _annotate(zone, this_mid, ref_index):
        # Build the annotated copy of `zone` for one reference index.
        min_dist_above = float('inf')
        min_dist_below = float('inf')
        nearest_above_zone = None
        nearest_below_zone = None

        for other in zones:
            # Skip the zone itself by identity; a positional comparison would
            # be wrong because the candidate list is filtered.
            if other is zone or not _is_candidate(other, ref_index):
                continue

            other_mid = (other['zone_high'] + other['zone_low']) / 2
            dist = abs(this_mid - other_mid)
            if dist > (this_mid * threshold):
                continue

            if other_mid > this_mid and dist < min_dist_above:
                min_dist_above = dist
                nearest_above_zone = other
            elif other_mid < this_mid and dist < min_dist_below:
                min_dist_below = dist
                nearest_below_zone = other

        updated = zone.copy()
        updated['distance_to_nearest_zone_above'] = min_dist_above
        updated['nearest_zone_above'] = nearest_above_zone
        updated['distance_to_nearest_zone_below'] = min_dist_below
        updated['nearest_zone_below'] = nearest_below_zone
        return updated

    updated_zones = []

    for zone in zones:
        this_mid = (zone['zone_high'] + zone['zone_low']) / 2

        if zone['type'] in liquidity_types:
            for touch_idx in zone['touch_indexs']:
                updated = _annotate(zone, this_mid, touch_idx)
                updated['liq_touch_idx'] = touch_idx
                updated_zones.append(updated)
        else:
            updated_zones.append(_annotate(zone, this_mid, zone.get('index', 0)))

    return updated_zones


In [None]:
fvg_1h = detect_fvg(df)
ob_1h = detect_order_blocks(df,0.3)
swings = detect_swings(df,15)
liq_1h = detect_liquidity_zones(df,swings)
liq_1h = get_liq_touches(df,liq_1h)
fvg_4h = detect_fvg(df_4h,timeframe='4h')
ob_4h = detect_order_blocks(df_4h,0.3,'4h')
swings_4h = detect_swings(df_4h,15)
liq_4h = detect_liquidity_zones(df_4h,swings_4h,timeframe='4h')
liq_4h = get_liq_touches(df_4h,liq_4h)
fvg_1d = detect_fvg(df_1d,timeframe='1D')
ob_1d = detect_order_blocks(df_1d,0.3,'1D')
swings_1d = detect_swings(df_1d,15)
liq_1d = detect_liquidity_zones(df_1d,swings_1d,timeframe='1D')
liq_1d = get_liq_touches(df_1d,liq_1d)

In [None]:
def _rescale_zone_indexes(zones, factor, scalar_keys, list_keys=()):
    """
    Multiply positional index fields of each zone dict by `factor`, in place.

    Keys that are missing or None are left untouched. For `list_keys`, None
    entries are dropped and the remaining entries are multiplied.

    Running the loops inside a function keeps the loop variables local, so the
    notebook-level names `fvg` and `liq` (used as zone lists by later cells)
    are no longer overwritten with a single dict.
    """
    for zone in zones:
        for key in scalar_keys:
            if zone.get(key) is not None:
                zone[key] = zone[key] * factor
        for key in list_keys:
            if zone.get(key) is not None:
                zone[key] = [pos * factor for pos in zone[key] if pos is not None]


# Candle positions on higher timeframes are converted with fixed factors (4h -> x4, 1D -> x24).
# NOTE(review): this cell is not idempotent - re-running it multiplies the indexes again.
# NOTE(review): the liquidity 'indexs' list is not rescaled, as in the original; confirm that is intended.
_FVG_OB_INDEX_KEYS = ('index', 'touch_index')
_LIQ_INDEX_KEYS = ('index', 'swept_index', 'end_index')
_LIQ_INDEX_LIST_KEYS = ('touch_indexs',)

_rescale_zone_indexes(fvg_4h + ob_4h, 4, _FVG_OB_INDEX_KEYS)
_rescale_zone_indexes(fvg_1d + ob_1d, 24, _FVG_OB_INDEX_KEYS)
_rescale_zone_indexes(liq_4h, 4, _LIQ_INDEX_KEYS, _LIQ_INDEX_LIST_KEYS)
_rescale_zone_indexes(liq_1d, 24, _LIQ_INDEX_KEYS, _LIQ_INDEX_LIST_KEYS)


In [None]:
class ZoneMerger:
    """
    Greedy, single-pass merger of price zones whose padded ranges overlap.

    Every zone's range is widened by `threshold` (high * (1 + threshold),
    low * (1 - threshold)) before overlap is tested.
    """

    def __init__(self, zones, threshold=0.002):
        self.threshold = threshold
        self.zones = zones

    def merge(self):
        """
        Group overlapping zones and return one summary dict per group.

        Zones are visited in list order. Each zone not yet claimed seeds a
        group; every later unclaimed zone overlapping the group's current
        padded bounds joins it and widens those bounds.

        Returns:
            A list of dicts with 'zone_high', 'zone_low', 'zone_width',
            'types', 'timeframes', 'count' and 'sources' (the grouped zones).
        """
        pad_up = 1 + self.threshold
        pad_down = 1 - self.threshold

        claimed = set()
        merged = []

        for seed_pos, seed in enumerate(self.zones):
            if seed_pos in claimed:
                continue
            claimed.add(seed_pos)

            members = [seed]
            top = seed['zone_high'] * pad_up
            bottom = seed['zone_low'] * pad_down

            for cand_pos, cand in enumerate(self.zones):
                # The seed is already in `claimed`, so this also skips it.
                if cand_pos in claimed:
                    continue

                cand_top = cand['zone_high'] * pad_up
                cand_bottom = cand['zone_low'] * pad_down

                straddles_top = cand_bottom <= top and cand_top >= top
                straddles_bottom = cand_top >= bottom and cand_bottom <= bottom
                nested = cand_bottom >= bottom and cand_top <= top
                engulfing = cand_bottom <= bottom and cand_top >= top
                if not (straddles_top or straddles_bottom or nested or engulfing):
                    continue

                members.append(cand)
                claimed.add(cand_pos)

                # Grow the group's padded bounds to include the new member.
                top = max(top, cand_top)
                bottom = min(bottom, cand_bottom)

            group_high = max(member['zone_high'] for member in members)
            group_low = min(member['zone_low'] for member in members)
            merged.append({
                'zone_high': group_high,
                'zone_low': group_low,
                'zone_width': group_high - group_low,
                'types': list(set(member['type'] for member in members)),
                'timeframes': list(set(member['time_frame'] for member in members)),
                'count': len(members),
                'sources': members
            })

        return merged


In [None]:
all_zones = fvg_1h+ob_1h+liq_1h + fvg_4h+ob_4h+liq_4h+fvg_1d+ob_1d+liq_1d
zm = ZoneMerger(all_zones)
result = zm.merge()

In [None]:
result[0]

In [None]:
all_zones = fvg+ob+liq
all_zones = add_distance_to_nearest_zones_above_below(all_zones)

In [None]:
all_zones = sorted(all_zones,key = lambda x : x['index'])

In [None]:
all_zones[-1]

In [None]:
all_zones_reactions = check_fvg_reactions(df,fvg) + check_ob_reactions(df,ob) + check_liquidity_reactions_on_touches(df,liq)

In [None]:
all_zones_reactions = add_distance_to_nearest_zones_above_below(all_zones_reactions)

In [None]:
df_zone_reaction = pd.DataFrame(all_zones_reactions)
df_zone_reaction.head()

In [None]:
df_zone_reaction.columns

In [None]:
df_zone_reaction = df_zone_reaction.drop(columns=['entered',
'partial_mitigated',
'bounced',
'violated',
'price_retrace_ratio',
'bounce_after_candles',
'violated_after_candles','violated_after_candle','liquidity_height','avg_swing_strength',
'max_price_move'])

In [None]:
df_zone_reaction.loc[df_zone_reaction['start_index'].notna(),'index'] = df_zone_reaction.loc[df_zone_reaction['start_index'].notna(),'index'].fillna(df_zone_reaction['start_index'])

In [None]:
df_zone_reaction = df_zone_reaction.drop(columns=['start_index','end_index','swept_index','count','touch_count'])

In [None]:
def flatten_zone_reaction_row(row):
    """
    Flatten one zone row into one output row per recorded touch.

    Args:
        row (pd.Series): a DataFrame row. Its optional 'touches' entry is
            expected to be a list of dicts carrying 'reaction_type',
            'time_to_entry' and 'touch_volume'.

    Returns:
        List of dicts. When 'touches' is missing, is not a list, or is empty,
        the list holds the row itself (minus 'touches'). Otherwise it holds one
        copy of the row per touch, with 'reaction_type' and 'time_to_entry'
        taken from the touch and its 'touch_volume' stored as 'volume_on_entry'.
    """
    # errors='ignore' keeps rows without a 'touches' label from raising
    # KeyError, in line with the `.get(..., [])` default below.
    base = row.drop(labels='touches', errors='ignore').to_dict()

    touches = row.get('touches', [])
    if not isinstance(touches, list) or len(touches) == 0:
        return [base]

    flattened = []
    for touch in touches:
        flat = dict(base)  # fresh copy so output rows do not share one dict
        flat['reaction_type'] = touch.get('reaction_type')
        flat['time_to_entry'] = touch.get('time_to_entry')
        flat['volume_on_entry'] = touch.get('touch_volume')
        flattened.append(flat)

    return flattened

def flatten_reaction_dataframe(df):
    """
    Run flatten_zone_reaction_row over every row of `df` and return all the
    resulting flat rows, in row order, as a single new DataFrame.
    """
    per_row = (flatten_zone_reaction_row(record) for _, record in df.iterrows())
    return pd.DataFrame([flat for chunk in per_row for flat in chunk])


In [None]:
df_zone_reaction = flatten_reaction_dataframe(df_zone_reaction)

In [None]:
# Rows without a 'level' fall back to the mid-point of their zone.
_zone_mid_price = (df_zone_reaction['zone_high'] + df_zone_reaction['zone_low']) / 2
df_zone_reaction['level'] = df_zone_reaction['level'].fillna(_zone_mid_price)

# These features are missing on some rows; treat missing as 0.
_zero_filled_features = (
    'zone_width',
    'body_size',
    'wick_ratio',
    'volume_on_creation',
    'avg_volume_past_5',
    'prev_volatility_5',
    'momentum_5',
    'avg_volume_around_zone',
    'equal_level_deviation',
    'duration_between_first_last_touch',
)
for _feature in _zero_filled_features:
    df_zone_reaction[_feature] = df_zone_reaction[_feature].fillna(0)

In [None]:
df_zone_reaction['type'].unique()

In [None]:
df_zone_reaction.to_csv('reactions.csv',index=False)

In [None]:
df_zone_reaction.describe()

In [None]:
df_zone_reaction.notna().sum()

In [None]:
df_zone_reaction= df_zone_reaction.dropna()

In [None]:
# Split the reactions into liquidity zones and everything else (FVG / OB).
# One shared mask keeps the two frames complementary; the previous second
# filter misspelled 'Buy-Side Liq' and silently dropped every buy-side row.
_LIQ_ZONE_TYPES = ['Buy-Side Liq', 'Sell-Side Liq']
_is_liq_zone = df_zone_reaction['type'].isin(_LIQ_ZONE_TYPES)

# .copy() because later cells assign columns on these frames.
df_ob_fvg_reaction = df_zone_reaction.loc[~_is_liq_zone].copy()
df_liq_reaction = df_zone_reaction.loc[_is_liq_zone].copy()

In [None]:
reaction_type1 = list(df_ob_fvg_reaction.reaction_type.unique())
reaction_type2 = list(df_liq_reaction.reaction_type.unique())
zone_type = list(df_zone_reaction['type'].unique())

In [None]:
df_ob_fvg_reaction['type'] = df_ob_fvg_reaction['type'].apply(lambda x : zone_type.index(x))
df_ob_fvg_reaction['nearest_zone_type'] = df_ob_fvg_reaction['nearest_zone_type'].apply(lambda x : zone_type.index(x))
df_liq_reaction['type'] = df_liq_reaction['type'].apply(lambda x : zone_type.index(x))
df_liq_reaction['nearest_zone_type'] = df_liq_reaction['nearest_zone_type'].apply(lambda x : zone_type.index(x))



In [None]:
df_ob_fvg_reaction['reaction_type'] = df_ob_fvg_reaction['reaction_type'].apply(lambda x : reaction_type1.index(x))
df_liq_reaction['reaction_type'] = df_liq_reaction['reaction_type'].apply(lambda x : reaction_type2.index(x))

In [None]:
df_zone_reaction.dtypes

In [None]:
X1 = df_liq_reaction.drop(columns=['reaction_type','nearest_zone_index','index','zone_high','zone_low','duration_between_first_last_touch'])
Y1 = df_liq_reaction['reaction_type']
X2 = df_ob_fvg_reaction.drop(columns=['reaction_type','nearest_zone_index','index','zone_high','zone_low','duration_between_first_last_touch'])
Y2 = df_ob_fvg_reaction['reaction_type']

In [None]:
from sklearn.model_selection import train_test_split

# Seed the shuffle so the split (and every metric derived from it) is
# reproducible across kernel restarts.
# NOTE(review): test_size=0.7 trains on 30% of the rows and validates on 70% -
# confirm this ratio is intended.
train_x1,val_x1,train_y1,val_y1 = train_test_split(X1,Y1,test_size=0.7,random_state=42)
train_x2,val_x2,train_y2,val_y2 = train_test_split(X2,Y2,test_size=0.7,random_state=42)

In [None]:
from xgboost import XGBClassifier

# Both classifiers share one hyper-parameter set.
_xgb_settings = dict(
    n_estimators=150,
    max_depth=6,
    learning_rate=0.05,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss',
)

liq_model = XGBClassifier(**_xgb_settings)        # liquidity-zone reactions
ob_fvg_model = XGBClassifier(**_xgb_settings)     # order-block / FVG reactions

liq_model.fit(train_x1, train_y1)
ob_fvg_model.fit(train_x2,train_y2)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# After training
y_pred1 = liq_model.predict(val_x1)
y_pred2 = ob_fvg_model.predict(val_x2)

print(classification_report(val_y1, y_pred1))
print(classification_report(val_y2, y_pred2))


In [None]:
print(X2.columns)
print(X1.columns)

In [None]:
def label_direction_based_on_zone_touch(df, fvg, ob, liq,  threshold=0.25):
    """
    Label candles with the direction ('long' / 'short') of the next zone price touches.

    For the candle at position `i`, the zones created before `i` and within
    `curr_price * threshold` of its close are collected. The future candles
    (`i+1` onwards) are then scanned for the first one that touches each zone.
    If a touch is found, every candle from `i` up to the one before the touch is
    labelled 'long' when the zone mid-price is above that candle's close, else
    'short', and scanning resumes after the touch candle.

    Args:
        df: OHLC frame with 'open', 'high', 'low' and 'close' columns; its index
            is reset inside the function, so zone indexes are treated as positions.
        fvg, ob, liq: lists of zone dicts. NOTE: these dicts are mutated in
            place ('zone_high'/'zone_low' defaults, 'zone_type', 'mid_price' and
            'index' are written onto them).
        threshold: maximum |zone mid - close| as a fraction of the close.

    Returns:
        List of dicts with keys 'index', 'candle detail', 'direction',
        'touched_index', 'touched_zone_type', 'target_zone_index',
        'zone_detail' and 'past_zones'. 'direction' is None when no zone
        qualified and the string 'none' when zones qualified but none is
        touched later.

    NOTE(review): the candle at `touched_index` itself never receives a label,
    because labelling stops at `touched_index - 1` and scanning resumes at
    `touched_index + 1`.
    """
    labels = []
    df = df.reset_index(drop=True)
    n = len(df)

    all_zones = []

    # Normalise every zone dict (in place) and keep those that have a position.
    for zone_list, zone_type in [(fvg, 'FVG'), (ob, 'OB'), (liq, 'LIQ')]:
        for z in zone_list:
            if 'zone_high' not in z:
                z['zone_high'] = z.get('zone_high', z.get('Price', 0) + 0.001)
            if 'zone_low' not in z:
                z['zone_low'] = z.get('zone_low', z.get('Price', 0) - 0.001)
            # 'Swing' is never passed as zone_type above, so the else-branch is unreachable here.
            z['zone_type'] = zone_type if not (zone_type== 'Swing') else z['Type']
            z['mid_price'] = (z['zone_high'] + z['zone_low']) / 2
            z['index'] = z.get('index', z.get('start_index'))
            if z['index'] is not None:
                all_zones.append(z)

    # (zone_high, zone_low, zone_type) keys of FVG/OB zones already consumed by a touch.
    used_indx = set()
    i = 0
    while i < n:
        print(f"Processing candle {i+1}/{n}", end='\r')
        curr_price = df.loc[i, 'close']

        # Candidate zones for this candle. Because `and` binds tighter than `or`,
        # this reads: (created earlier AND not used AND close enough)
        #          OR (LIQ zone created earlier, swept only after i, and close enough).
        past_zones = [
            z for z in all_zones
            if z['index'] < i and (z['zone_high'], z['zone_low'], z['zone_type']) not in used_indx
            and abs(z['mid_price'] - curr_price) < curr_price*threshold
            or (z['zone_type'] == 'LIQ' and not z['swept_index']==None and z['swept_index']>i and z['index'] < i and abs(z['mid_price'] - curr_price) < curr_price*threshold)
        ]

        if not past_zones:
            # No candidate zone at all: emit an unlabelled record (direction None).
            labels.append({
                'index': i,
                'candle detail': df.iloc[i].to_dict(),
                'direction': None,
                'touched_index' : None,
                'touched_zone_type': None,
                'target_zone_index': None,
                'zone_detail' : None,
                'past_zones' : None
            })
            i += 1
            continue

        touched_zone = None
        touched_index = None

        # NOTE(review): there is no `break`, so the LAST zone in `past_zones` with a
        # future touch wins (not the zone touched earliest), and every FVG/OB zone
        # with any future touch is marked as used - confirm this is intended.
        for zone in past_zones:
            z_high = zone['zone_high']
            z_low = zone['zone_low']

            # Vectorised touch test over all later candles: enter from above or from below.
            future_slice = df.iloc[i+1:]
            condition = ((future_slice['low'] <= z_high) & (future_slice['open'] > z_high)) | ((future_slice['open'] < z_low) & (future_slice['high'] >= z_low))
            match = future_slice[condition]

            if not match.empty:
                touched_index = match.index[0]
                touched_zone = zone
                if(touched_zone['zone_type'] in ['FVG','Swing High','Swing Low','OB']):
                    used_indx.add((zone['zone_high'], zone['zone_low'], zone['zone_type']))


        if touched_zone and touched_index:
            # .loc slicing is label-inclusive: candles i .. touched_index - 1.
            price_slice = df.loc[i:touched_index - 1].copy()
            zone_mid = touched_zone['mid_price']
            direction_series = np.where(zone_mid > price_slice['close'], 'long', 'short')

            # Build labels all at once
            for j, (idx, row) in enumerate(price_slice.iterrows()):
                labels.append({
                    'index': idx,
                    'candle detail': row.to_dict(),
                    'direction': direction_series[j],
                    'touched_index' : touched_index,
                    'touched_zone_type': touched_zone['zone_type'],
                    'target_zone_index': touched_zone['index'],
                    'zone_detail' : touched_zone,
                    'past_zones' : past_zones
                })

            # Advance i
            i = touched_index + 1  # skip to after the touch
        else:
            # Candidate zones exist but none is touched later: direction is the string 'none'.
            labels.append({
                'index': i,
                'candle detail': df.iloc[i].to_dict(),
                'direction': 'none',
                'touched_index' : None,
                'touched_zone_type': None,
                'target_zone_index': None,
                'zone_detail' : None,
                'past_zones' : past_zones
            })
            i += 1

    return labels


In [None]:
labels = label_direction_based_on_zone_touch(df,fvg,ob,liq,threshold=0.1)

In [None]:
# `labels` is the list of per-candle dicts returned by label_direction_based_on_zone_touch
df_labels = pd.DataFrame(labels)
# Expand the nested 'candle detail' dicts into one column per candle field
candle_df = pd.json_normalize(df_labels['candle detail'])

# Expand the nested 'zone_detail' dicts into one column per zone field
zone_df = pd.json_normalize(df_labels['zone_detail'])

# Combine the top-level fields with the expanded columns ('candle_' / 'zone_' prefixes)
df_final = pd.concat([df_labels.drop(columns=['candle detail', 'zone_detail']),
                      candle_df.add_prefix('candle_'),
                      zone_df.add_prefix('zone_')],
                     axis=1)


In [None]:
candle_df.head()

In [None]:
df_final.head(10)

In [None]:
df_final.to_csv('direction_1h.csv',index= False)

In [None]:
def flatten_label_row(labeled_row):
    """
    Expand one labelled candle into one feature row per candidate zone.

    Args:
        labeled_row: dict with 'index', 'candle detail' (a dict holding 'open',
            'high', 'low', 'close'), and optionally 'zone_detail',
            'touched_index' and 'past_zones'.

    Returns:
        A list of dicts, one per zone in 'past_zones' (empty when that list is
        missing, None or empty). 'is_target' is always a bool: True only for
        the zone matching 'zone_detail' on index, zone type and both bounds
        (within 1e-6).
    """
    flat_rows = []
    current_index = labeled_row['index']
    candle = labeled_row['candle detail']
    curr_price = candle['close']
    touched_zone = labeled_row.get('zone_detail')
    past_zones = labeled_row.get('past_zones',[])
    if not past_zones:
        return []

    for zone in past_zones:
        zone_mid = float(zone['mid_price'])
        zone_high = float(zone['zone_high'])
        zone_low = float(zone['zone_low'])

        # Coerce to a real bool; the bare `touched_zone and ...` chain yielded
        # None (or an empty dict) whenever no zone had been touched.
        is_target = bool(
            touched_zone and
            zone['index'] == touched_zone['index'] and
            zone['zone_type'] == touched_zone['zone_type'] and
            abs(zone['zone_high'] - touched_zone['zone_high']) < 1e-6 and
            abs(zone['zone_low'] - touched_zone['zone_low']) < 1e-6
        )

        flat_rows.append({
            'candle_index': current_index,
            'zone_index': zone.get('index',None),
            'zone_type': zone.get('zone_type',None),
            'zone_mid_price': zone_mid,
            'zone_high': zone_high,
            'zone_low': zone_low,
            'zone_age': current_index - zone['index'],
            'zone_range': zone_high - zone_low,
            'zone_mid_distance': abs(curr_price - zone_mid),
            'price_above_zone': (curr_price > zone_high),
            'price_inside_zone': (zone_low < curr_price < zone_high),
            'candle_close': curr_price,
            'candle_open': candle['open'],
            'candle_high': candle['high'],
            'candle_low': candle['low'],
            'touched_index': labeled_row.get('touched_index'),
            'is_target': is_target
        })

    return flat_rows


In [None]:
def flatten_all_rows(labeled_data):
    """
    Flatten every labelled candle with flatten_label_row and return all the
    per-zone rows, in input order, as one DataFrame.
    """
    flat_rows = [
        flat
        for labeled_row in labeled_data
        for flat in flatten_label_row(labeled_row)
    ]
    return pd.DataFrame(flat_rows)


In [None]:
df_past_zones = flatten_all_rows(labels)

In [None]:
df_past_zones = pd.read_csv('past_zone.csv')

In [None]:
df_past_zones = df_past_zones.dropna()

In [None]:
zone_type = list(df_past_zones['zone_type'].unique())
is_target = list(df_past_zones['is_target'].unique())
price_above_zone = list(df_past_zones['price_above_zone'].unique())
prive_inside_zone = list(df_past_zones['price_inside_zone'].unique())

In [None]:
df_past_zones['zone_type'] = df_past_zones['zone_type'].apply(lambda x : zone_type.index(x))
df_past_zones['is_target'] = df_past_zones['is_target'].apply(lambda x : is_target.index(x))
df_past_zones['price_above_zone'] = df_past_zones['price_above_zone'].apply(lambda x : price_above_zone.index(x))
df_past_zones['price_inside_zone'] = df_past_zones['price_inside_zone'].apply(lambda x : prive_inside_zone.index(x))

In [None]:
df_past_zones.columns

In [None]:
df_past_zones.to_csv('past_zone.csv',index=False)

In [None]:
df_swings = pd.DataFrame(swings)
df_swings.to_csv('swings.csv',index=False)

In [None]:
df_final.notna().sum()

In [None]:
labels[10]

In [None]:
for i in labels:
    if i['index'] == 13670:
        print(i)

In [None]:
fvg

In [None]:
print(labels)

In [None]:
df_with_labels = df_1h.join(labels, how='left')


In [None]:
df.iloc[17254]

Data Combination of SMC Elements

In [None]:
def smc_combine(df):
    """
    Build one candle-level frame that carries every detected SMC element.

    Detects FVGs, order blocks, swings (window 20), swing structure labels,
    BOS/CHoCH events (lookahead 50) and liquidity zones on `df`, then
    left-merges each of them onto the candles by positional index. Reaction
    checks are not run here.

    Args:
        df: OHLC frame; its original index is stored in the returned frame's
            'timestamp' column.

    Returns:
        A new DataFrame with one row per candle, an 'index' column holding the
        candle position, and the merged detector columns (overlapping names
        get the suffixes '_fvg', '_ob', '_swings', '_bos_choch', '_liq').
    """
    merged = df.copy()
    merged = merged.reset_index(drop = True)
    merged['index'] = merged.index  # ensure index column exists

    fvg = detect_fvg(df)
    ob = detect_order_blocks(df)
    swings = detect_swings(df,20)
    labeled_swings = label_structure_from_swings(swings)
    bos_choch=lookahead_bos_choch(labeled_swings,df,50)
    liq = detect_liquidity_zones(df,swings)

    fvg_df = pd.DataFrame(fvg).set_index('index')
    ob_df = pd.DataFrame(ob).set_index('index')
    bos_choch_df = pd.DataFrame(bos_choch).set_index('trigger_candle')
    labeled_swings_df = pd.DataFrame(labeled_swings).set_index('index')

    # Liquidity records may be keyed by 'start_index' or by 'index'; use
    # whichever is present instead of raising KeyError on 'start_index'.
    liq_df = pd.DataFrame(liq)
    liq_key = 'start_index' if 'start_index' in liq_df.columns else 'index'
    liq_df = liq_df.set_index(liq_key)

    merged = merged.merge(fvg_df, on='index', how='left', suffixes=('', '_fvg'))
    merged = merged.merge(ob_df, on='index', how='left', suffixes=('', '_ob'))
    merged = merged.merge(labeled_swings_df,on = 'index',how='left',suffixes=('','_swings'))
    # NOTE(review): bos_choch_df is indexed by 'trigger_candle', so on='index'
    # needs the BOS/CHoCH records to also carry an 'index' column - confirm.
    merged = merged.merge(bos_choch_df, on='index', how='left', suffixes=('', '_bos_choch'))
    merged = merged.merge(liq_df, left_on='index', right_index=True, how='left', suffixes=('', '_liq'))
    merged['timestamp'] = df.index
    return merged

In [None]:
smc_15m = smc_combine(df)
smc_1hr = smc_combine(df_1h)
smc_4hr = smc_combine(df_4h)
smc_1d = smc_combine(df_1d)

In [None]:
def add_htf_features(base_df,htf_df,prefix):
    """
    Forward-fill selected higher-timeframe columns onto the base timeframe.

    Both frames are aligned on their 'timestamp' column. Each of 'trend',
    'fvg_high', 'fvg_low', 'ob_high', 'ob_low', 'level' and 'swing_type' that
    exists in `htf_df` is reindexed to the base timestamps with
    method='ffill' and added as '<prefix>_<feature>'.

    Args:
        base_df: lower-timeframe frame with a 'timestamp' column.
        htf_df: higher-timeframe frame with a 'timestamp' column.
        prefix: string prepended to each added column name.

    Returns:
        A new frame (inputs are not modified) with 'timestamp' restored as a
        regular column.
    """
    htf_feature_names = ('trend', 'fvg_high', 'fvg_low', 'ob_high', 'ob_low', 'level', 'swing_type')

    enriched = base_df.copy().set_index('timestamp')
    htf_by_time = htf_df.copy().set_index('timestamp')

    available = [name for name in htf_feature_names if name in htf_by_time.columns]
    for name in available:
        aligned = htf_by_time[name].reindex(enriched.index, method='ffill')
        enriched[f'{prefix}_{name}'] = aligned

    return enriched.reset_index()

In [None]:
smc_15m = add_htf_features(smc_15m, smc_1hr, prefix='1h')
smc_15m = add_htf_features(smc_15m,smc_4hr, prefix='4h')
smc_15m = add_htf_features(smc_15m, smc_1d, prefix='1d')


In [None]:
smc_15m.tail()

In [None]:
smc_4hr.tail(20)

In [None]:
smc_15m.to_csv('smc.csv',index=False)

In [None]:
train_data = pd.read_csv('smc.csv')

# --- 1. Zone Confluence ---
train_data['fvg_ob_confluence'] = (
    (train_data['type'] == train_data['type_ob']) &
    train_data['type'].notna()
).astype(int)

train_data['fvg_liq_confluence'] = (
    (train_data['type'].notna()) &
    (train_data['type_liq'].notna()) &
    (abs(train_data['level'] - train_data['low']) < 1000)
).astype(int)

train_data['ob_liq_confluence'] = (
    (train_data['type_ob'].notna()) &
    (train_data['type_liq'].notna()) &
    (abs(train_data['level'] - train_data['low']) < 1000)
).astype(int)

# --- 2. Timing Features ---
train_data['time_since_bos_choch'] = train_data['index'] - train_data['trigger_candle']
train_data['delay_to_liquidity_sweep'] = train_data['swept_index'] - train_data['index']

# --- 3. Reaction Strengths ---
train_data['avg_max_price_move'] = train_data[[
    'max_price_move',
    'max_price_move_ob',
    'max_price_move_liq'
]].mean(axis=1)

train_data['has_strong_reaction'] = train_data['reaction_type'].isin(['clean bounce', 'bounce + break']).astype(int)

# --- 4. Labels ---
train_data['reaction_success'] = train_data['reaction_type'].isin(['clean bounce', 'bounce + break']).astype(int)
train_data['reaction_failed'] = (train_data['reaction_type'] == 'violation').astype(int)

train_data['setup_success'] = (
    (train_data['reaction_type_ob'].isin(['clean bounce'])) |
    (train_data['reaction_type'].isin(['clean bounce'])) |
    (train_data['reaction_type_liq'].isin(['clean bounce']))
).astype(int)

# Show the newly added features
train_data.tail()


In [None]:
drop_cols = [
    'timestamp', 'index', 'price', 'trigger_candle', 'price_bos_choch',
    'trend_bos_choch', 'signal_bos_choch', 'trend_after_bos_choch','reaction_type',
    'reaction_type_ob',
    'reaction_type_liq','has_strong_reaction',
       'reaction_success', 'reaction_failed','price_retrace_ratio','max_price_move','price_retrace_ratio_ob',
       'max_price_move_ob','max_price_move_liq','avg_max_price_move','bounced','partial_mitigated','bounced_ob'
       ,'partial_mitigated_ob','bounced_liq' ,'violated','violated_ob','violated_liq'
]


In [None]:
train_data.fillna({
    'structure_label': 'None',
    'signal': 'None',
    'trend': 'Unknown',

    'swing_type': 'None',
    'type': 'None',
    'type_ob': 'None',
    'type_liq': 'None'
}, inplace=True)

# Fill numeric NaNs
train_data.fillna(0, inplace=True)


In [None]:
from sklearn.preprocessing import LabelEncoder

categorical_cols = [
    'structure_label', 'signal', 'trend',
    'swing_type', 'type', 'type_ob', 'type_liq'
]

le = LabelEncoder()
for col in categorical_cols:
    train_data[col] = le.fit_transform(train_data[col].astype(str))


In [None]:
X = df_past_zones.drop(columns=['reaction_type','index','zone_high','zone_low'])
Y = df_past_zones['reaction_type']

In [None]:
X = df_past_zones.drop(columns=['is_target','candle_index','zone_index','touched_index','zone_type'])
Y = df_past_zones['is_target']


In [None]:
X = X.reset_index(drop= True)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode all object columns
for col in X.columns:
    if X[col].dtype == 'object':
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# Cast entire DataFrame to float32
X = X.astype('float32')


In [None]:
# Ensure all features are either int or float
cols = list(X.columns)
cols


In [None]:
correlations = df_zone_reaction.corr()['reaction_type'].sort_values(ascending=False)
print(correlations)


In [None]:
from sklearn.model_selection import train_test_split

# Seed the shuffle so the split and the metrics derived from it are reproducible.
# NOTE(review): test_size=0.7 trains on 30% of the rows and validates on 70% -
# confirm this ratio is intended.
train_x,val_x,train_y,val_y = train_test_split(X,Y,test_size=0.7,random_state=42)


In [None]:
val_x.tail(5)

In [None]:
from xgboost import XGBClassifier

model = XGBClassifier(
    n_estimators=150,
    max_depth=6,
    learning_rate=0.05,
    random_state=42,
    use_label_encoder=False,
    eval_metric='logloss'
)

model.fit(train_x, train_y)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Kept under its own name so the XGBoost `model` fitted in the previous cell is
# not overwritten: later cells call `model.get_booster()` and save `model` as
# 'smcXGBoost.pkl', both of which need the XGBoost estimator.
rf_model = RandomForestClassifier(
    n_estimators=150,
    max_depth=6,
    random_state=42,
)

rf_model.fit(train_x, train_y)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# After training
y_pred = model.predict(val_x)
y_proba = model.predict_proba(val_x)[:,1]

print(classification_report(val_y, y_pred))
print("AUC:", roc_auc_score(val_y, y_proba))


In [None]:
# Get feature importance as dictionary
importance_dict = model.get_booster().get_score(importance_type='gain')  # You can also use: 'weight', 'cover', 'total_gain', 'total_cover'

# Convert to DataFrame for easier viewing
importance_df = pd.DataFrame.from_dict(importance_dict, orient='index', columns=['Importance'])
importance_df.index.name = 'Feature'
importance_df = importance_df.sort_values(by='Importance', ascending=False)

print(importance_df)

In [None]:
reaction_type

In [None]:
model.feature_importances_

In [None]:
X.iloc[-40]

In [None]:
model.predict(X.iloc[-41].values.reshape(1,-1))


In [None]:
from joblib import dump, load


dump(model,'smcXGBoost.pkl')
print("Saved successfully")

In [None]:
for k in train_data.iloc[50].keys():
    print(f'{k} : {train_data.iloc[50][k]}')