In [1]:
import sys; sys.path.append("../../")

import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.model_selection import train_test_split

In [2]:
# Load the SPY bars with the project's load_ticker helper.
# NOTE(review): `common` is presumably importable only because of the
# sys.path.append("../../") in the first cell — confirm before moving this notebook.
from common.load_ticker import load_ticker

# NOTE(review): base_dir is a relative path, so the result depends on the
# notebook's working directory. How load_ticker uses `seed` is not visible here.
spy_df = load_ticker(
    base_dir = "../../../../parquet_minute/", 
    time_col = "Date",
    symbol_col="Symbol",
    seed = 42, 
    symbol= "SPY",
    verbose=True
)

# Last expression of the cell: shows the loaded frame as the cell output.
spy_df

[LOAD TICKER] Loaded SPY: 61 files -> shape (488041, 7)
[LOAD TICKER] Date range: 2020-08-21 13:30:00+00:00  to  2025-08-21 19:59:00+00:00


Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2020-08-21 13:30:00+00:00,SPY,337.920013,338.029999,337.839996,337.929993,530274
1,2020-08-21 13:31:00+00:00,SPY,337.920013,338.079987,337.920013,338.000000,280279
2,2020-08-21 13:32:00+00:00,SPY,338.019989,338.040009,337.875000,338.000000,180445
3,2020-08-21 13:33:00+00:00,SPY,338.010010,338.083405,337.910004,338.029999,303505
4,2020-08-21 13:34:00+00:00,SPY,338.019989,338.089996,337.940002,338.059998,132404
...,...,...,...,...,...,...,...
488036,2025-08-21 19:55:00+00:00,SPY,635.669983,635.889893,635.659973,635.850098,407279
488037,2025-08-21 19:56:00+00:00,SPY,635.854980,635.885010,635.669983,635.700012,194769
488038,2025-08-21 19:57:00+00:00,SPY,635.710022,635.775024,635.409973,635.409973,315516
488039,2025-08-21 19:58:00+00:00,SPY,635.414978,635.474976,635.145020,635.184998,375316


In [3]:
# Run the project's clean_data helper on the raw SPY frame with every
# cleaning switch turned on; the result is stored under a new name so
# spy_df itself is not reassigned by this cell.
from common.clean_data import clean_data

spy_clean_df = clean_data(
    df=spy_df,
    timestamp_col = "Date",
    symbol_col = "Symbol",
    drop_duplicate_rows = True,
    drop_duplicate_cols = True,
    drop_constant_columns = True,
    drop_constant_rows = True,
    replace_placeholders = True,
    # String spellings that should be treated as missing values.
    placeholders=("Null", "null", "NULL", "NaN", "nan", "NAN", "None", "none", "NONE"),
    fill_missing = True,
    convert_numeric = True,
    # NOTE(review): "timestamp" looks like a sort-mode keyword rather than a
    # column name (the timestamp column is "Date") — confirm against clean_data.
    sort_by = "timestamp",
    verbose = True,
)

# Last expression of the cell: shows the cleaned frame as the cell output.
spy_clean_df

[---CLEAN---] Starting Shape=(488041, 7)
[---CLEAN---] Preserving: Symbol and Date
[---CLEAN---] Step 1: Remove Duplicate Columns.
[---CLEAN---] ------- Original Column Count: 7, After: 7, Removed: 0 in 0.04478s
[---CLEAN---] Step 2: Remove Duplicate Rows.
[---CLEAN---] ------- Original Row Count: 488041, After: 488041, Removed: 0 in 0.08925s
[---CLEAN---] Step 3: Remove Constant Columns.
[---CLEAN---] ------- Original Column Count: 7, After: 7, Removed: 0 in 0.02985s
[---CLEAN---] Step 4: Remove Constant Rows.
[---CLEAN---] ------- Original Row Count: 488041, After: 488041, Removed: 0 in 0.12138s
[---CLEAN---] Step 5: Replacing Placeholder Values
[---CLEAN---] ------- Total Nulls After Replacement: 0 in 0.01674s
[---CLEAN---] Step 6: Sorting by Timestamp.
[---CLEAN---] ------- Sorted by timestamp in 0.01756s
[---CLEAN---] Step 7: Interpolating Missing and NaN Values.
[---CLEAN---] ------- Initial Nulls: 0, After Fill: 0, Filled: 0 in 0.036s
[---CLEAN---] Step 8: Converting Data to Num

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume
0,2020-08-21 13:30:00+00:00,SPY,337.920013,338.029999,337.839996,337.929993,530274
1,2020-08-21 13:31:00+00:00,SPY,337.920013,338.079987,337.920013,338.000000,280279
2,2020-08-21 13:32:00+00:00,SPY,338.019989,338.040009,337.875000,338.000000,180445
3,2020-08-21 13:33:00+00:00,SPY,338.010010,338.083405,337.910004,338.029999,303505
4,2020-08-21 13:34:00+00:00,SPY,338.019989,338.089996,337.940002,338.059998,132404
...,...,...,...,...,...,...,...
488036,2025-08-21 19:55:00+00:00,SPY,635.669983,635.889893,635.659973,635.850098,407279
488037,2025-08-21 19:56:00+00:00,SPY,635.854980,635.885010,635.669983,635.700012,194769
488038,2025-08-21 19:57:00+00:00,SPY,635.710022,635.775024,635.409973,635.409973,315516
488039,2025-08-21 19:58:00+00:00,SPY,635.414978,635.474976,635.145020,635.184998,375316


In [4]:
from iteration_003.charting.generate_features import generate_features

# Keep the full cleaned history in spy_clean_df and slice the most recent
# 100 bars into a separately named frame. Reassigning spy_clean_df to its own
# tail made the full frame unrecoverable without re-running the cleaning cell.
# The explicit copy keeps any in-place changes made by generate_features
# (not visible from this notebook) away from spy_clean_df.
spy_recent_df = spy_clean_df.tail(100).copy()

gen_df = generate_features(df=spy_recent_df)


In [5]:
# gen_df = gen_df[45:80]
# 

In [6]:
# Display the generated feature frame (OHLCV plus the label and Zone* columns) as the cell output.
gen_df

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Volume,CandleDirectionLabel,SwingTypeLabel,RangeHigh,RangeLow,ZoneLow,ZoneHigh,ZoneType,ZoneId,ZonePivotIdx,ZonePending,ZoneActive,ZoneInvalidated,ZoneLifecycle
487941,2025-08-21 18:20:00+00:00,SPY,636.080017,636.190002,636.039978,636.041992,74258,Medium-Low Bearish Sentiment,,,,,,,,,[],[],[],[]
487942,2025-08-21 18:21:00+00:00,SPY,636.049988,636.145020,635.989990,636.039978,41423,Low Bearish Sentiment,Distribution,,635.989990,,,,,,[],[],[],[]
487943,2025-08-21 18:22:00+00:00,SPY,636.044983,636.114990,635.869995,635.885010,117733,Neutral Bearish Sentiment,Distribution,,635.869995,,,,,,[],[],[],[]
487944,2025-08-21 18:23:00+00:00,SPY,635.890015,635.979980,635.729980,635.760010,58726,Neutral Bearish Sentiment,Distribution,,635.729980,635.729980,635.979980,Support,S1,3.0,"[{'id': 'S1', 'type': 'Support', 'low': 635.72...",[],[],"[{'id': 'S1', 'type': 'Support', 'low': 635.72..."
487945,2025-08-21 18:24:00+00:00,SPY,635.753723,636.039978,635.753723,636.030029,55917,High Bullish Sentiment,Distribution,,635.729980,635.753723,636.039978,Resistance,R1,4.0,"[{'id': 'S1', 'type': 'Support', 'low': 635.72...",[],[],"[{'id': 'S1', 'type': 'Support', 'low': 635.72..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
488036,2025-08-21 19:55:00+00:00,SPY,635.669983,635.889893,635.659973,635.850098,407279,Medium-High Bullish Sentiment,Accumulation,635.889893,635.515015,635.659973,635.889893,Resistance,R20,95.0,"[{'id': 'S20', 'type': 'Support', 'low': 635.3...","[{'id': 'R15', 'type': 'Resistance', 'low': 63...","[{'id': 'S1', 'type': 'Support', 'low': 635.72...","[{'id': 'S1', 'type': 'Support', 'low': 635.72..."
488037,2025-08-21 19:56:00+00:00,SPY,635.854980,635.885010,635.669983,635.700012,194769,High Bearish Sentiment,Distribution,635.889893,635.669983,,,,,,"[{'id': 'R20', 'type': 'Resistance', 'low': 63...","[{'id': 'R15', 'type': 'Resistance', 'low': 63...","[{'id': 'S1', 'type': 'Support', 'low': 635.72...","[{'id': 'S1', 'type': 'Support', 'low': 635.72..."
488038,2025-08-21 19:57:00+00:00,SPY,635.710022,635.775024,635.409973,635.409973,315516,Medium-High Bearish Sentiment,Distribution,635.889893,635.409973,,,,,,"[{'id': 'R20', 'type': 'Resistance', 'low': 63...","[{'id': 'R15', 'type': 'Resistance', 'low': 63...","[{'id': 'S1', 'type': 'Support', 'low': 635.72...","[{'id': 'S1', 'type': 'Support', 'low': 635.72..."
488039,2025-08-21 19:58:00+00:00,SPY,635.414978,635.474976,635.145020,635.184998,375316,Medium-High Bearish Sentiment,Distribution,635.889893,635.145020,635.080017,635.474976,Support,S21,98.0,"[{'id': 'S21', 'type': 'Support', 'low': 635.0...","[{'id': 'R15', 'type': 'Resistance', 'low': 63...","[{'id': 'S1', 'type': 'Support', 'low': 635.72...","[{'id': 'S1', 'type': 'Support', 'low': 635.72..."


In [7]:
import cv2, numpy as np, pandas as pd, ast

def _as_list(x):
    if isinstance(x, list): return x
    if isinstance(x, str):
        try: return list(ast.literal_eval(x))
        except Exception: return []
    return [] if x is None else [x]

def simple_draw_with_zones(gen_df, n=100, save_path="last100_zones.png", zone_extend=20, right_padding=0.2):
    """
    Draw candlesticks + support/resistance zones using ZoneLifecycle and save the image.

    The last `n` rows of `gen_df` are drawn as candles. For every zone id the
    most recent ZoneLifecycle record found in `gen_df` is used, and the zone is
    drawn as a translucent rectangle from pivot_idx to end_idx. A zone that is
    still "Active" with no end_idx is extended `zone_extend` bars past the
    last candle.

    Args:
        gen_df: DataFrame with Open/High/Low/Close columns and, optionally, a
            ZoneLifecycle column holding lists of zone dicts (or their string
            repr). pivot_idx / end_idx are treated as row positions in gen_df.
        n: number of trailing rows to plot.
        save_path: file the image is written to with cv2.imwrite.
        zone_extend: extra bar slots reserved to the right of the last candle.
        right_padding: fraction by which the 1200 px base width is widened.

    Raises:
        OSError: if cv2.imwrite reports that the image was not written.

    Zones are skipped when confirmed_at is None, when the pivot lies outside
    the plotted window, or when low/high are missing.
    """
    # ---- window + canvas ----
    win = gen_df.tail(n).reset_index(drop=True)
    H, W, margin = 800, 1200, 50

    # compute padded width
    W = int(W * (1 + right_padding))
    img = np.full((H, W, 3), 255, dtype=np.uint8)

    # positions of the first / last plotted row within gen_df
    start_pos = len(gen_df) - len(win)
    end_pos   = len(gen_df) - 1

    # y-scale (higher price -> smaller pixel row)
    pmin, pmax = float(win["Low"].min()), float(win["High"].max())
    if pmax == pmin:
        pmax = pmin + 1e-6
    def y(p):
        return int(margin + (pmax - p) * (H - 2*margin) / (pmax - pmin))

    # x-scale; all helpers take a position within gen_df, not within win
    slots = max(1, len(win) + zone_extend)  # avoid dividing by zero
    step = (W - 2*margin) / slots
    def x_left(pos):   return int(margin + (pos - start_pos) * step)
    def x_right(pos):  return int(margin + (pos - start_pos + 1) * step)
    def x_center(pos): return int(margin + (pos - start_pos) * step + step/2)
    body_w = max(2, int(step*0.6))

    # ---- draw candles ----
    ohlc = zip(win["Open"], win["High"], win["Low"], win["Close"])
    for offset, (o, hi, lo, c) in enumerate(ohlc):
        # win was re-indexed from 0, so shift back to a gen_df position;
        # otherwise candles slide left whenever n < len(gen_df).
        x = x_center(start_pos + offset)
        col = (0,200,0) if c >= o else (0,0,200)  # BGR: green up, red down
        cv2.line(img, (x, y(hi)), (x, y(lo)), col, 2)
        cv2.rectangle(img,
                      (x - body_w//2, y(max(o, c))),
                      (x + body_w//2, y(min(o, c))),
                      col, -1)

    # ---- gather final lifecycle state ----
    lifecycle = {}
    cells = gen_df["ZoneLifecycle"] if "ZoneLifecycle" in gen_df.columns else ()
    for cell in cells:
        for z in _as_list(cell):
            # ignore anything that is not a zone record (e.g. a stray NaN)
            if isinstance(z, dict) and "id" in z:
                lifecycle[z["id"]] = z  # keep latest record for each zone

    # ---- draw zones ----
    overlay = img.copy()
    for zid, z in lifecycle.items():
        status = z.get("status", "Pending")
        pivot  = z.get("pivot_idx")

        # skip zones that never confirmed
        if z.get("confirmed_at") is None:
            continue

        # skip zones whose pivot is outside the plotted window
        if pivot is None or not (start_pos <= pivot <= end_pos):
            continue

        # end index handling: an Active zone without end_idx runs past the last bar
        end_idx = z.get("end_idx")
        still_open = status == "Active" and end_idx is None
        last_pos = end_pos if end_idx is None else end_idx

        # zone start/end in pixels
        x_start = x_left(pivot)
        x_end   = x_right(last_pos + zone_extend) if still_open else x_right(last_pos)

        # bounds (a missing key is treated like a missing value)
        zlow, zhigh, ztype = z.get("low"), z.get("high"), z.get("type")
        if pd.isna(zlow) or pd.isna(zhigh):
            continue

        y0, y1 = y(zlow), y(zhigh)
        top, bot = min(y0, y1), max(y0, y1)
        color = (0,0,200) if ztype == "Resistance" else (0,200,0)

        cv2.rectangle(overlay, (x_start, top), (x_end, bot), color, -1)
        cv2.line(overlay, (x_start, top), (x_end, top), (50,50,50), 1)
        cv2.line(overlay, (x_start, bot), (x_end, bot), (50,50,50), 1)
        cv2.putText(overlay, str(zid),
                    (max(margin, x_start+6), max(top-6, margin+12)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (35,35,35), 2, cv2.LINE_AA)

    # ---- blend + frame ----
    img = cv2.addWeighted(overlay, 0.25, img, 0.75, 0)
    cv2.rectangle(img, (margin, margin), (W - margin, H - margin), (180,180,180), 1)
    # imwrite signals failure through its return value instead of raising
    if not cv2.imwrite(save_path, img):
        raise OSError(f"cv2.imwrite could not write image to {save_path!r}")


In [8]:
# Render the last 100 rows of gen_df with their zones and write the chart to last100_zones.png.
simple_draw_with_zones(gen_df, n=100, save_path="last100_zones.png")


In [9]:
import cv2, numpy as np, pandas as pd, ast

def _as_list(x):
    if isinstance(x, list): return x
    if isinstance(x, str):
        try: return list(ast.literal_eval(x))
        except Exception: return []
    return [] if x is None else [x]

def simple_draw_with_zones(
    gen_df,
    n=100,
    save_path="last100_zones_a.png",
    zone_extend=20,
    right_padding=0.2,
    active_only=True
):
    """
    Draw candlesticks + support/resistance zones using ZoneLifecycle and save the image.

    The last `n` rows of `gen_df` are drawn as candles. For every zone id the
    most recent ZoneLifecycle record found in `gen_df` is used, and the zone is
    drawn as a translucent rectangle from pivot_idx to end_idx. A zone that is
    still "Active" with no end_idx is extended `zone_extend` bars past the
    last candle.

    Args:
        gen_df: DataFrame with Open/High/Low/Close columns and, optionally, a
            ZoneLifecycle column holding lists of zone dicts (or their string
            repr). pivot_idx / end_idx are treated as row positions in gen_df.
        n: number of trailing rows to plot.
        save_path: file the image is written to with cv2.imwrite.
        zone_extend: extra bar slots reserved to the right of the last candle.
        right_padding: fraction by which the 1200 px base width is widened.
        active_only (bool):
            - True → plot only confirmed zones whose status is "Active"
            - False → plot every confirmed zone regardless of status

    Raises:
        OSError: if cv2.imwrite reports that the image was not written.

    Zones are always skipped when confirmed_at is None, when the pivot lies
    outside the plotted window, or when low/high are missing.
    """
    # ---- window + canvas ----
    win = gen_df.tail(n).reset_index(drop=True)
    H, W, margin = 800, 1200, 50

    # compute padded width
    W = int(W * (1 + right_padding))
    img = np.full((H, W, 3), 255, dtype=np.uint8)

    # positions of the first / last plotted row within gen_df
    start_pos = len(gen_df) - len(win)
    end_pos   = len(gen_df) - 1

    # y-scale (higher price -> smaller pixel row)
    pmin, pmax = float(win["Low"].min()), float(win["High"].max())
    if pmax == pmin:
        pmax = pmin + 1e-6
    def y(p):
        return int(margin + (pmax - p) * (H - 2*margin) / (pmax - pmin))

    # x-scale; all helpers take a position within gen_df, not within win
    slots = max(1, len(win) + zone_extend)  # avoid dividing by zero
    step = (W - 2*margin) / slots
    def x_left(pos):   return int(margin + (pos - start_pos) * step)
    def x_right(pos):  return int(margin + (pos - start_pos + 1) * step)
    def x_center(pos): return int(margin + (pos - start_pos) * step + step/2)
    body_w = max(2, int(step*0.6))

    # ---- draw candles ----
    ohlc = zip(win["Open"], win["High"], win["Low"], win["Close"])
    for offset, (o, hi, lo, c) in enumerate(ohlc):
        # win was re-indexed from 0, so shift back to a gen_df position;
        # otherwise candles slide left whenever n < len(gen_df).
        x = x_center(start_pos + offset)
        col = (0,200,0) if c >= o else (0,0,200)  # BGR: green up, red down
        cv2.line(img, (x, y(hi)), (x, y(lo)), col, 2)
        cv2.rectangle(img,
                      (x - body_w//2, y(max(o, c))),
                      (x + body_w//2, y(min(o, c))),
                      col, -1)

    # ---- gather final lifecycle state ----
    lifecycle = {}
    cells = gen_df["ZoneLifecycle"] if "ZoneLifecycle" in gen_df.columns else ()
    for cell in cells:
        for z in _as_list(cell):
            # ignore anything that is not a zone record (e.g. a stray NaN)
            if isinstance(z, dict) and "id" in z:
                lifecycle[z["id"]] = z  # keep latest record for each zone

    # ---- draw zones ----
    overlay = img.copy()
    for zid, z in lifecycle.items():
        status = z.get("status", "Pending")
        pivot  = z.get("pivot_idx")

        # skip zones that never confirmed
        if z.get("confirmed_at") is None:
            continue

        # skip inactive if only active requested
        if active_only and status != "Active":
            continue

        # skip zones whose pivot is outside the plotted window
        if pivot is None or not (start_pos <= pivot <= end_pos):
            continue

        # end index handling: an Active zone without end_idx runs past the last bar
        end_idx = z.get("end_idx")
        still_open = status == "Active" and end_idx is None
        last_pos = end_pos if end_idx is None else end_idx

        # zone start/end in pixels
        x_start = x_left(pivot)
        x_end   = x_right(last_pos + zone_extend) if still_open else x_right(last_pos)

        # bounds (a missing key is treated like a missing value)
        zlow, zhigh, ztype = z.get("low"), z.get("high"), z.get("type")
        if pd.isna(zlow) or pd.isna(zhigh):
            continue

        y0, y1 = y(zlow), y(zhigh)
        top, bot = min(y0, y1), max(y0, y1)
        color = (0,0,200) if ztype == "Resistance" else (0,200,0)

        cv2.rectangle(overlay, (x_start, top), (x_end, bot), color, -1)
        cv2.line(overlay, (x_start, top), (x_end, top), (50,50,50), 1)
        cv2.line(overlay, (x_start, bot), (x_end, bot), (50,50,50), 1)
        cv2.putText(overlay, str(zid),
                    (max(margin, x_start+6), max(top-6, margin+12)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (35,35,35), 2, cv2.LINE_AA)

    # ---- blend + frame ----
    img = cv2.addWeighted(overlay, 0.25, img, 0.75, 0)
    cv2.rectangle(img, (margin, margin), (W - margin, H - margin), (180,180,180), 1)
    # imwrite signals failure through its return value instead of raising
    if not cv2.imwrite(save_path, img):
        raise OSError(f"cv2.imwrite could not write image to {save_path!r}")


In [10]:
# No save_path is passed, so the chart is written to the function's default output file.
simple_draw_with_zones(gen_df, active_only=True)   # only current active zones

In [13]:
import cv2, numpy as np, pandas as pd, ast

def _as_list(x):
    if isinstance(x, list): return x
    if isinstance(x, str):
        try: return list(ast.literal_eval(x))
        except Exception: return []
    return [] if x is None else [x]

def simple_draw_with_zones_line(gen_df, n=100, save_path="last100_zones_line.png", zone_extend=20, right_padding=0.2):
    """
    Draw a close-price line + support/resistance zones using ZoneLifecycle and save the image.

    The Close of the last `n` rows of `gen_df` is drawn as a polyline. For
    every zone id the most recent ZoneLifecycle record found in `gen_df` is
    used, and the zone is drawn as a translucent rectangle between the bar
    centres of pivot_idx and end_idx. A zone that is still "Active" with no
    end_idx is extended `zone_extend` bars past the last bar.

    Args:
        gen_df: DataFrame with High/Low/Close columns and, optionally, a
            ZoneLifecycle column holding lists of zone dicts (or their string
            repr). pivot_idx / end_idx are treated as row positions in gen_df.
        n: number of trailing rows to plot.
        save_path: file the image is written to with cv2.imwrite.
        zone_extend: extra bar slots reserved to the right of the last bar.
        right_padding: fraction by which the 1200 px base width is widened.

    Raises:
        OSError: if cv2.imwrite reports that the image was not written.

    Every confirmed zone is drawn regardless of status. Zones are skipped
    when confirmed_at is None, when the pivot lies outside the plotted
    window, or when low/high are missing.
    """
    # ---- window + canvas ----
    win = gen_df.tail(n).reset_index(drop=True)
    H, W, margin = 800, 1200, 50
    W = int(W * (1 + right_padding))
    img = np.full((H, W, 3), 255, dtype=np.uint8)

    # positions of the first / last plotted row within gen_df
    start_pos = len(gen_df) - len(win)
    end_pos   = len(gen_df) - 1

    # y-scale (higher price -> smaller pixel row)
    pmin, pmax = float(win["Low"].min()), float(win["High"].max())
    if pmax == pmin:
        pmax = pmin + 1e-6
    def y(p):
        return int(margin + (pmax - p) * (H - 2*margin) / (pmax - pmin))

    # x-scale; x_center takes a position within gen_df, not within win
    slots = max(1, len(win) + zone_extend)  # avoid dividing by zero
    step = (W - 2*margin) / slots
    def x_center(pos): return int(margin + (pos - start_pos) * step + step/2)

    # ---- draw price line ----
    closes = win["Close"].tolist()
    if len(closes) > 1:
        # win was re-indexed from 0, so shift back to a gen_df position;
        # otherwise the line slides left whenever n < len(gen_df).
        pts = [(x_center(start_pos + k), y(c)) for k, c in enumerate(closes)]
        for p_prev, p_next in zip(pts, pts[1:]):
            cv2.line(img, p_prev, p_next, (0, 0, 0), 2)

    # ---- gather final lifecycle state ----
    lifecycle = {}
    cells = gen_df["ZoneLifecycle"] if "ZoneLifecycle" in gen_df.columns else ()
    for cell in cells:
        for z in _as_list(cell):
            # ignore anything that is not a zone record (e.g. a stray NaN)
            if isinstance(z, dict) and "id" in z:
                lifecycle[z["id"]] = z  # keep latest state

    # ---- draw zones ----
    overlay = img.copy()
    for zid, z in lifecycle.items():
        status = z.get("status", "Pending")
        pivot  = z.get("pivot_idx")

        # skip unconfirmed zones and zones whose pivot is outside the window
        if z.get("confirmed_at") is None:
            continue
        if pivot is None or not (start_pos <= pivot <= end_pos):
            continue

        # an Active zone without end_idx runs past the last bar
        end_idx = z.get("end_idx")
        still_open = status == "Active" and end_idx is None
        last_pos = end_pos if end_idx is None else end_idx

        # zone start/end in pixels (anchored on bar centres, like the line)
        x_start = x_center(pivot)
        x_end = x_center(last_pos + zone_extend) if still_open else x_center(last_pos)

        # bounds (a missing key is treated like a missing value)
        zlow, zhigh, ztype = z.get("low"), z.get("high"), z.get("type")
        if pd.isna(zlow) or pd.isna(zhigh):
            continue

        y0, y1 = y(zlow), y(zhigh)
        top, bot = min(y0, y1), max(y0, y1)
        color = (0,0,200) if ztype == "Resistance" else (0,200,0)  # BGR

        cv2.rectangle(overlay, (x_start, top), (x_end, bot), color, -1)
        cv2.line(overlay, (x_start, top), (x_end, top), (50,50,50), 1)
        cv2.line(overlay, (x_start, bot), (x_end, bot), (50,50,50), 1)
        cv2.putText(overlay, str(zid),
                    (max(margin, x_start+6), max(top-6, margin+12)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (35,35,35), 2, cv2.LINE_AA)

    # ---- blend + frame ----
    img = cv2.addWeighted(overlay, 0.25, img, 0.75, 0)
    cv2.rectangle(img, (margin, margin), (W - margin, H - margin), (180,180,180), 1)
    # imwrite signals failure through its return value instead of raising
    if not cv2.imwrite(save_path, img):
        raise OSError(f"cv2.imwrite could not write image to {save_path!r}")


In [14]:
simple_draw_with_zones_line(gen_df)   # close-price line with every confirmed zone (this variant has no active_only filter)