In [1]:
import sys; sys.path.append('../')
from utils.load_ticker import load_ticker
from utils.clean_data import clean_data

# Load the raw minute bars for a single ticker from the parquet store.
df = load_ticker(
    base_dir="../../parquet_minute/",
    time_col="Date",
    symbol_col="Symbol",
    seed=42,
    symbol="SPY",
    verbose=False,
)

# Clean the raw frame into the 1-minute working frame used by later cells.
# NOTE(review): sort_by is "timestamp" while timestamp_col is "Date" -- confirm
# that clean_data resolves "timestamp" to the timestamp column.
df_1m = clean_data(
    df=df,
    # column roles
    timestamp_col="Date",
    symbol_col="Symbol",
    # structural de-duplication / pruning
    drop_duplicate_rows=True,
    drop_duplicate_cols=True,
    drop_constant_columns=True,
    drop_constant_rows=True,
    # textual null markers that should be treated as missing values
    replace_placeholders=True,
    placeholders=(
        "Null", "null", "NULL",
        "NaN", "nan", "NAN",
        "None", "none", "NONE",
    ),
    # value handling and ordering
    fill_missing=True,
    convert_numeric=True,
    sort_by="timestamp",
    verbose=False,
)

# df_1m = df_1m.tail(100000)

In [2]:
import pandas as pd

import os

# Folder that receives one OHLCV CSV per timeframe.
output_path = "resampled_data"
os.makedirs(output_path, exist_ok=True)

# Parse "Date" into timezone-aware UTC timestamps, then keep one row per
# timestamp and use the sorted timestamps as the index (resampling below
# needs a datetime index).
df_1m["timestamp"] = pd.to_datetime(df_1m["Date"], utc=True)
df_1m = (
    df_1m
    .drop_duplicates(subset="timestamp")
    .set_index("timestamp")
    .sort_index()
)

# Persist the base 1-minute series next to the resampled ones.
df_1m.to_csv(os.path.join(output_path, "ohlcv_1m.csv"))

def resample_ohlc(df, rule):
    """Aggregate OHLCV bars into coarser bars of width ``rule``.

    Args:
        df: DataFrame with "Open", "High", "Low", "Close" and "Volume"
            columns, indexed so that ``df.resample`` can be applied.
        rule: Resampling rule passed straight to ``DataFrame.resample``.

    Returns:
        DataFrame with the five OHLCV columns, one row per bin. Bins are
        closed on the right and labelled with their right edge; any bin in
        which an aggregated column is NaN is dropped.

    Raises:
        ValueError: If one or more of the required columns is missing.
    """
    missing = {"Open", "High", "Low", "Close", "Volume"} - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # How each column collapses within a bin.
    how_to_aggregate = {
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum",
    }

    bins = df.resample(rule, label="right", closed="right")
    return bins.agg(how_to_aggregate).dropna(how="any")

# Timeframe label (used in the output file names) -> pandas resampling rule.
TIMEFRAMES = {
    "3m": "3min",
    "5m": "5min",
    "15m": "15min",
    "1h": "1h",
    "4h": "4h",
    # "D" is the canonical pandas alias for calendar days; the label (and
    # therefore the CSV name) stays "1d".
    "1d": "1D",
}

# One resampled OHLCV frame per timeframe label.
resampled_dfs = {
    tf: resample_ohlc(df_1m, rule) for tf, rule in TIMEFRAMES.items()
}

# Write each timeframe to "<output_path>/ohlcv_<label>.csv".
# The loop variable is deliberately not called `df`, so the raw frame loaded
# in the first cell is not silently replaced by the last resampled frame.
for tf, tf_bars in resampled_dfs.items():
    tf_bars.to_csv(os.path.join(output_path, f"ohlcv_{tf}.csv"))

In [None]:
# detect_levels.py
import os
import pandas as pd
from StockMarketComputerVisionResearch.new.new_old_now.detect_break_levels import detect_break_levels

# Input/output paths
input_path = "resampled_data"
level_output_path = "break_level_data"
os.makedirs(level_output_path, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so the timeframes
# are processed (and logged) in the same order on every run.
for file in sorted(os.listdir(input_path)):
    if not (file.startswith("ohlcv_") and file.endswith(".csv")):
        continue

    # "ohlcv_15m.csv" -> "15m": strip exactly the prefix and the extension.
    tf = file[len("ohlcv_"):-len(".csv")]

    # First column is the timestamp index written by the resampling cell.
    candles = pd.read_csv(os.path.join(input_path, file), index_col=0, parse_dates=True)

    break_levels = detect_break_levels(candles)
    break_levels.to_csv(os.path.join(level_output_path, f"break_levels_{tf}.csv"), index=False)

    print(f"✅ Levels saved for {tf}: {len(break_levels)} levels")


✅ Levels saved for 15m: 5581 levels
✅ Levels saved for 1h: 1440 levels
✅ Levels saved for 1m: 80877 levels
✅ Levels saved for 3m: 26909 levels
✅ Levels saved for 1d: 207 levels
✅ Levels saved for 5m: 16248 levels
✅ Levels saved for 4h: 477 levels


In [None]:
# track_breaks.py
import os
import pandas as pd
from StockMarketComputerVisionResearch.new.new_old_now.detect_break_levels import track_break_events  # NEW FUNCTION

# Candle CSVs in, break-level CSVs in, break-event CSVs out.
input_path = "resampled_data"
level_input_path = "break_level_data"
event_output_path = "break_event_data"
os.makedirs(event_output_path, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so the timeframes
# are processed (and logged) in the same order on every run.
for file in sorted(os.listdir(input_path)):
    if not (file.startswith("ohlcv_") and file.endswith(".csv")):
        continue

    # "ohlcv_15m.csv" -> "15m": strip exactly the prefix and the extension.
    tf = file[len("ohlcv_"):-len(".csv")]

    # Check for the levels file first so no candle CSV is read needlessly.
    levels_df_path = os.path.join(level_input_path, f"break_levels_{tf}.csv")
    if not os.path.exists(levels_df_path):
        print(f"⚠️ Skipping {tf} — no levels file found.")
        continue

    candles = pd.read_csv(os.path.join(input_path, file), index_col=0, parse_dates=True)
    levels_df = pd.read_csv(levels_df_path, parse_dates=["level_time"])

    # Uses the stack-based tracker (per the original note on this call).
    break_events = track_break_events(candles, levels_df)

    break_events.to_csv(
        os.path.join(event_output_path, f"break_events_{tf}.csv"),
        index=False
    )

    print(f"✅ Breaks saved for {tf}: {len(break_events)} events")


✅ Breaks saved for 15m: 5441 events
✅ Breaks saved for 1h: 1364 events
✅ Breaks saved for 1m: 80362 events
✅ Breaks saved for 3m: 26620 events
✅ Breaks saved for 1d: 187 events
✅ Breaks saved for 5m: 16001 events
✅ Breaks saved for 4h: 435 events


In [None]:
import os
from StockMarketComputerVisionResearch.new.new_old_now.load_all_structure_data import load_all_structure_data
from StockMarketComputerVisionResearch.new.new_old_now.generate_images import generate_images


# Load the three artefacts written by the previous cells: resampled candles,
# break levels and break events.
candle_data, break_levels, break_events = load_all_structure_data(
    resampled_dir="resampled_data",
    level_dir="break_level_data",
    event_dir="break_event_data"
)

# Output folder for images
output_dir = "dataset"

# NOTE(review): label_to_id is not passed to generate_images in this cell --
# confirm whether it is still needed or is consumed elsewhere.
label_to_id = {
    "price": 0,
    "support": 1,
    "resistance": 2
}

# Generate break level visualizations.
# The output root comes from `output_dir` instead of a second hard-coded
# "dataset" literal, so changing the folder above actually takes effect.
generate_images(
    candle_data,
    break_levels,
    break_events,
    output_root=output_dir,
    image_size=(640, 640),
    dot_radius=2
)





✅ Loaded timeframes: ['15m', '1d', '1h', '1m', '3m', '4h', '5m']
🧠 Using base TF: 15m, Rendering 33792 frames
🖼️ Rendered 0/33792 frames
🖼️ Rendered 100/33792 frames
🖼️ Rendered 200/33792 frames
🖼️ Rendered 300/33792 frames
🖼️ Rendered 400/33792 frames
🖼️ Rendered 500/33792 frames
🖼️ Rendered 600/33792 frames
🖼️ Rendered 700/33792 frames
🖼️ Rendered 800/33792 frames


KeyboardInterrupt: 

In [6]:
import cv2
import os
import glob
import numpy as np
from datetime import datetime

def export_tf_video(
    root_dir="./dataset",
    timeframes_top=("1m", "3m", "5m", "15m"),
    timeframes_bottom=("1h", "4h", "1d"),
    output_file=None,
    fps=10,
    scale=0.7,
):
    """Tile per-timeframe chart images into a two-row video.

    For every timeframe the PNG files in ``<root_dir>/<tf>/images`` are
    listed in sorted path order. Video frame ``i`` shows the ``i``-th image
    of every timeframe: ``timeframes_top`` side by side in the upper row and
    ``timeframes_bottom`` in the lower row. Each tile gets a white title bar
    with the timeframe label and a 1-pixel black border. The video length is
    the smallest image count among the timeframes.

    Args:
        root_dir: Folder containing one sub-folder per timeframe.
        timeframes_top: Timeframe labels for the upper row (any iterable).
        timeframes_bottom: Timeframe labels for the lower row (any iterable).
        output_file: Target video path. Defaults to
            ``<root_dir>/videos/tf_<timestamp>.mp4``.
        fps: Frame rate passed to the video writer.
        scale: Factor applied to the image size before tiling.

    Raises:
        ValueError: If any timeframe has no images, or a first image cannot
            be read.
        RuntimeError: If the video writer cannot be opened for ``output_file``.
    """
    # Accept lists as well as tuples (tuple + list would raise TypeError).
    timeframes_top = tuple(timeframes_top)
    timeframes_bottom = tuple(timeframes_bottom)
    all_timeframes = timeframes_top + timeframes_bottom

    files = {
        tf: sorted(glob.glob(os.path.join(root_dir, tf, "images", "*.png")))
        for tf in all_timeframes
    }

    # Every timeframe must contribute to every frame, so an empty folder
    # (or no timeframes at all) means there is nothing to render. Empty lists
    # must NOT be filtered out here, otherwise this check can never trigger.
    num_frames = min((len(paths) for paths in files.values()), default=0)
    if num_frames == 0:
        raise ValueError("❌ No frames found in one or more timeframe folders!")

    if output_file is None:
        ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_file = f"{root_dir}/videos/tf_{ts}.mp4"

    # A bare file name has no directory part; os.makedirs("") would fail.
    output_parent = os.path.dirname(output_file)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Read one sample to determine base height
    sample = cv2.imread(files[all_timeframes[0]][0])
    if sample is None:
        raise ValueError("❌ Could not load sample image.")

    base_height = int(sample.shape[0] * scale)
    title_height = 30
    chart_height = base_height + title_height

    def process_frame(path, label):
        """Load one image and return it as a titled, bordered tile (or None)."""
        img = cv2.imread(path)
        if img is None:
            return None
        if scale != 1.0:
            img = cv2.resize(img, (int(img.shape[1] * scale), int(img.shape[0] * scale)))
        # Normalise to the common height so tiles can be stacked side by side
        # even when source images differ in size.
        h, w = img.shape[:2]
        img = cv2.resize(img, (int(w * base_height / h), base_height))

        # Title bar: white strip with the label centred in black text.
        title_bar = np.full((title_height, img.shape[1], 3), 255, dtype=np.uint8)
        font = cv2.FONT_HERSHEY_SIMPLEX
        text_size = cv2.getTextSize(label, font, 0.6, 1)[0]
        text_x = (title_bar.shape[1] - text_size[0]) // 2
        text_y = (title_height + text_size[1]) // 2
        cv2.putText(title_bar, label, (text_x, text_y), font, 0.6, (0, 0, 0), 1)

        # Stack and draw border
        combined = np.vstack([title_bar, img])
        cv2.rectangle(combined, (0, 0), (combined.shape[1] - 1, combined.shape[0] - 1), (0, 0, 0), 1)
        return combined

    def first_tile_width(tf):
        """Width of the first tile of ``tf``; fails loudly if it is unreadable."""
        tile = process_frame(files[tf][0], tf)
        if tile is None:
            raise ValueError(f"❌ Could not load first frame for timeframe {tf!r}.")
        return tile.shape[1]

    # Determine output size from the first frame of every timeframe.
    top_width = sum(first_tile_width(tf) for tf in timeframes_top)
    bot_width = sum(first_tile_width(tf) for tf in timeframes_bottom)
    out_width = max(top_width, bot_width)
    out_height = chart_height * 2

    def build_row(tf_group, index):
        """Return a row of exactly (chart_height, out_width) for frame ``index``."""
        # Start from a white canvas so missing tiles / narrow rows are padded.
        row = np.full((chart_height, out_width, 3), 255, dtype=np.uint8)
        tiles = []
        for tf in tf_group:
            tile = process_frame(files[tf][index], tf)
            if tile is not None:
                tiles.append(tile)
        if tiles:
            # Crop rows that turn out wider than the writer's frame size.
            strip = np.hstack(tiles)[:, :out_width]
            row[:, :strip.shape[1]] = strip
        return row

    # Init video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_file, fourcc, fps, (out_width, out_height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"Could not open video writer for {output_file}")

    # Frame loop; release the writer even if a frame fails so the file handle
    # is not leaked.
    try:
        for i in range(num_frames):
            # Both rows always have the writer's width and together its height;
            # frames of any other size would be dropped by the writer.
            full_frame = np.vstack(
                [build_row(tf_group, i) for tf_group in (timeframes_top, timeframes_bottom)]
            )
            out.write(full_frame)
    finally:
        out.release()

    print(f"✅ Video saved: {output_file}")


# Build the combined multi-timeframe video using the function's default
# dataset folder and timeframe layout, at 10 fps with images scaled by 0.6.
export_tf_video(fps=10, scale=0.6)


✅ Video saved: ./dataset/videos/tf_2025-09-23_07-03-08.mp4
