In [1]:
import pandas as pd

# --- Load data ---
# Read the CSV at this path into a DataFrame, parsing the "timestamp" column
# as datetimes (the file name suggests 1-minute EURUSD bars).
# NOTE(review): `df_1m` is not referenced by the later cell visible here, which
# reads the same path again into `df` — confirm whether this cell is still needed.
df_1m = pd.read_csv("./data/agg_data/fx/C:EURUSD_1m_last1y.csv", parse_dates=["timestamp"])


In [2]:
import pandas as pd
from pathlib import Path
from collections import deque
from tqdm import tqdm
import os
import csv

from pipeline.aggregator import TimeframeAggregator
from pipeline.zone_engine import ZoneEngine
from pipeline.generate_image import ImageGenerator

# --- Config ---
# Everything a reader might tune lives here; the loops below only read these names.
csv_path = "./data/agg_data/fx/C:EURUSD_1m_last1y.csv"  # input bars (read into `df` below)
save_dir = Path("./dataset")  # root folder for meta.csv and the generated images
timeframes = ["1m", "3m", "5m", "15m", "1h", "4h", "1d"]
# Minimum number of resampled candles a timeframe must have before it is
# processed, and the number of trailing candles passed to the image generator.
candle_limits = {tf: 60 for tf in timeframes}
image_size = (640, 640)  # forwarded to ImageGenerator; presumably (width, height) in pixels — TODO confirm
preload_days = 1  # length of the warm-up window, converted to minutes (days * 24 * 60) later

# --- Setup ---
df = pd.read_csv(csv_path, parse_dates=["timestamp"])
# Rolling window of the most recently appended bars; once 10000 items are held,
# each append silently drops the oldest one.
# NOTE(review): if TimeframeAggregator resamples only what is in this buffer,
# 10000 one-minute bars span under 7 days of continuous data, which is fewer
# than the 60 candles required for "4h" (60 * 240 min) and "1d" (60 * 1440 min),
# so those timeframes may never produce output — confirm against the aggregator.
buffer = deque(maxlen=10000)
aggregator = TimeframeAggregator(buffer)  # shares the same deque object that the loops append to
zone_engine = ZoneEngine(timeframes)
image_gen = ImageGenerator(candle_limits, image_size=image_size)

# --- Preload period ---
# Warm-up pass: feed the first `preload_days` worth of 1-minute rows into the
# rolling buffer and the zone engine without generating any images.
preload_minutes = preload_days * 24 * 60

# Never iterate past the end of the dataset (replaces a per-iteration bound check).
n_preload = min(preload_minutes, len(df))
if n_preload < preload_minutes:
    print(
        f"[WARN] Dataset has only {len(df)} rows, fewer than the "
        f"{preload_minutes} preload minutes requested; no rows remain for image generation."
    )

# The context manager closes the progress bar even if the loop raises or is
# interrupted from the notebook.
with tqdm(total=preload_minutes, desc=f"Preloading {preload_days} days", unit="min") as pbar:
    for i in range(n_preload):
        bar = df.iloc[i].to_dict()
        buffer.append(bar)
        resampled = aggregator.resample_all(timeframes)

        for tf in timeframes:
            df_tf = resampled[tf]
            # Skip timeframes that have not yet accumulated enough candles.
            if len(df_tf) < candle_limits[tf]:
                continue

            last_bar = df_tf.iloc[-1].to_dict()
            zone_engine.update(tf, last_bar)  # alignment handled inside ZoneEngine

        pbar.update(1)

print(f"[INFO] Preload complete after {preload_days} days. Starting image generation.")

# --- Prepare meta.csv ---
# Make sure the output folder exists, then (re)create meta.csv containing only
# its header row. Opening in "w" mode discards any previous meta.csv content.
save_dir.mkdir(parents=True, exist_ok=True)
meta_path = save_dir / "meta.csv"

# Column layout: timestamp, close, then an image/label column pair for each
# timeframe, in the order the timeframes are configured.
header = ["timestamp", "close"] + [
    column
    for tf in timeframes
    for column in (f"{tf}_img", f"{tf}_lbl")
]

with open(meta_path, "w", newline="") as f:
    csv.DictWriter(f, fieldnames=header).writeheader()

# --- Main loop ---
# Replays every row after the preload window: append the bar to the rolling
# buffer, resample all timeframes, render one image per timeframe that already
# has enough candles, and append one row to meta.csv per 1-minute bar.

# Per-timeframe floor frequency (e.g. "15min"), filled the first time a
# timeframe is processed so the conversion is not repeated on every bar.
floor_freq = {}

# meta.csv is opened once for the whole run instead of once per row; each row
# is flushed right after it is written, so the file stays usable if the run is
# interrupted, and the `with` block closes it on any exit path.
with open(meta_path, "a", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=header)

    for i in tqdm(range(preload_minutes, len(df))):
        row_1m = df.iloc[i]  # one positional lookup per bar instead of three
        bar = row_1m.to_dict()
        buffer.append(bar)

        resampled = aggregator.resample_all(timeframes)
        timestamp = pd.to_datetime(row_1m["timestamp"])
        close_price = row_1m["close"]

        tf_image_paths = {}

        for tf in timeframes:
            df_tf = resampled[tf]
            # Skip timeframes that have not yet accumulated enough candles.
            if len(df_tf) < candle_limits[tf]:
                continue

            if tf not in floor_freq:
                floor_freq[tf] = f"{zone_engine._parse_tf_to_minutes(tf)}min"
                # Ensure the image folder exists before the first write to it.
                (save_dir / "images" / tf).mkdir(parents=True, exist_ok=True)

            last_candle = df_tf.iloc[-1]
            last_ts = pd.to_datetime(last_candle["timestamp"])

            # Align to tf granularity
            aligned_ts = timestamp.floor(floor_freq[tf])

            # Only update when both dataset and resampled candle are aligned
            if last_ts == aligned_ts and timestamp == aligned_ts:
                zone_engine.update(tf, last_candle.to_dict())

            # Always fetch zones (may be stale if no new update yet)
            zones = zone_engine.get_visible_zones(tf)

            # Paths
            img_path = save_dir / "images" / tf / f"{timestamp}.png"
            # NOTE(review): lbl_path is recorded in meta.csv but never passed to
            # generate_image; presumably the generator derives the label file
            # location from img_path — confirm against ImageGenerator.
            lbl_path = save_dir / "labels" / tf / f"{timestamp}.txt"

            # Generate image + labels
            image_gen.generate_image(tf, df_tf.tail(candle_limits[tf]), zones, str(img_path))

            tf_image_paths[f"{tf}_img"] = str(img_path)
            tf_image_paths[f"{tf}_lbl"] = str(lbl_path)

        # Write one row per 1m bar; timeframes skipped above leave their
        # columns empty (DictWriter fills missing keys with "").
        if tf_image_paths:
            row = {"timestamp": timestamp, "close": close_price}
            row.update(tf_image_paths)
            writer.writerow(row)
            f.flush()

print(f"[DONE] Meta written incrementally to {meta_path}")




Preloading 1 days: 100%|██████████| 1440/1440 [00:12<00:00, 117.55min/s]


[INFO] Preload complete after 1 days. Starting image generation.


  1%|          | 1958/366867 [01:04<3:20:03, 30.40it/s]


KeyboardInterrupt: 