In [None]:
import sys; sys.path.append('../')
from utils.load_ticker import load_ticker
from utils.clean_data import clean_data


# Load the raw minute bars for SPY from the local parquet store.
df = load_ticker(
    base_dir = "../../parquet_minute/", 
    time_col = "Date",
    symbol_col="Symbol",
    seed = 42, 
    symbol= "SPY",
    verbose=False
)


# Clean the raw frame with the project's clean_data helper. The flags below
# are passed through verbatim; their exact semantics live in utils.clean_data.
# NOTE(review): sort_by="timestamp" while timestamp_col="Date" -- presumably
# "timestamp" is a mode keyword meaning "sort by timestamp_col"; confirm.
df_1m = clean_data(
    df=df,
    timestamp_col = "Date",
    symbol_col = "Symbol",
    drop_duplicate_rows = True,
    drop_duplicate_cols = True,
    drop_constant_columns = True,
    drop_constant_rows = True,
    replace_placeholders = True,
    placeholders=("Null", "null", "NULL", "NaN", "nan", "NAN", "None", "none", "NONE"),
    fill_missing = True,
    convert_numeric = True,
    sort_by = "timestamp",
    verbose = False,
)


# Number of most-recent rows to keep. It must exceed the image generator's
# window_size (1 * 24 * 60 = 1440 in the next cell), because that cell skips
# the first window_size rows before generating anything.
N_RECENT_ROWS = 10_000

# .copy() detaches the result from the cleaned frame: .tail() returns a slice,
# and assigning a column into a slice later raises SettingWithCopyWarning.
df_1m = df_1m.tail(N_RECENT_ROWS).copy()

In [None]:
from image_generation import MultiTimeframeImageGen
import pandas as pd
from tqdm import tqdm

# --- prep dataframe ---
# Guarded so the cell can be re-run: after the first run 'Date' is already the
# index, and df_1m['Date'] would raise KeyError. .assign() builds a new frame
# rather than writing a column in place, which avoids SettingWithCopyWarning
# when df_1m is a slice of another frame (e.g. the result of .tail()).
if 'Date' in df_1m.columns:
    df_1m = df_1m.assign(Date=pd.to_datetime(df_1m['Date'])).set_index('Date')
df_1m = df_1m.sort_index()

# --- init ---
gen = MultiTimeframeImageGen(
    ["1m", "3m", "5m", "15m", "1h", "4h", "1d"],
    base_tf="1m",
    window_size=1 * 24 * 60,  # one day expressed in 1-minute bars
    output_root="dataset"
)

# NOTE(review): the preload pass below is disabled, yet the generation loop
# still skips the first gen.window_size rows. Those rows are therefore never
# passed to the generator -- confirm this is intended, or re-enable the preload.
# # --- preload only ---
# preload_df = df_1m.iloc[:gen.window_size]
# for ts, row in tqdm(preload_df.iterrows(),
#                     total=len(preload_df),
#                     desc="Preloading"):
#     row.name = ts
#     gen.get_last(row, preload=True)  # no images

# --- generate after preload ---
generate_df = df_1m.iloc[gen.window_size:]
# iterrows() already yields each row as a Series whose .name is its index
# label (the timestamp), so no manual `row.name = ts` assignment is needed.
for _, row in tqdm(generate_df.iterrows(),
                   total=len(generate_df),
                   desc="Generating images"):
    gen.get_last(row, preload=False)  # saves images


In [None]:
import cv2
import os
import glob
import numpy as np
from datetime import datetime

def export_tf_video(
    root_dir="./dataset",
    timeframes_top=("1m", "3m", "5m", "15m"),
    timeframes_bottom=("1h", "4h", "1d"),
    output_file=None,
    fps=10,
    scale=0.7,
):
    """Tile per-timeframe chart images into a two-row grid and encode an MP4.

    For every timeframe ``tf`` the PNG files under ``<root_dir>/<tf>/images``
    are collected in sorted filename order. Frame ``i`` of the video shows the
    ``i``-th image of each timeframe: ``timeframes_top`` side by side in the
    upper row and ``timeframes_bottom`` in the lower row. Each image gets a
    white title bar with the timeframe label and a 1px black border. Rows
    narrower than the video are right-padded with white.

    Images are paired across timeframes by sorted position, not by timestamp,
    and the video length is the smallest image count among the timeframes.

    Args:
        root_dir: Directory containing one sub-folder per timeframe.
        timeframes_top: Timeframe names shown in the upper row.
        timeframes_bottom: Timeframe names shown in the lower row.
        output_file: Destination path. Defaults to
            ``<root_dir>/videos/tf_<YYYY-mm-dd_HH-MM-SS>.mp4``.
        fps: Frames per second passed to the video writer.
        scale: Factor applied to the height of the first image of the first
            timeframe; every panel is resized to that height, keeping its
            aspect ratio.

    Raises:
        ValueError: If no timeframes are given, any timeframe folder has no
            PNGs, the sample/first image of a timeframe cannot be read, or
            ``scale`` yields a zero-height panel.
        RuntimeError: If the video writer cannot be opened for ``output_file``.
    """
    # tuple() lets callers pass lists (or a list and a tuple) interchangeably.
    row_groups = (tuple(timeframes_top), tuple(timeframes_bottom))
    all_timeframes = row_groups[0] + row_groups[1]
    if not all_timeframes:
        raise ValueError("At least one timeframe is required.")

    files = {
        tf: sorted(glob.glob(os.path.join(root_dir, tf, "images", "*.png")))
        for tf in all_timeframes
    }

    # Validate before touching the filesystem or decoding anything, so that an
    # empty folder is reported clearly instead of surfacing as an IndexError.
    missing = [tf for tf, paths in files.items() if not paths]
    if missing:
        raise ValueError(
            "No frames found in one or more timeframe folders! "
            f"Missing: {', '.join(missing)}"
        )
    num_frames = min(len(paths) for paths in files.values())

    if output_file is None:
        ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_file = f"{root_dir}/videos/tf_{ts}.mp4"

    # A bare filename has an empty dirname, which os.makedirs rejects.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Read one sample to determine the common panel height.
    sample = cv2.imread(files[all_timeframes[0]][0])
    if sample is None:
        raise ValueError("Could not load sample image.")

    base_height = int(sample.shape[0] * scale)
    if base_height <= 0:
        raise ValueError(f"scale={scale!r} produces an empty image.")
    title_height = 30
    chart_height = base_height + title_height
    font = cv2.FONT_HERSHEY_SIMPLEX

    def process_frame(path, label):
        """Load one image, resize it to base_height, add title bar and border.

        Returns None when the image cannot be read.
        """
        img = cv2.imread(path)
        if img is None:
            return None
        h, w = img.shape[:2]
        # Single resize straight to the target height (aspect ratio kept);
        # resizing by `scale` first would only resample the image twice.
        img = cv2.resize(img, (max(1, int(w * base_height / h)), base_height))

        # Title bar with the label centred horizontally and vertically.
        title_bar = np.full((title_height, img.shape[1], 3), 255, dtype=np.uint8)
        text_size = cv2.getTextSize(label, font, 0.6, 1)[0]
        text_x = (title_bar.shape[1] - text_size[0]) // 2
        text_y = (title_height + text_size[1]) // 2
        cv2.putText(title_bar, label, (text_x, text_y), font, 0.6, (0, 0, 0), 1)

        # Stack and draw border.
        combined = np.vstack([title_bar, img])
        cv2.rectangle(
            combined,
            (0, 0),
            (combined.shape[1] - 1, combined.shape[0] - 1),
            (0, 0, 0),
            1,
        )
        return combined

    def first_row_width(group):
        """Total width of a row, measured on each timeframe's first image."""
        total = 0
        for tf in group:
            panel = process_frame(files[tf][0], tf)
            if panel is None:
                raise ValueError(
                    f"Could not load first frame for timeframe {tf!r}: {files[tf][0]}"
                )
            total += panel.shape[1]
        return total

    out_width = max(first_row_width(group) for group in row_groups)
    out_height = chart_height * 2

    def fit_row(panels):
        """Join panels horizontally and force the result to exactly out_width.

        The writer only accepts frames of the size it was opened with, so a
        row is white-padded when too narrow (including when no panel loaded)
        and cropped when later images turn out wider than the first ones.
        """
        if panels:
            row = np.hstack(panels)
        else:
            row = np.empty((chart_height, 0, 3), dtype=np.uint8)
        if row.shape[1] < out_width:
            pad = np.full(
                (chart_height, out_width - row.shape[1], 3), 255, dtype=np.uint8
            )
            row = np.hstack([row, pad])
        return row[:, :out_width]

    # Init video writer.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_file, fourcc, fps, (out_width, out_height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"Could not open video writer for {output_file}")

    # try/finally guarantees the container is finalised even if a frame fails.
    try:
        for i in range(num_frames):
            rows = []
            for group in row_groups:
                panels = [
                    panel
                    for panel in (process_frame(files[tf][i], tf) for tf in group)
                    if panel is not None
                ]
                rows.append(fit_row(panels))
            out.write(np.vstack(rows))
    finally:
        out.release()

    print(f"✅ Video saved: {output_file}")


# Build the timeframe-grid video from the default root_dir ("./dataset") at
# 10 fps, with panel height set to 60% of the first source image's height.
export_tf_video(fps=10, scale=0.6)
