In [1]:
import sys; sys.path.append('../')
import pandas as pd

# Load the 1-minute BTC bars from the cleaned parquet export.
BTC_1MIN_PARQUET = "./data/btc_1min_clean_2018_2025H1.parquet"
df_1m = pd.read_parquet(BTC_1MIN_PARQUET)

# Uncomment to work on only the most recent rows while iterating:
# df_1m = df_1m.tail(1000000)

In [2]:
# Preview the loaded bars; pandas abbreviates a long frame to its first and last rows.
df_1m

Unnamed: 0,window_start,ticker,volume,open,close,high,low,transactions
0,2018-01-01 00:00:00+00:00,X:BTC-USD,24.247324,13769.00,13841.01,13994.40,13745.65,177
1,2018-01-01 00:01:00+00:00,X:BTC-USD,37.126923,13994.40,13879.73,14013.80,13741.00,210
2,2018-01-01 00:02:00+00:00,X:BTC-USD,33.290021,13772.00,14011.00,14052.30,13741.00,203
3,2018-01-01 00:03:00+00:00,X:BTC-USD,53.375096,13741.00,13822.39,14000.40,13712.98,461
4,2018-01-01 00:04:00+00:00,X:BTC-USD,24.575963,13823.19,13803.20,13999.00,13678.60,288
...,...,...,...,...,...,...,...,...
3941276,2025-06-29 23:56:00+00:00,X:BTC-USD,3.136182,108436.49,108396.24,108452.81,108359.50,244
3941277,2025-06-29 23:57:00+00:00,X:BTC-USD,1.348653,108396.24,108411.22,108413.54,108370.00,165
3941278,2025-06-29 23:58:00+00:00,X:BTC-USD,0.874252,108411.21,108389.07,108411.22,108386.34,137
3941279,2025-06-29 23:59:00+00:00,X:BTC-USD,0.416727,108389.07,108387.20,108389.07,108386.41,139


In [3]:
# Capitalize every column label. str.capitalize upper-cases only the first
# character and lower-cases the rest, so "window_start" becomes
# "Window_start" (not "Window_Start").
df_1m.columns = df_1m.columns.map(str.capitalize)

# The capitalized timestamp column is then given the name "Date".
column_aliases = {"Window_start": "Date"}
df_1m = df_1m.rename(columns=column_aliases)


In [4]:
from image_generation import MultiTimeframeImageGen
import pandas as pd
from tqdm import tqdm

# --- prep dataframe ---
# Index the bars by timestamp. The guard keeps this cell re-runnable: once
# 'Date' has been moved into the index, a second set_index('Date') would
# raise KeyError. If 'Date' is neither the index nor a column, the KeyError
# from the lookup below still surfaces, as before.
if df_1m.index.name != 'Date':
    df_1m['Date'] = pd.to_datetime(df_1m['Date'])
    df_1m = df_1m.set_index('Date')
df_1m = df_1m.sort_index()

# --- init ---
# Every timeframe uses the same window size of 60; the mapping is kept
# explicit so individual timeframes can be tuned.
gen = MultiTimeframeImageGen(
    timeframes=["1m", "3m", "5m", "15m", "1h", "4h", "1d"],
    base_tf="1m",
    output_root="./dataset",
    window_sizes={
        "1m": 60,
        "3m": 60,
        "5m": 60,
        "15m": 60,
        "1h": 60,
        "4h": 60,
        "1d": 60,
    },
)

# --- preload ---
# Number of leading 1-minute rows to feed with preload=True: the largest
# window_size * tf_to_minutes product over all configured timeframes.
minutes_needed = max(
    gen.window_sizes[tf] * gen.tf_to_minutes[tf]
    for tf in gen.timeframes
)
preload_df = df_1m.iloc[:minutes_needed]
# iterrows() yields each row as a Series whose .name is already the index
# label (the timestamp), so the row can be passed on unchanged.
for ts, row in tqdm(preload_df.iterrows(), total=len(preload_df), desc="Preloading"):
    gen.get_last(row, preload=True)

# --- generate ---
# All remaining rows are fed with preload=False.
generate_df = df_1m.iloc[minutes_needed:]
for ts, row in tqdm(generate_df.iterrows(), total=len(generate_df), desc="Generating images"):
    gen.get_last(row, preload=False)


Preloading: 100%|██████████| 86400/86400 [17:12<00:00, 83.64it/s]
Generating images:   7%|▋         | 250601/3854881 [3:19:07<47:43:54, 20.98it/s]


KeyboardInterrupt: 

In [1]:
import cv2
import os
import glob
import numpy as np
from datetime import datetime

def export_tf_video(
    root_dir="./dataset",
    timeframes_top=("1m", "3m", "5m", "15m"),
    timeframes_bottom=("1h", "4h", "1d"),
    output_file=None,
    fps=10,
    scale=0.7,
):
    """Tile per-timeframe chart images into a grid and write them as an MP4.

    For every timeframe ``tf`` the PNG files matching
    ``<root_dir>/<tf>/images/*.png`` are collected in sorted filename order.
    Video frame ``i`` shows the ``i``-th image of each timeframe: the
    ``timeframes_top`` charts side by side in the first row and the
    ``timeframes_bottom`` charts in the second row. Each chart is resized to
    a common height, gets a white title bar with the timeframe label and a
    1-pixel black border. The number of frames is the smallest image count
    among the timeframes.

    Args:
        root_dir: Directory containing one sub-folder per timeframe.
        timeframes_top: Timeframe names shown in the first row (any sequence).
        timeframes_bottom: Timeframe names shown in the second row (any
            sequence). An empty group contributes no row.
        output_file: Destination path. Defaults to
            ``<root_dir>/videos/tf_<timestamp>.mp4``.
        fps: Frames per second passed to the video writer.
        scale: Factor applied to the height of the first image of the first
            timeframe to obtain the common chart height.

    Raises:
        ValueError: If no timeframe is given, a timeframe folder has no
            images, a first image cannot be read, or ``scale`` produces a
            chart height below one pixel.
        RuntimeError: If the video writer cannot be opened for
            ``output_file``.
    """
    # Accept lists as well as tuples; tuple + list would raise TypeError.
    timeframes_top = tuple(timeframes_top)
    timeframes_bottom = tuple(timeframes_bottom)
    all_timeframes = timeframes_top + timeframes_bottom
    if not all_timeframes:
        raise ValueError("At least one timeframe is required.")
    tf_rows = [group for group in (timeframes_top, timeframes_bottom) if group]

    if output_file is None:
        ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_file = f"{root_dir}/videos/tf_{ts}.mp4"

    # dirname is "" for a bare filename, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Collect files for each timeframe
    files = {
        tf: sorted(glob.glob(os.path.join(root_dir, tf, "images", "*.png")))
        for tf in all_timeframes
    }

    # Require all timeframes to have frames
    if not all(len(lst) > 0 for lst in files.values()):
        raise ValueError("⚠️ One or more timeframe folders are empty!")

    num_frames = min(len(lst) for lst in files.values())

    # Read one sample to set base sizes
    sample = cv2.imread(files[all_timeframes[0]][0])
    if sample is None:
        raise ValueError("⚠️ Could not load sample image.")

    base_height = int(sample.shape[0] * scale)
    if base_height < 1:
        raise ValueError(f"scale={scale!r} gives a chart height below 1 pixel.")
    title_height = 30
    chart_height = base_height + title_height

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1

    def process_frame(path, label):
        """Return the titled, bordered tile for one image, or None if unreadable."""
        img = cv2.imread(path)
        if img is None:
            return None

        # Scale to the common height, preserving the aspect ratio
        h, w = img.shape[:2]
        new_w = max(1, int(w * base_height / h))
        img = cv2.resize(img, (new_w, base_height))

        # White title bar with the label centred in it
        title_bar = np.full((title_height, img.shape[1], 3), 255, dtype=np.uint8)
        text_size = cv2.getTextSize(label, font, font_scale, font_thickness)[0]
        text_x = (title_bar.shape[1] - text_size[0]) // 2
        text_y = (title_height + text_size[1]) // 2 - 3  # better centering
        cv2.putText(title_bar, label, (text_x, text_y), font, font_scale, (0, 0, 0), font_thickness)

        # Stack and border
        combined = np.vstack([title_bar, img])
        cv2.rectangle(combined, (0, 0), (combined.shape[1] - 1, combined.shape[0] - 1), (0, 0, 0), 1)
        return combined

    def first_row_width(group):
        """Total tile width of a row, measured on each timeframe's first image."""
        total = 0
        for tf in group:
            tile = process_frame(files[tf][0], tf)
            if tile is None:
                raise ValueError(f"Could not load first image for timeframe {tf!r}: {files[tf][0]}")
            total += tile.shape[1]
        return total

    # Determine output canvas size
    out_width = max(first_row_width(group) for group in tf_rows)
    out_height = chart_height * len(tf_rows)

    def build_row(tiles):
        """Join tiles side by side and pad/crop to exactly out_width columns."""
        if tiles:
            row = np.hstack(tiles)
        else:
            # Every image of this row was unreadable: keep the slot blank so
            # the frame still has the size the writer was opened with.
            row = np.full((chart_height, out_width, 3), 255, dtype=np.uint8)
        missing = out_width - row.shape[1]
        if missing > 0:
            pad = np.full((chart_height, missing, 3), 255, dtype=np.uint8)
            row = np.hstack([row, pad])
        elif missing < 0:
            row = row[:, :out_width]
        return row

    # Init video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_file, fourcc, fps, (out_width, out_height))
    if not out.isOpened():
        out.release()
        raise RuntimeError(f"Could not open video writer for {output_file}")

    # Frame loop; the writer is released even if a frame fails.
    try:
        for i in range(num_frames):
            row_imgs = []
            for tf_group in tf_rows:
                tiles = []
                for tf in tf_group:
                    tile = process_frame(files[tf][i], tf)
                    if tile is not None:
                        tiles.append(tile)
                row_imgs.append(build_row(tiles))

            # Stack rows into final frame
            out.write(np.vstack(row_imgs))
    finally:
        out.release()

    print(f"✅ Video saved: {output_file}")


# Example run: default root_dir and timeframe layout, 10 frames per second,
# chart height set to 0.6x the height of the sample image.
export_tf_video(fps=10, scale=0.6)


✅ Video saved: ./dataset/videos/tf_2025-09-24_20-32-36.mp4
