In [None]:
import subprocess
import tempfile
from pathlib import Path

# Directory that holds one sub-directory of rendered frames per camera
render_dir = Path("outputs")
# Frame rate handed to ffmpeg when the frames are turned into videos
fps = 10

# Every sub-directory of render_dir counts as a camera; sorting gives a stable order
camera_dirs = sorted(filter(Path.is_dir, render_dir.iterdir()))
print(f"Found {len(camera_dirs)} camera directories:")
for cam_dir in camera_dirs:
    print(f"  - {cam_dir.name}")

In [None]:
# Concatenate camera images into individual videos
#
# For every camera directory, the *.jpg frames are listed in a temporary
# ffmpeg "concat" file and encoded into <render_dir>/<camera_name>.mp4.

for camera_dir in camera_dirs:
    camera_name = camera_dir.name

    # Get all jpg images in camera directory and sort by timestamp.
    # The file stem must be an integer; int() raises ValueError for any other name.
    image_path_list = sorted(camera_dir.glob("*.jpg"), key=lambda x: int(x.stem))

    if not image_path_list:
        print(f"Warning: No images found in {camera_dir}")
        continue

    print(f"Processing {camera_name}: {len(image_path_list)} frames")

    video_path = render_dir / f"{camera_name}.mp4"

    # delete=False so the list file can be closed before ffmpeg opens it by name:
    # re-opening a still-open NamedTemporaryFile is not portable (it fails on Windows).
    # The file is removed explicitly in the finally block instead.
    tmp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", dir=Path(".").absolute(), delete=False)
    tmp_file_path = tmp_file.name
    try:
        with tmp_file:
            for img_path in image_path_list:
                # Inside single quotes the concat syntax has no escape character, so a
                # literal quote must close the string, add \' and re-open it: ' -> '\''
                escaped_path = str(img_path.absolute()).replace("'", "'\\''")
                tmp_file.write(f"file '{escaped_path}'\n")

        # fmt: off
        subprocess.run([
            "ffmpeg", "-y", "-r", str(fps), "-f", "concat", "-safe", "0", "-i", tmp_file_path,
            "-c:v", "libx264", "-pix_fmt", "yuv420p", str(video_path)
        ], check=True)
        # fmt: on
    finally:
        # Always clean up the list file, even when writing or ffmpeg fails
        Path(tmp_file_path).unlink()

    print(f"  -> Created {video_path.name}")

print("\nAll videos created successfully!")

In [None]:
import subprocess


def _stack_filter(input_labels, stack_name, output_label):
    """Build one filter that combines ``input_labels`` into ``[output_label]``.

    ``hstack``/``vstack`` need at least two inputs, so a single label is routed
    through the pass-through ``null`` filter instead, which only relabels it.
    """
    joined = "".join(input_labels)
    if len(input_labels) < 2:
        return f"{joined}null[{output_label}]"
    return f"{joined}{stack_name}=inputs={len(input_labels)}[{output_label}]"


def _even_floor(value):
    """Truncate ``value`` to an int and round it down to an even number (minimum 2)."""
    return max(2, int(value) // 2 * 2)


def concat_videos_grid(layout, video_dict, output_file, width=1920, height=1080, scale_output=1.0, fps=10):
    """
    Concatenate videos in a grid layout with black padding for empty spaces

    Every named slot is scaled to ``width`` x ``height``, slots are joined left to
    right per row, rows are joined top to bottom, and the result is encoded with
    libx264 by running ``ffmpeg`` (the output file is overwritten via ``-y``).

    Args:
        layout: Video layout list (list of lists), each row can have different number of videos
                Empty string "" indicates a blank space that will be filled with black padding
                Rows shorter than the widest row are right-padded with blank slots
        video_dict: Dictionary mapping camera names to video paths (str or path-like)
        output_file: Output file path
        width: Single video width
        height: Single video height
        scale_output: Scale factor for output (e.g., 0.5 for half resolution)
                      Scaled dimensions are rounded down to even numbers
        fps: Output frame rate passed to ffmpeg's ``-r`` option

    Raises:
        ValueError: If ``layout`` has no rows or every row is empty
        KeyError: If a name used in ``layout`` is not a key of ``video_dict``
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
    """
    import shlex  # local import: only needed to print a copy-pasteable command

    rows = len(layout)
    max_cols = max((len(row) for row in layout), default=0)
    if rows == 0 or max_cols == 0:
        raise ValueError("layout must contain at least one row with at least one slot")

    # vstack requires all rows to have the same width, so pad short rows with blanks
    grid = [list(row) + [""] * (max_cols - len(row)) for row in layout]

    input_files = []
    flat_layout = []

    # Build input file list (skip empty strings)
    for row in grid:
        for key in row:
            if key:  # Skip empty strings
                flat_layout.append(key)
                input_files.extend(["-i", str(video_dict[key])])

    # Build filter chain
    filter_parts = []

    # Step 1: scale all inputs
    for i in range(len(flat_layout)):
        filter_parts.append(f"[{i}:v]scale={width}:{height}[{i}s]")

    # Step 2: build each row with padding for empty slots
    video_idx = 0  # index of the next ffmpeg input, in the same order as input_files

    for row_idx, row in enumerate(grid):
        row_inputs = []

        for col_idx, key in enumerate(row):
            if key:  # Has video
                row_inputs.append(f"[{video_idx}s]")
                video_idx += 1
            else:  # Empty slot - use black padding
                # Create black video for this slot
                # NOTE(review): the source lasts only 1 second; presumably the stack
                # filters keep repeating its last frame afterwards - confirm.
                filter_parts.append(f"color=black:s={width}x{height}:d=1[row{row_idx}_col{col_idx}]")
                row_inputs.append(f"[row{row_idx}_col{col_idx}]")

        # Concatenate all inputs in this row horizontally
        filter_parts.append(_stack_filter(row_inputs, "hstack", f"row{row_idx}"))

    # Step 3: build vstack (all rows)
    row_pads = [f"[row{i}]" for i in range(rows)]

    # Apply output scaling if needed
    if scale_output != 1.0:
        # libx264 with yuv420p rejects odd dimensions, hence the even rounding
        output_width = _even_floor(width * max_cols * scale_output)
        output_height = _even_floor(height * rows * scale_output)
        vstack_cmd = (
            _stack_filter(row_pads, "vstack", "intermediate")
            + f";[intermediate]scale={output_width}:{output_height}[out]"
        )
    else:
        output_width = width * max_cols
        output_height = height * rows
        vstack_cmd = _stack_filter(row_pads, "vstack", "out")

    # Merge all filters (separated by ;)
    filter_complex = ";".join(filter_parts) + ";" + vstack_cmd

    print(f"Grid size: {rows} rows, max {max_cols} columns")
    print(f"Output resolution: {output_width}x{output_height}")
    print(f"Filter chain: {filter_complex}\n")

    # Build ffmpeg command with improved settings
    cmd = (
        ["ffmpeg"]
        + input_files
        + [
            "-filter_complex",
            filter_complex,
            "-map",
            "[out]",
            # Video codec settings
            "-c:v",
            "libx264",
            "-preset",
            "medium",
            "-crf",
            "23",
            # Frame rate synchronization
            "-vsync",
            "vfr",
            "-r",
            str(fps),
            # Pixel format
            "-pix_fmt",
            "yuv420p",
            # Overwrite output
            "-y",
            str(output_file),
        ]
    )

    # Print the exact command being run, shell-quoted so it can be pasted into a terminal
    print(f"Executing command:\n{' '.join(shlex.quote(arg) for arg in cmd)}\n")

    subprocess.run(cmd, check=True)
    print(f"\n✓ Video concatenation complete: {output_file}")


# Grid arrangement of the camera videos, one inner list per row.
# A "" entry is a slot without a camera and is rendered as black padding.
layout = [
    # One video centered between two black slots
    ["", "camera_front_tele_30fov", ""],
    # Three videos side by side
    ["camera_cross_left_120fov", "camera_front_wide_120fov", "camera_cross_right_120fov"],
    # Two videos with a black slot in the middle
    ["camera_rear_left_70fov", "", "camera_rear_right_70fov"],
]

# Map every camera name to its rendered video, warning about cameras without one
videos = {}
for cam_dir in camera_dirs:
    video_path = render_dir / f"{cam_dir.name}.mp4"
    if not video_path.exists():
        print(f"Warning: Video not found for {cam_dir.name}")
        continue
    videos[cam_dir.name] = str(video_path)

# Named slots of the layout that have no video (blank "" slots are not checked)
missing = [
    camera_name
    for layout_row in layout
    for camera_name in layout_row
    if camera_name and camera_name not in videos
]

if not missing:
    output_file = str(render_dir / "merge.mp4")
    # scale_output=0.5 halves the full 11520x6480 grid to 5760x3240.
    # Use 1.0 to keep the full resolution (11520x6480),
    # or 0.25 for an even smaller result (2880x1620).
    concat_videos_grid(layout, videos, output_file, width=3840, height=2160, scale_output=0.5)
else:
    print(f"Error: Missing videos for cameras: {missing}")