In [3]:
import os
import cv2

def get_output_filename(output_dir, frames_dir, name):
    """
    Build the output video path ``<output_dir>/<parent_folder>_<name>.mp4``.

    ``<parent_folder>`` is the basename of the directory that directly
    contains ``frames_dir`` (its parent, not its grandparent).

    Args:
        output_dir (str):
            Directory the video is placed in. An empty string yields a path
            relative to the current working directory.
        frames_dir (str):
            Directory holding the frames; only its parent's name is used.
        name (str):
            Suffix placed after the underscore in the file name.

    Returns:
        str: The joined output path ending in ".mp4".

    Example:
        output_dir = "out"
        frames_dir = "/some/path/frames"
        name = "result"
        => output: "out/path_result.mp4"
    """
    # abspath() normalizes the path (dropping any trailing separator), so
    # "frames" and "frames/" resolve to the same parent directory.
    parent_dir = os.path.dirname(os.path.abspath(frames_dir))
    parent_name = os.path.basename(parent_dir)

    # os.path.join avoids a doubled separator when output_dir already ends
    # with one, and avoids pointing at the filesystem root when it is empty.
    return os.path.join(output_dir, f"{parent_name}_{name}.mp4")


def _parse_prediction_file(prediction_txt):
    """Read a prediction text file into per-frame bounding boxes.

    Each usable line has at least six comma-separated numeric fields:
    <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>.
    Blank lines, lines with fewer than six fields and lines whose first six
    fields are not finite numbers (e.g. a header row) are skipped.

    Returns:
        tuple: ``(boxes_by_frame, max_frame_index)`` where ``boxes_by_frame``
        maps a frame index to a list of ``(obj_id, left, top, width, height)``
        integer tuples, and ``max_frame_index`` is the largest frame index
        seen (0 when no usable line was found).
    """
    boxes_by_frame = {}
    max_frame_index = 0
    skipped = 0

    with open(prediction_txt, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split(',')
            if len(parts) < 6:
                continue  # skip malformed lines
            try:
                # int(float(...)) accepts both "12" and "12.0" for every
                # field, including the frame index.
                frame_idx, obj_id, left, top, width, height = (
                    int(float(field)) for field in parts[:6]
                )
            except (ValueError, OverflowError):
                # ValueError: non-numeric text or NaN; OverflowError: inf.
                skipped += 1
                continue

            boxes_by_frame.setdefault(frame_idx, []).append(
                (obj_id, left, top, width, height)
            )
            max_frame_index = max(max_frame_index, frame_idx)

    if skipped:
        print(f"Warning: skipped {skipped} unparsable line(s) in {prediction_txt}")

    return boxes_by_frame, max_frame_index


def visualize_predictions_to_video(
    output_dir,
    frames_dir,
    prediction_txt,
    fps,
    name="result"
):
    """
    Creates a video with bounding boxes overlaid on the frames.

    Frames 1..N are written in order, where N is the largest frame index in
    the prediction file. Frames that are missing or unreadable are skipped
    with a warning. Boxes listed for frame indices below 1 are never drawn.

    Args:
        output_dir (str):
            Directory for the output video; created if it does not exist.
        frames_dir (str):
            Path to directory containing frames named "00000001.jpg", etc.
        prediction_txt (str):
            Path to a text file where each line is:
              <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, ...
            Lines that cannot be parsed are skipped.
        fps (int or float):
            Frames per second for the output video.
        name (str):
            Additional suffix for the final output name (the part after the underscore).
            By default "result"; the file name is produced by get_output_filename().

    Raises:
        FileNotFoundError: If none of the frames 1..N exists in frames_dir.
        ValueError: If the first existing frame cannot be decoded.
        RuntimeError: If the video writer cannot be opened for the output path.
    """
    os.makedirs(output_dir, exist_ok=True)
    # 1) Decide final output video name
    output_video = get_output_filename(output_dir, frames_dir, name)

    # 2) Parse the prediction file
    boxes_by_frame, max_frame_index = _parse_prediction_file(prediction_txt)

    # 3) Find size of frames by reading first existing frame
    first_frame_path = None
    for i in range(1, max_frame_index + 1):
        candidate = os.path.join(frames_dir, f"{i:08d}.jpg")
        if os.path.isfile(candidate):
            first_frame_path = candidate
            break

    if first_frame_path is None:
        raise FileNotFoundError(f"No frames found in {frames_dir} (e.g. 00000001.jpg, etc.)")

    first_img = cv2.imread(first_frame_path)
    if first_img is None:
        raise ValueError(f"Cannot read image {first_frame_path}")
    frame_height, frame_width = first_img.shape[:2]

    # 4) Set up VideoWriter
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out_video = cv2.VideoWriter(output_video, fourcc, fps, (frame_width, frame_height))
    if not out_video.isOpened():
        # Without this check a failed open would go unnoticed and the
        # function would still report the video as saved.
        out_video.release()
        raise RuntimeError(f"Cannot open video writer for {output_video}")

    # Colors are in OpenCV's BGR channel order.
    COLORS = [
        (0, 255, 0),    # green
        (0, 0, 255),    # red
        (255, 0, 0),    # blue
        (255, 255, 0),  # cyan
        (255, 0, 255),  # magenta
        (0, 255, 255),  # yellow
        (128, 0, 128),  # purple
        (128, 128, 0),  # teal
        (0, 128, 128),  # olive
        (128, 0, 255),  # some custom color
    ]

    # try/finally guarantees the writer is released (and the file finalized)
    # even if drawing or writing raises part-way through.
    try:
        # 5) Iterate frames. 1..max_frame_index
        for frame_idx in range(1, max_frame_index + 1):
            frame_path = os.path.join(frames_dir, f"{frame_idx:08d}.jpg")
            if not os.path.isfile(frame_path):
                print(f"Warning: missing frame {frame_path}")
                continue

            img = cv2.imread(frame_path)
            if img is None:
                print(f"Warning: cannot read {frame_path}")
                continue

            # Draw this frame's boxes (if any) in ascending ID order
            sorted_bboxes = sorted(boxes_by_frame.get(frame_idx, ()), key=lambda x: x[0])
            for (obj_id, left, top, w, h) in sorted_bboxes:
                right = left + w
                bottom = top + h

                # Same ID -> same color on every frame
                color = COLORS[obj_id % len(COLORS)]
                thickness = 2
                cv2.rectangle(img, (left, top), (right, bottom), color, thickness)
                label = f"ID: {obj_id}"
                # Clamp the label's y coordinate so it is not negative for boxes at the top edge
                cv2.putText(img, label, (left, max(top - 5, 0)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, cv2.LINE_AA)

            # The writer was opened for the first frame's size. Resize after
            # drawing so the boxes stay aligned with the picture content.
            if img.shape[:2] != (frame_height, frame_width):
                print(f"Warning: resizing {frame_path} to {frame_width}x{frame_height}")
                img = cv2.resize(img, (frame_width, frame_height))

            out_video.write(img)
    finally:
        out_video.release()

    print(f"Video saved to {output_video}")

    

In [4]:
# Inputs: the prediction text file and the directory of frames it refers to.
prediction_txt = "/nfs/home/leo0511/Project/Pig/HybridSORT/YOLOX_outputs/yolox_pig_val/False/track_vis/2025_03_03_11_49_10.txt"
frames_dir = "/nfs/home/leo0511/Project/Pig/HybridSORT/datasets/dancetrack/test/camera2_Night2Day_30min_20241210_0515_0530/img1"

# Output settings: destination folder, file-name suffix and playback rate.
output_dir = "./Pig_demo"
name = "hybridSORT-reid"
fps = 1

# Keyword arguments make the mapping explicit; the function takes five
# parameters, four of them strings or numbers that are easy to transpose.
visualize_predictions_to_video(
    output_dir=output_dir,
    frames_dir=frames_dir,
    prediction_txt=prediction_txt,
    fps=fps,
    name=name,
)


Video saved to ./Pig_demo/camera2_Night2Day_30min_20241210_0515_0530_hybridSORT.mp4
