In [3]:
import cv2
from ultralytics import YOLO
import os
import json

# Load YOLO Models
main_object_model = YOLO("yolov8n.pt")  # For main objects
sub_object_model = YOLO("yolov8n.pt")  # For sub-objects (can be fine-tuned)

# Save JSON Output
def save_json(detections, output_path="outputs", frame_number=1):
    os.makedirs(output_path, exist_ok=True)
    json_file = os.path.join(output_path, f"frame_{frame_number}.json")
    with open(json_file, "w") as f:
        json.dump(detections, f, indent=4)
    print(f"Saved JSON for frame {frame_number} at {json_file}")

# Save Cropped Sub-Object Images
def save_cropped_images(frame, obj, output_folder="outputs"):
    os.makedirs(output_folder, exist_ok=True)
    if "subobject" in obj:
        sub_bbox = obj["subobject"]["bbox"]
        x1, y1, x2, y2 = map(int, sub_bbox)
        cropped_img = frame[y1:y2, x1:x2]
        sub_object_name = f"{obj['object']}_{obj['subobject']['object']}_{obj['id']}.jpg"
        image_path = os.path.join(output_folder, sub_object_name)
        cv2.imwrite(image_path, cropped_img)
        print(f"Saved cropped sub-object image: {image_path}")

def detect_sub_objects(frame, bbox):
    x1, y1, x2, y2 = map(int, bbox)
    roi = frame[y1:y2, x1:x2]  # Region of interest (cropped area)
    results = sub_object_model(roi)

    sub_objects = []
    sub_object_id = 0  # Counter for unique sub-object IDs

    for result in results:
        if not result.boxes:  # Check if there are no detections
            print("No sub-object detections in this ROI.")  # Debugging info
            continue

        for box in result.boxes:
            sub_bbox = box.xyxy[0].tolist()  # Bounding box of sub-object
            sub_label = sub_object_model.names[int(box.cls[0])]  # Sub-object name

            sub_object_id += 1  # Increment sub-object ID
            sub_objects.append({
                "object": sub_label,
                "id": sub_object_id,  # Assign unique ID
                "bbox": [int(sub_bbox[0]), int(sub_bbox[1]), int(sub_bbox[2]), int(sub_bbox[3])]
            })

    return sub_objects



def object_subobject_pipeline(video_path, output_folder="outputs"):
    os.makedirs(output_folder, exist_ok=True)
    video = cv2.VideoCapture(video_path)

    # Check if the video was successfully opened
    if not video.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return

    frame_count = 0
    global_object_id = 0  # Counter for unique object IDs

    # Get total frames and frame rate (Optional for better debugging)
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"Total frames in the video: {total_frames}")
    frame_rate = video.get(cv2.CAP_PROP_FPS)
    print(f"Frame rate: {frame_rate} FPS")

    while True:
        ret, frame = video.read()
        if not ret:
            print("No more frames or video ended.")
            break  # Exit the loop if no frame is retrieved

        frame_count += 1
        print(f"Processing Frame {frame_count}/{total_frames}...")

        # Check if the frame is valid
        if frame is None:
            print(f"Warning: Received an empty frame at Frame {frame_count}. Skipping.")
            continue

        # Detect Main Objects
        results = main_object_model(frame)
        detections = []

        for result in results:
            for box in result.boxes:
                bbox = box.xyxy[0].tolist()
                label = main_object_model.names[int(box.cls[0])]

                global_object_id += 1
                main_object = {
                    "object": label,
                    "id": global_object_id,
                    "bbox": [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]
                }

                # Detect Sub-Objects
                sub_objects = detect_sub_objects(frame, bbox)
                if sub_objects:
                    main_object["subobject"] = sub_objects[0]

                detections.append(main_object)

                # Save Sub-Object Images
                save_cropped_images(frame, main_object, output_folder)

        # Save JSON Output
        save_json(detections, output_folder, frame_number=frame_count)

        # Optional: Debug only first 5 frames for testing
        if frame_count > 5:
            print("Stopping after 5 frames for debugging.")
            break

    video.release()
    print("Processing Complete!")


# Run the Detection Pipeline
if __name__ == "__main__":
#     video_path = "data\zidane.jpg"  # Path to your video file
    video_path = "data/test_video.mp4"
    object_subobject_pipeline(video_path)

Total frames in the video: 660
Frame rate: 23.976023976023978 FPS
Processing Frame 1/660...

0: 384x640 24 cars, 4 trucks, 116.2ms
Speed: 4.3ms preprocess, 116.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 480x640 (no detections), 128.0ms
Speed: 5.0ms preprocess, 128.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)
No sub-object detections in this ROI.

0: 640x352 (no detections), 108.1ms
Speed: 2.0ms preprocess, 108.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 352)
No sub-object detections in this ROI.

0: 640x544 (no detections), 146.4ms
Speed: 4.6ms preprocess, 146.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 544)
No sub-object detections in this ROI.

0: 448x640 (no detections), 96.1ms
Speed: 2.6ms preprocess, 96.1ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)
No sub-object detections in this ROI.

0: 480x640 (no detections), 97.5ms
Speed: 3.0ms preprocess, 97.5ms inference, 1.

No sub-object detections in this ROI.

0: 480x640 (no detections), 117.9ms
Speed: 3.2ms preprocess, 117.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
No sub-object detections in this ROI.

0: 640x640 (no detections), 139.1ms
Speed: 8.0ms preprocess, 139.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)
No sub-object detections in this ROI.

0: 640x544 (no detections), 157.3ms
Speed: 6.0ms preprocess, 157.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 544)
No sub-object detections in this ROI.

0: 352x640 (no detections), 112.6ms
Speed: 3.1ms preprocess, 112.6ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)
No sub-object detections in this ROI.

0: 352x640 (no detections), 77.5ms
Speed: 2.0ms preprocess, 77.5ms inference, 1.0ms postprocess per image at shape (1, 3, 352, 640)
No sub-object detections in this ROI.

0: 416x640 1 car, 107.7ms
Speed: 2.5ms preprocess, 107.7ms inference, 1.0ms postprocess per image 

No sub-object detections in this ROI.

0: 480x640 (no detections), 83.2ms
Speed: 2.0ms preprocess, 83.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
No sub-object detections in this ROI.

0: 640x320 (no detections), 79.1ms
Speed: 2.5ms preprocess, 79.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 320)
No sub-object detections in this ROI.

0: 640x448 (no detections), 134.6ms
Speed: 3.0ms preprocess, 134.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 448)
No sub-object detections in this ROI.

0: 480x640 1 car, 107.1ms
Speed: 3.0ms preprocess, 107.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
Saved cropped sub-object image: outputs\car_car_94.jpg

0: 480x640 (no detections), 104.6ms
Speed: 2.6ms preprocess, 104.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
No sub-object detections in this ROI.

0: 512x640 (no detections), 89.3ms
Speed: 2.5ms preprocess, 89.3ms inference, 1.0ms postproce

Saved cropped sub-object image: outputs\truck_car_136.jpg

0: 544x640 1 train, 126.3ms
Speed: 5.5ms preprocess, 126.3ms inference, 2.5ms postprocess per image at shape (1, 3, 544, 640)
Saved cropped sub-object image: outputs\car_train_137.jpg

0: 416x640 1 car, 103.0ms
Speed: 3.9ms preprocess, 103.0ms inference, 2.6ms postprocess per image at shape (1, 3, 416, 640)
Saved cropped sub-object image: outputs\car_car_138.jpg

0: 640x576 (no detections), 144.0ms
Speed: 7.6ms preprocess, 144.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 576)
No sub-object detections in this ROI.

0: 448x640 (no detections), 94.1ms
Speed: 3.0ms preprocess, 94.1ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
No sub-object detections in this ROI.

0: 640x640 (no detections), 154.6ms
Speed: 7.4ms preprocess, 154.6ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 640)
No sub-object detections in this ROI.

0: 512x640 (no detections), 132.5ms
Speed: 3.0ms preproces

No sub-object detections in this ROI.
Saved JSON for frame 6 at outputs\frame_6.json
Stopping after 5 frames for debugging.
Processing Complete!
