In [4]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO(".local/yolov8n.pt")

# Input video file path
input_video_path = "./.local/world.mp4"  # Replace with your video file path
output_video_path = "./.local/world_yolo.mp4"

cap = cv2.VideoCapture(input_video_path)

# Check if the video file is successfully opened
if not cap.isOpened():
    print(f"Error: Cannot open video {input_video_path}")
    exit()

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Define codec and create VideoWriter object for saving the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Define the specific color (RGB format)
bounding_box_color = (0, 0, 200)

# Process the video frame by frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Perform object detection on the frame
    results = model(frame)

    # Draw bounding boxes and labels on the frame
    for result in results:
        for box in result.boxes:
            # Extract box coordinates, confidence, and class
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            confidence = box.conf[0]
            class_id = int(box.cls[0])
            label = f"{model.names[class_id]} {confidence:.2f}"

            # Draw the bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), bounding_box_color, 2)

            # Put the label above the bounding box
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, bounding_box_color, 2)

    # Write the processed frame to the output video
    out.write(frame)

    # Display the frame (optional)
    cv2.imshow("Object Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()

print(f"Object detection completed. Output saved to {output_video_path}")

0: 384x640 1 cat, 3 bottles, 1 tv, 39.4ms
Speed: 5.8ms preprocess, 39.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 3 bottles, 1 tv, 36.7ms
Speed: 1.4ms preprocess, 36.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 2 bottles, 1 tv, 38.8ms
Speed: 1.2ms preprocess, 38.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 2 bottles, 1 tv, 34.5ms
Speed: 1.3ms preprocess, 34.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 4 bottles, 1 tv, 31.2ms
Speed: 1.4ms preprocess, 31.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 3 bottles, 28.8ms
Speed: 1.4ms preprocess, 28.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 2 bottles, 39.0ms
Speed: 1.3ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 1 cat, 3 bottles, 34.6ms
Spe