In [8]:
!pip install ultralytics opencv-python



In [9]:
from ultralytics import YOLO
import cv2
from pathlib import Path

In [10]:
# Let's us load the yolo8 model
model = YOLO('yolov8s.pt')

In [11]:
#sample images and vidoes path stored in a list
image_files = ['/content/sample1.jpg', '/content/sample2.jpg']
video_files = ['/content/sample_video1.mp4', '/content/sample_video2.mp4']

In [12]:
#This function detect and save images with bounding boxes
def detect_on_images(images):
    for img_path in images:
        results = model(img_path)
        for r in results:
            annotated_img = r.plot()
            output_name = f"{Path(img_path).stem}_output.jpg"
            cv2.imwrite(output_name, annotated_img)
            print(f"Image saved: {output_name}")

In [13]:
def detect_on_videos(videos):
    for video_path in videos:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Failed to open video: {video_path}")
            continue

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 30  # Fallback to 30 if fps is 0

        # Use .mp4 extension and a more compatible codec
        output_name = f"{Path(video_path).stem}_output.mp4"
        out = cv2.VideoWriter(output_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)
            annotated_frame = results[0].plot()

            if annotated_frame is not None:
                out.write(annotated_frame)
                frame_count += 1

        cap.release()
        out.release()

        if frame_count > 0:
            print(f"Video saved: {output_name}")
        else:
            print(f"No frames processed for: {video_path}")


In [14]:
if __name__ == '__main__':
    detect_on_images(image_files)
    detect_on_videos(video_files)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 1 person, 3 cars, 1 truck, 433.4ms
Speed: 5.8ms preprocess, 433.4ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 truck, 501.6ms
Speed: 5.4ms preprocess, 501.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 cars, 1 truck, 668.0ms
Speed: 5.1ms preprocess, 668.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 truck, 625.2ms
Speed: 4.9ms preprocess, 625.2ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 truck, 655.2ms
Speed: 5.7ms preprocess, 655.2ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 cars, 1 truck, 652.8ms
Speed: 6.8ms preprocess, 652.8ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 1 truck, 421.1ms
Speed: 4.5ms prep