In [10]:
# First, install the packages we need.
# ultralytics provides YOLO and opencv-python-headless handles video I/O.
# requests (used below for the download) is not listed explicitly in this command.
!pip install ultralytics opencv-python-headless

# Import all the libraries
import cv2
from ultralytics import YOLO
import time
import os
import requests # for downloading the video

# --- Download a video to test on ---
# We need a driving video. This one is a good sample.
print("Downloading sample video...")
video_url = "https://github.com/intel-iot-devkit/sample-videos/raw/master/car-detection.mp4"
my_video = "driving.mp4"
my_output_video = "output.mp4"

# Download to a temporary name first and only move it into place once the
# whole transfer has succeeded, so a failed or partial download never leaves
# a broken 'driving.mp4' behind for the processing step to pick up.
partial_video = my_video + ".part"
try:
    # stream=True avoids holding the whole video in memory;
    # timeout keeps a stalled connection from hanging the script forever.
    with requests.get(video_url, stream=True, timeout=30) as r:
        # Without this check an HTTP error page (404, 500, ...) would be
        # written to disk and reported as a successful download.
        r.raise_for_status()
        with open(partial_video, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(partial_video, my_video)
    print(f"Video '{my_video}' downloaded successfully.")
except (requests.RequestException, OSError) as e:
    print(f"Oops, download failed: {e}")
    # Don't leave a half-written temp file lying around
    if os.path.exists(partial_video):
        os.remove(partial_video)
    # Stop the script if we don't have a video (non-zero status = failure)
    raise SystemExit(1)


# This is our main function that will do all the work
def process_video(input_video, output_video):
    """Run YOLOv8 detection on every frame of a video and save an annotated copy.

    Each detection whose confidence is above 0.5 is drawn onto the frame as a
    rectangle with a "<class name> <confidence>" label. People are drawn in
    red, cars in blue and every other class in green (colors are BGR tuples).

    Args:
        input_video: Path of the video file to read.
        output_video: Path of the annotated .mp4 file to write.

    Returns:
        None. Progress and error messages are printed. If the input cannot be
        opened or the output writer cannot be created, an error is printed and
        the function returns early without processing any frame.
    """
    min_confidence = 0.5
    default_color = (0, 255, 0)  # Green as default
    class_colors = {
        'person': (0, 0, 255),  # Red for people
        'car': (255, 0, 0),     # Blue for cars
    }

    # Load the YOLO model (yolov8n.pt is the small, fast one)
    print("Loading YOLO model...")
    model = YOLO('yolov8n.pt')

    # Map of class id -> class name (like 'car', 'person', etc.)
    class_names = model.names
    print("Model loaded.")

    # --- Open the video file ---
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        print(f"Error: Couldn't open the video file {input_video}")
        return

    out = None
    try:
        # --- Set up the output video ---
        # Reuse the input's size and frame rate so the output plays the same.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep fps as a float: truncating 29.97 to 29 would change playback
        # speed. Some containers report 0 (or NaN) for FPS, which would give an
        # unusable output file, so fall back to a sane default in that case.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # 'mp4v' is the codec for .mp4 files. This sets up our output video file.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            # Without this check a missing codec or bad path silently yields
            # an empty output file.
            print(f"Error: Couldn't create the output video file {output_video}")
            return

        print(f"Processing video... saving to '{output_video}'")

        # This is the main loop, it goes through the video frame by frame
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # We're at the end of the video
                print("End of video stream.")
                break

            # Run YOLO detection on the current frame
            # (verbose=False keeps the per-frame log output quiet)
            results = model(frame, stream=True, verbose=False)

            for r in results:
                for box in r.boxes:
                    # Confidence score (how sure the model is)
                    confidence = float(box.conf[0])
                    # Skip weak detections before doing any other work on them
                    if not confidence > min_confidence:
                        continue

                    # Bounding box corners as integer pixel coordinates
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                    # Class name (the actual name, like 'car')
                    cls_name = class_names[int(box.cls[0])]
                    color = class_colors.get(cls_name, default_color)

                    # Draw the box (rectangle)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

                    # Write the class name and confidence score above the box.
                    # Clamp the text baseline so the label stays inside the
                    # frame when the box touches the top edge.
                    label = f"{cls_name} {confidence:.2f}"
                    label_y = max(y1 - 10, 15)
                    cv2.putText(frame, label, (x1, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Add this finished frame (with boxes) to our output video
            out.write(frame)
    finally:
        # --- Clean up ---
        # Always release the video files, even if detection raised part-way,
        # so the output container is finalized and file handles are not leaked.
        # No cv2.destroyAllWindows() here: this function never opens a window,
        # and headless OpenCV builds have no GUI backend for that call.
        cap.release()
        if out is not None:
            out.release()

    print(f"All done! Processed video saved as '{output_video}'.")

# --- Script entry point ---
# Only runs when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    # Bail out with a message if the input video is missing
    # (it should have been downloaded already).
    if not os.path.exists(my_video):
        print(f"Error: Input video '{my_video}' not found. Download failed?")
    else:
        # The video is there, so hand it to the main processing function
        process_video(my_video, my_output_video)

Downloading sample video...
Video 'driving.mp4' downloaded successfully.
Loading YOLO model...
Model loaded.
Processing video... saving to 'output.mp4'
End of video stream.
All done! Processed video saved as 'output.mp4'.
