# Object Detection on Video using YOLOv8

This notebook demonstrates how to perform object detection on a video using an Ultralytics YOLO model (YOLOv8 by default; any compatible checkpoint can be supplied). Each frame of the input video is run through the detector, bounding boxes with class labels and confidence scores are drawn around the detected objects, and the annotated frames are saved to an output video file.


In [8]:
# %%
# Import necessary libraries
from ultralytics import YOLO
import cv2
from PIL import Image
import os
import sys


In [9]:
# %%
# Define the YoloInference class for handling model predictions
class YoloInference:
    """Wraps an Ultralytics YOLO model and draws its detections onto frames.

    The model is loaded once in ``__init__``; ``predict`` can then be called
    repeatedly, one frame at a time.
    """

    # Drawing style shared by every box and label. Colours are given in the
    # same channel order as the frame (BGR for OpenCV frames); green is
    # (0, 255, 0) in either order.
    BOX_COLOR = (0, 255, 0)
    BOX_THICKNESS = 2
    FONT_SCALE = 0.5
    # Vertical gap, in pixels, between the top edge of a box and its label.
    LABEL_OFFSET = 10
    # Smallest baseline y used for a label, so text for boxes touching the
    # top of the image is still drawn inside the frame.
    MIN_LABEL_Y = 15

    def __init__(self, model_path: str = "yolov8n.pt") -> None:
        """
        Initializes the YOLO model.

        Args:
            model_path (str): Path to the YOLO model file.
        """
        self.model = YOLO(model_path)

    def predict(self, frame, conf_threshold=0.25, iou_threshold=0.45):
        """
        Performs object detection on a single frame and annotates it.

        The frame is handed to the model unchanged. Ultralytics documents
        numpy-array sources as HWC images in BGR channel order (the layout
        produced by OpenCV), so converting to RGB beforehand would feed the
        model swapped red/blue channels.

        Args:
            frame (numpy.ndarray): The input frame in BGR format.
            conf_threshold (float): Confidence threshold for detections.
            iou_threshold (float): IoU threshold for Non-Max Suppression.

        Returns:
            numpy.ndarray: The same ``frame`` object that was passed in, with
            bounding boxes and labels drawn on it in place.
        """
        results = self.model.predict(
            source=frame,
            conf=conf_threshold,
            iou=iou_threshold,
            verbose=False
        )

        for result in results:
            boxes = result.boxes
            if boxes is None:
                # Nothing to draw for this result (no box output available).
                continue

            for box in boxes:
                # Each box carries single-row coordinate/score/class entries.
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = float(box.conf[0])
                cls = int(box.cls[0])
                label = f"{self.model.names[cls]} {conf:.2f}"

                cv2.rectangle(
                    frame, (x1, y1), (x2, y2), self.BOX_COLOR, self.BOX_THICKNESS
                )

                # Keep the label inside the image when the box is at the top.
                label_y = max(y1 - self.LABEL_OFFSET, self.MIN_LABEL_Y)
                cv2.putText(
                    frame,
                    label,
                    (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    self.FONT_SCALE,
                    self.BOX_COLOR,
                    self.BOX_THICKNESS
                )

        return frame


In [10]:
# %%
# Function to process video
def process_video(input_path: str, output_path: str, model: "YoloInference", conf_threshold: float = 0.25, iou_threshold: float = 0.45, fallback_fps: float = 30.0):
    """
    Processes the input video, performs object detection on each frame, and saves the output video.

    Frames are read one at a time, passed to ``model.predict`` and the
    returned frame is written to the output file. The capture and the writer
    are always released, even when inference fails part-way through.

    Args:
        input_path (str): Path to the input video file.
        output_path (str): Path to save the output video file.
        model (YoloInference): An instance of the YoloInference class.
        conf_threshold (float): Confidence threshold for detections.
        iou_threshold (float): IoU threshold for Non-Max Suppression.
        fallback_fps (float): Frame rate used for the output when the input
            does not report a usable (positive, finite) FPS value.

    Raises:
        SystemExit: If the input file is missing, the input cannot be opened,
            or the output file cannot be opened for writing (an error message
            is printed first).
    """
    # Check if input video exists
    if not os.path.exists(input_path):
        print(f"Error: Input video not found at {input_path}")
        sys.exit(1)

    cap = cv2.VideoCapture(input_path)
    out = None  # created later; tracked here so `finally` can release it
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video {input_path}")
            sys.exit(1)

        # Get video properties
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Written this way so that 0, negative and NaN all take the fallback.
        if not (fps and fps > 0):
            print(f"Warning: Invalid FPS ({fps}) reported by input; using {fallback_fps}")
            fps = fallback_fps

        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can choose other codecs
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            # Without this check every write would be dropped silently.
            print(f"Error: Could not open output video {output_path} for writing")
            sys.exit(1)

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Processing video: {input_path}")
        print(f"Total frames: {frame_count}")

        current_frame = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): stop processing.
                break

            # Perform inference and draw bounding boxes
            annotated_frame = model.predict(frame, conf_threshold, iou_threshold)

            # Write the frame to the output video
            out.write(annotated_frame)

            current_frame += 1
            if current_frame % 10 == 0:
                print(f"Processed {current_frame}/{frame_count} frames")
    finally:
        # Release resources on success, on error and on early exit alike.
        cap.release()
        if out is not None:
            out.release()

    print(f"Processing completed. Output saved at {output_path}")


In [11]:
# %%
# Load the detector from a local checkpoint.
# Point model_path at any other compatible checkpoint to swap models.
model_path = "checkpoints/yolo11s_NP.pt"  # Replace with your model path if different

if os.path.exists(model_path):
    # Checkpoint is present: build the inference wrapper used by later cells.
    yolo_inference = YoloInference(model_path)
else:
    # Checkpoint is missing: report it and stop before anything else runs.
    for message in (
        f"Error: YOLO model not found at {model_path}",
        "Please download a YOLOv8 model from https://ultralytics.com/",
    ):
        print(message)
    sys.exit(1)


In [12]:
# %%
# Source clip and destination file for the annotated result
input_video_path = "flood.mp4"   # Replace with your input video path
output_video_path = "flood_boxes.mp4"  # Desired output video path

# Run detection over the whole clip; both thresholds can be tuned here
process_video(
    input_video_path,
    output_video_path,
    yolo_inference,
    conf_threshold=0.4,
    iou_threshold=0.4,
)


Processing video: flood.mp4
Total frames: 227
Processed 10/227 frames
Processed 20/227 frames
Processed 30/227 frames
Processed 40/227 frames
Processed 50/227 frames
Processed 60/227 frames
Processed 70/227 frames
Processed 80/227 frames
Processed 90/227 frames
Processed 100/227 frames
Processed 110/227 frames
Processed 120/227 frames
Processed 130/227 frames
Processed 140/227 frames
Processed 150/227 frames
Processed 160/227 frames
Processed 170/227 frames
Processed 180/227 frames
Processed 190/227 frames
Processed 200/227 frames
Processed 210/227 frames
Processed 220/227 frames
Processing completed. Output saved at flood_boxes.mp4
