In [1]:
pip install ultralytics opencv-python opencv-python-headless

Note: you may need to restart the kernel to use updated packages.


In [2]:
import cv2
import torch
from ultralytics import YOLO

# Load YOLOv8 pre-trained model
model = YOLO('yolov8n.pt')  

In [3]:
# Load video
video_path = "video.mp4"  
cap = cv2.VideoCapture(video_path)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fps = int(cap.get(cv2.CAP_PROP_FPS))

output_video = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

class_names = ['person', 'bottle', 'laptop', 'book']

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    detections = results[0].boxes.xyxy.cpu().numpy()  
    confidences = results[0].boxes.conf.cpu().numpy() 
    classes = results[0].boxes.cls.cpu().numpy()  

    for i, (x1, y1, x2, y2) in enumerate(detections):
        conf = confidences[i]
        cls = int(classes[i])
        cls_name = model.names[cls]

        if cls_name in class_names and conf > 0.5:
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            label = f'{cls_name}: {conf:.2f}'
            cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    output_video.write(frame)

cap.release()
output_video.release()




0: 384x640 1 person, 1 bottle, 1 chair, 96.5ms
Speed: 15.7ms preprocess, 96.5ms inference, 218.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bottle, 1 chair, 5.9ms
Speed: 0.0ms preprocess, 5.9ms inference, 7.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bottle, 8.0ms
Speed: 2.0ms preprocess, 8.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bottle, 1 chair, 1 vase, 17.1ms
Speed: 0.0ms preprocess, 17.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bottle, 1 chair, 16.0ms
Speed: 0.0ms preprocess, 16.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 1 vase, 13.1ms
Speed: 0.0ms preprocess, 13.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bottle, 1 chair, 7.5ms
Speed: 1.0ms preprocess, 7.5ms inference, 0.0ms postprocess per image at shape (1, 3, 3

In [4]:
def calculate_iou(box1, box2):
    x1_inter = max(box1[0], box2[0])
    y1_inter = max(box1[1], box2[1])
    x2_inter = min(box1[2], box2[2])
    y2_inter = min(box1[3], box2[3])
    
    intersection = max(0, x2_inter - x1_inter) * max(0, y2_inter - y1_inter)
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union = area_box1 + area_box2 - intersection
    return intersection / union if union > 0 else 0


In [5]:
tracked_objects = {}  

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    results = model(frame)

    for result in results.xyxy[0]:
        x1, y1, x2, y2, conf, cls = result
        cls_name = model.names[int(cls)]

        if cls_name in class_names:
            pass  

    output_video.write(frame)

cap.release()
output_video.release()
