In [1]:
!pip install ultralytics




[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from ultralytics import YOLO

# Official YOLOv8-small checkpoint; the weights file is downloaded to the
# working directory the first time it is requested.
model = YOLO(model="yolov8s.pt")

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 64.7MB/s]


1. Basic Tracking Example with Output to Video File

In [10]:
from ultralytics import YOLO
import cv2

# Load the pre-trained YOLOv8-small model
model = YOLO("yolov8s.pt")

# Open the input video and fail loudly if it cannot be read; otherwise the loop
# below would be skipped and an empty output file written without any error.
video_path = "video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Some backends report 0 when the rate is unknown, so
# fall back to 30 fps in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create the VideoWriter for the annotated output
out = cv2.VideoWriter('output_basic_tracking.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Loop through the video frames. The try/finally guarantees that both the
# capture and the writer are released even if tracking raises part-way through,
# so the output file is finalized and the input file is not left locked.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of stream (or a read error): stop processing
            break

        # Run YOLOv8 tracking on the frame; persist=True keeps track IDs
        # across successive calls
        results = model.track(frame, persist=True)

        # Draw the detections / track IDs on the frame
        annotated_frame = results[0].plot()

        # Append the annotated frame to the output video
        out.write(annotated_frame)
finally:
    # Release everything
    cap.release()
    out.release()




0: 384x640 16 persons, 84.3ms
Speed: 2.2ms preprocess, 84.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 72.6ms
Speed: 3.0ms preprocess, 72.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 79.1ms
Speed: 2.0ms preprocess, 79.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 82.8ms
Speed: 2.7ms preprocess, 82.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 78.1ms
Speed: 1.0ms preprocess, 78.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 77.8ms
Speed: 2.0ms preprocess, 77.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 77.2ms
Speed: 2.0ms preprocess, 77.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 79.8ms
Speed: 2.4ms preprocess, 79.8ms inference, 1.0ms postprocess per image at

2. Advanced Tracking Example with Output to Video File Using ByteTrack

In [12]:
from ultralytics import YOLO
import cv2

# Load the YOLOv8-nano model
model = YOLO("yolov8n.pt")

# Open the video file and fail loudly if it cannot be read; otherwise the loop
# below would be skipped and an empty output file written without any error.
video_path = "video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Some backends report 0 when the rate is unknown, so
# fall back to 30 fps in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create VideoWriter object
out = cv2.VideoWriter('output_advanced_tracking.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Loop through the video frames. The try/finally guarantees that both the
# capture and the writer are released even if tracking raises part-way through.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of stream (or a read error): stop processing
            break

        # Run YOLOv8 tracking on the frame with the ByteTrack tracker config;
        # persist=True keeps track IDs across successive calls
        results = model.track(frame, persist=True, tracker="bytetrack.yaml")

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Write the frame to the output video file
        out.write(annotated_frame)
finally:
    # Release everything when the job is finished
    cap.release()
    out.release()



0: 384x640 10 persons, 74.7ms
Speed: 2.0ms preprocess, 74.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 61.8ms
Speed: 2.0ms preprocess, 61.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 bird, 56.9ms
Speed: 1.0ms preprocess, 56.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 bird, 55.9ms
Speed: 2.0ms preprocess, 55.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 2 birds, 56.7ms
Speed: 1.9ms preprocess, 56.7ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 2 birds, 60.2ms
Speed: 2.0ms preprocess, 60.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 2 birds, 57.9ms
Speed: 2.0ms preprocess, 57.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 2 birds, 56.5ms
Speed: 1.0ms preproce

3. Visualizing Tracks Over Time and Saving Output

In [16]:
from collections import defaultdict
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLOv8-nano model
model = YOLO("yolov8n.pt")

# Open the video file and fail loudly if it cannot be read; otherwise the loop
# below would be skipped and an empty output file written without any error.
video_path = "video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Some backends report 0 when the rate is unknown, so
# fall back to 30 fps in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define the codec and create VideoWriter object
out = cv2.VideoWriter('output_tracks.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Maximum number of recent centre points kept (and drawn) per track
max_track_points = 30

# track id -> list of recent (x, y) box centres, oldest first
track_history = defaultdict(list)

# The try/finally guarantees that both the capture and the writer are released
# even if tracking raises part-way through.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of stream (or a read error): stop processing
            break

        results = model.track(frame, persist=True)
        annotated_frame = results[0].plot()

        detections = results[0].boxes
        # `boxes.id` is None on frames where the tracker reports no tracks;
        # calling .int() on it would raise AttributeError and abort the run,
        # so such frames are written out without any trail overlay.
        if detections is not None and detections.id is not None:
            boxes = detections.xywh.cpu()
            track_ids = detections.id.int().cpu().tolist()

            for box, track_id in zip(boxes, track_ids):
                # xywh: first two values are the box centre
                x, y = box[0], box[1]
                track = track_history[track_id]
                track.append((float(x), float(y)))
                if len(track) > max_track_points:
                    # Drop the oldest point so the trail has bounded length
                    track.pop(0)

                # Flatten the (x, y) pairs and reshape to the (N, 1, 2) int32
                # layout that cv2.polylines expects
                points = np.hstack(track).astype(np.int32).reshape((-1, 1, 2))
                cv2.polylines(annotated_frame, [points], isClosed=False, color=(222, 222, 222), thickness=6)

        out.write(annotated_frame)
finally:
    cap.release()
    out.release()





0: 384x640 10 persons, 81.3ms
Speed: 2.0ms preprocess, 81.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 77.4ms
Speed: 2.0ms preprocess, 77.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 bird, 68.9ms
Speed: 2.0ms preprocess, 68.9ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 bird, 67.0ms
Speed: 2.0ms preprocess, 67.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 2 birds, 71.6ms
Speed: 1.0ms preprocess, 71.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 2 birds, 64.8ms
Speed: 2.0ms preprocess, 64.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 2 birds, 61.1ms
Speed: 2.0ms preprocess, 61.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 2 birds, 67.0ms
Speed: 1.0ms preproce