In [None]:
!pip install ultralytics opencv-python -q
!conda install conda-forge::ultralytics
!pip install supervision==0.21.0.rc5

### Sample test footage

In [6]:
import cv2
from ultralytics import YOLO, solutions
# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/741755_Kazakhstan Traffic Cars Road_By_Danil_Nevsky_Artlist_HD.mp4")
assert cap.isOpened(), "Error reading video file" 
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

classes_to_count = list(range(0, 23))  # classes to count 

# Define the lines or regions points
line_points_1 = [(50, 300), (1000, 250)]  # first line points
line_points_2 = [(1400, 750), (1920, 650)]  # second line points

# Video writer
video_writer = cv2.VideoWriter("sample_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Init Object Counters
counter_1 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_1,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(0, 0),
    count_reg_color=(229, 255, 204),
    count_bg_color=(229, 255, 204)
)

counter_2 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_2,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-250, 0),
    count_reg_color=(153, 153, 255),
    count_bg_color=(153, 153, 255)
)

# Init separate trackers for each counter
tracker = model  # Tracker for the first counter

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Perform tracking for the first counter, options: tracker="bytetrack.yaml"
    tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)
            
    im0 = counter_1.start_counting(im0, tracks)

    # # Perform tracking for the second counter
    # im0 = counter_2.start_counting(im0, tracks)
    
    # Write the frame to the output video
    video_writer.write(im0)

# Release resources
cap.release()
video_writer.release()
cv2.destroyAllWindows()

Line Counter Initiated.
Line Counter Initiated.

0: 384x640 9 cars, 1 traffic light, 686.1ms
Speed: 2.1ms preprocess, 686.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 1 traffic light, 655.2ms
Speed: 1.5ms preprocess, 655.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 traffic light, 604.5ms
Speed: 1.4ms preprocess, 604.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 traffic light, 607.6ms
Speed: 1.4ms preprocess, 607.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 traffic light, 608.4ms
Speed: 2.1ms preprocess, 608.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 traffic light, 631.4ms
Speed: 1.4ms preprocess, 631.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 1 traffic light, 655.2ms
Speed: 1.4ms preprocess, 655.2ms inference, 

KeyboardInterrupt: 

### Real World Footage

In [1]:
import cv2
from ultralytics import YOLO, solutions

# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/DJI_0505.MP4")
assert cap.isOpened(), "Error reading video file" 
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

classes_to_count = list(range(0, 23))  # classes to count 

# Define the lines or regions points
line_points_1 = [(300, 600), (700, 550)]  # first line points
line_points_2 = [(800, 550), (1700, 350)]  # second line points
line_points_3 = [(174, 1150), (174, 1800)]  # second line points
line_points_4 = [(640, 1674), (1006, 1578)]  # second line points


# Video writer
video_writer = cv2.VideoWriter("smoother_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Init Object Counters
counter_1 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_1,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(0, 0),
    count_reg_color=(229, 255, 204),  # Very light green
    count_bg_color=(229, 255, 204)  # Very light green
)

counter_2 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_2,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-250, 0),
    count_reg_color=(153, 204, 255),  # Light blue
    count_bg_color=(153, 204, 255)  # Light blue
)

counter_3 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_3,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-500, 0),
    count_reg_color=(255, 204, 204),  # Light red
    count_bg_color=(255, 204, 204)  # Light red
)

counter_4 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_4,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-750, 0),
    count_reg_color=(210, 180, 140),  # Light brown
    count_bg_color=(210, 180, 140)  # Light brown
)


# Init separate trackers for each counter
tracker = model  # Tracker for the first counter

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Perform tracking for the first counter, options: tracker="bytetrack.yaml"
    tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)
    
    im0 = counter_1.start_counting(im0, tracks)

    # Perform tracking for the second counter
    im0 = counter_2.start_counting(im0, tracks)
    
    im0 = counter_3.start_counting(im0, tracks)
    
    im0 = counter_4.start_counting(im0, tracks)
    
    # Write the frame to the output video
    video_writer.write(im0)

# Release resources
cap.release()
video_writer.release()
cv2.destroyAllWindows()

Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.

0: 640x384 1 person, 6 cars, 1 truck, 697.6ms
Speed: 3.9ms preprocess, 697.6ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 5 cars, 2 trucks, 671.9ms
Speed: 1.5ms preprocess, 671.9ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 676.4ms
Speed: 1.4ms preprocess, 676.4ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 645.1ms
Speed: 1.6ms preprocess, 645.1ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 663.1ms
Speed: 1.1ms preprocess, 663.1ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 678.1ms
Speed: 1.3ms preprocess, 678.1ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 7 cars, 1 t

KeyboardInterrupt: 

### Prevent the detection of stationary objects

In [2]:
import cv2
from ultralytics import YOLO, solutions

# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/DJI_0505.MP4")
assert cap.isOpened(), "Error reading video file" 
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

classes_to_count = list(range(0, 23))  # classes to count 

# Define the lines or regions points
line_points_1 = [(300, 600), (700, 550)]  # first line points
line_points_2 = [(800, 550), (1700, 350)]  # second line points
line_points_3 = [(174, 1150), (174, 1800)]  # second line points
line_points_4 = [(640, 1674), (1006, 1578)]  # second line points


# Video writer
video_writer = cv2.VideoWriter("smoother_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Init Object Counters
counter_1 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_1,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(0, 0),
    count_reg_color=(229, 255, 204),  # Very light green
    count_bg_color=(229, 255, 204)  # Very light green
)

counter_2 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_2,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-250, 0),
    count_reg_color=(153, 204, 255),  # Light blue
    count_bg_color=(153, 204, 255)  # Light blue
)

counter_3 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_3,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-500, 0),
    count_reg_color=(255, 204, 204),  # Light red
    count_bg_color=(255, 204, 204)  # Light red
)

counter_4 = solutions.ObjectCounter(
    view_img=True,
    reg_pts=line_points_4,
    classes_names=model.names,
    draw_tracks=True,
    line_thickness=2,
    text_offset=(-750, 0),
    count_reg_color=(210, 180, 140),  # Light brown
    count_bg_color=(210, 180, 140)  # Light brown
)

# Initialize tracking dictionary
last_positions = {}
movement_threshold = 20  # Minimum pixels an object must move to be considered in motion

# Init separate trackers for each counter
tracker = model  # Tracker for the first counter

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Perform tracking for all counters
    tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)
    
    for track in tracks:
        id = track.id
        if id in last_positions:
            x, y, w, h = track.xywh
            last_x, last_y = last_positions[id]
            # Calculate movement
            distance = ((last_x - x) ** 2 + (last_y - y) ** 2) ** 0.5
            if distance < movement_threshold:
                continue  # Skip stationary objects
        last_positions[id] = (x, y)  # Update position
    
    # Perform counting and draw tracks for each counter
    im0 = counter_1.start_counting(im0, tracks)
    im0 = counter_2.start_counting(im0, tracks)
    im0 = counter_3.start_counting(im0, tracks)
    im0 = counter_4.start_counting(im0, tracks)
    
    # Write the frame to the output video
    video_writer.write(im0)

# Release resources
cap.release()
video_writer.release()
cv2.destroyAllWindows()


Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.

0: 640x384 1 person, 6 cars, 1 truck, 716.3ms
Speed: 2.5ms preprocess, 716.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)


AttributeError: 'Results' object has no attribute 'id'. See valid attributes below.

    A class for storing and manipulating inference results.

    Attributes:
        orig_img (numpy.ndarray): Original image as a numpy array.
        orig_shape (tuple): Original image shape in (height, width) format.
        boxes (Boxes, optional): Object containing detection bounding boxes.
        masks (Masks, optional): Object containing detection masks.
        probs (Probs, optional): Object containing class probabilities for classification tasks.
        keypoints (Keypoints, optional): Object containing detected keypoints for each object.
        speed (dict): Dictionary of preprocess, inference, and postprocess speeds (ms/image).
        names (dict): Dictionary of class names.
        path (str): Path to the image file.

    Methods:
        update(boxes=None, masks=None, probs=None, obb=None): Updates object attributes with new detection results.
        cpu(): Returns a copy of the Results object with all tensors on CPU memory.
        numpy(): Returns a copy of the Results object with all tensors as numpy arrays.
        cuda(): Returns a copy of the Results object with all tensors on GPU memory.
        to(*args, **kwargs): Returns a copy of the Results object with tensors on a specified device and dtype.
        new(): Returns a new Results object with the same image, path, and names.
        plot(...): Plots detection results on an input image, returning an annotated image.
        show(): Show annotated results to screen.
        save(filename): Save annotated results to file.
        verbose(): Returns a log string for each task, detailing detections and classifications.
        save_txt(txt_file, save_conf=False): Saves detection results to a text file.
        save_crop(save_dir, file_name=Path("im.jpg")): Saves cropped detection images.
        tojson(normalize=False): Converts detection results to JSON format.
    

### Smoother

In [1]:
import supervision as sv
from ultralytics import YOLO
import cv2
import numpy as np

# Load YOLO model
model = YOLO("yolov9e.pt")
tracker = sv.ByteTrack()
frames_generator = sv.get_video_frames_generator("resources/741755_Kazakhstan Traffic Cars Road_By_Danil_Nevsky_Artlist_HD.mp4")
video_info = sv.VideoInfo.from_video_path("resources/741755_Kazakhstan Traffic Cars Road_By_Danil_Nevsky_Artlist_HD.mp4")

# Define the line for counting
start, end = sv.Point(x=50, y=300), sv.Point(x=1000, y=250)
line_zone = sv.LineZone(start=start, end=end)

# Initialize counters
crossed_count_in = 0
crossed_count_out = 0

smoother = sv.DetectionsSmoother()

bounding_box_annotator = sv.BoundingBoxAnnotator()

with sv.VideoSink("supervision_output.mp4", video_info=video_info) as sink:
    for frame in frames_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = tracker.update_with_detections(detections)
        detections = smoother.update_with_detections(detections)

        crossed_in, crossed_out = line_zone.trigger(detections)
        if np.any(crossed_in):
            crossed_count_in += np.sum(crossed_in)
        if np.any(crossed_out):
            crossed_count_out += np.sum(crossed_out)

        annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)

        # Annotate the line on the frame
        cv2.line(annotated_frame, (start.x, start.y), (end.x, end.y), (0, 0, 255), 2)  # Red line

        # Annotate counts on the frame
        cv2.putText(annotated_frame, f"Crossed In: {crossed_count_in}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(annotated_frame, f"Crossed Out: {crossed_count_out}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        sink.write_frame(annotated_frame)



0: 384x640 14 cars, 1 traffic light, 1 potted plant, 792.3ms
Speed: 2.0ms preprocess, 792.3ms inference, 309.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 641.0ms
Speed: 1.9ms preprocess, 641.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 638.0ms
Speed: 1.6ms preprocess, 638.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 698.2ms
Speed: 1.7ms preprocess, 698.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 cars, 1 traffic light, 1 potted plant, 654.8ms
Speed: 2.0ms preprocess, 654.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 cars, 1 traffic light, 1 potted plant, 638.1ms
Speed: 1.5ms preprocess, 638.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 cars, 1 traffic light, 