In [None]:
!pip install ultralytics opencv-python -q
!conda install conda-forge::ultralytics
!pip install supervision==0.21.0.rc5

### Sample test footage

In [None]:
import cv2
from ultralytics import YOLO, solutions
# Count objects crossing a line in the sample clip and write the annotated
# frames to "sample_object_counting_output.mp4".

# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/741755_Kazakhstan Traffic Cars Road_By_Danil_Nevsky_Artlist_HD.mp4")
assert cap.isOpened(), "Error reading video file"
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: int() would truncate rates such as 29.97 and
# the written video would play at a slightly different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)

classes_to_count = list(range(0, 23))  # class indices 0..22 are passed to the tracker

# Define the counting lines (pixel coordinates of the two end points)
line_points_1 = [(50, 300), (1000, 250)]  # first line points
line_points_2 = [(1400, 750), (1920, 650)]  # second line points

# Video writer
video_writer = cv2.VideoWriter("sample_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Everything below runs inside try/finally so the capture and the writer are
# released even when the cell is interrupted (e.g. KeyboardInterrupt) or fails;
# otherwise the output file is left unfinalized.
try:
    # Init Object Counters
    # NOTE(review): `text_offset` is not a parameter of the stock ultralytics
    # ObjectCounter as far as I can tell — presumably a locally modified build; confirm.
    counter_1 = solutions.ObjectCounter(
        view_img=True,
        reg_pts=line_points_1,
        classes_names=model.names,
        draw_tracks=True,
        line_thickness=2,
        text_offset=(0, 0),
        count_reg_color=(229, 255, 204),
        count_bg_color=(229, 255, 204)
    )

    # counter_2 is constructed but currently not applied in the loop below.
    counter_2 = solutions.ObjectCounter(
        view_img=True,
        reg_pts=line_points_2,
        classes_names=model.names,
        draw_tracks=True,
        line_thickness=2,
        text_offset=(-250, 0),
        count_reg_color=(153, 153, 255),
        count_bg_color=(153, 153, 255)
    )

    # A single model instance does the tracking; its results are shared by the counters.
    tracker = model

    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Track once per frame, options: tracker="bytetrack.yaml"
        tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)

        im0 = counter_1.start_counting(im0, tracks)

        # Second counter is disabled; uncomment to count on line_points_2 as well.
        # im0 = counter_2.start_counting(im0, tracks)

        # Write the frame to the output video
        video_writer.write(im0)
finally:
    # Release resources
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

### Real World Footage

In [3]:
import cv2
from ultralytics import YOLO, solutions

# Count objects crossing four lines in the drone clip and write the annotated
# frames to "out_object_counting_output.mp4".

# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/DJI_0505.MP4")
assert cap.isOpened(), "Error reading video file"
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: int() would truncate rates such as 29.97 and
# the written video would play at a slightly different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)

classes_to_count = list(range(0, 23))  # class indices 0..22 are passed to the tracker

# Define the counting lines (pixel coordinates of the two end points)
line_points_1 = [(300, 600), (700, 550)]  # first line points
line_points_2 = [(800, 550), (1700, 350)]  # second line points
line_points_3 = [(174, 1150), (174, 1800)]  # third line points
line_points_4 = [(640, 1674), (1006, 1578)]  # fourth line points


# Video writer
video_writer = cv2.VideoWriter("out_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Everything below runs inside try/finally so the capture and the writer are
# released even when the cell is interrupted (e.g. KeyboardInterrupt) or fails;
# otherwise the output file is left unfinalized.
try:
    # Arguments that are identical for all four counters.
    # NOTE(review): `text_offset` (used below) is not a parameter of the stock
    # ultralytics ObjectCounter as far as I can tell — presumably a locally
    # modified build; confirm.
    shared_counter_kwargs = dict(
        view_img=True,
        classes_names=model.names,
        draw_tracks=True,
        line_thickness=2,
    )

    # Init Object Counters: each one differs only in its line, text offset and color.
    counter_1 = solutions.ObjectCounter(
        reg_pts=line_points_1,
        text_offset=(0, 0),
        count_reg_color=(229, 255, 204),  # Very light green
        count_bg_color=(229, 255, 204),  # Very light green
        **shared_counter_kwargs,
    )

    counter_2 = solutions.ObjectCounter(
        reg_pts=line_points_2,
        text_offset=(-250, 0),
        count_reg_color=(153, 204, 255),  # Light blue
        count_bg_color=(153, 204, 255),  # Light blue
        **shared_counter_kwargs,
    )

    counter_3 = solutions.ObjectCounter(
        reg_pts=line_points_3,
        text_offset=(-500, 0),
        count_reg_color=(255, 204, 204),  # Light red
        count_bg_color=(255, 204, 204),  # Light red
        **shared_counter_kwargs,
    )

    counter_4 = solutions.ObjectCounter(
        reg_pts=line_points_4,
        text_offset=(-750, 0),
        count_reg_color=(210, 180, 140),  # Light brown
        count_bg_color=(210, 180, 140),  # Light brown
        **shared_counter_kwargs,
    )

    # Applied in this order; each counter draws on the frame returned by the previous one.
    counters = (counter_1, counter_2, counter_3, counter_4)

    # A single model instance does the tracking; its results are shared by all counters.
    tracker = model

    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        # Track once per frame, options: tracker="bytetrack.yaml"
        tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)

        # Run every counter on the same tracking results
        for counter in counters:
            im0 = counter.start_counting(im0, tracks)

        # Write the frame to the output video
        video_writer.write(im0)
finally:
    # Release resources
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.

0: 640x384 1 person, 6 cars, 1 truck, 694.0ms
Speed: 1.8ms preprocess, 694.0ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 5 cars, 2 trucks, 642.7ms
Speed: 1.5ms preprocess, 642.7ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 642.1ms
Speed: 1.5ms preprocess, 642.1ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 672.0ms
Speed: 1.5ms preprocess, 672.0ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 666.1ms
Speed: 2.1ms preprocess, 666.1ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 654.4ms
Speed: 1.5ms preprocess, 654.4ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 7 cars, 1 t

KeyboardInterrupt: 

### Calculate speed and flag stationary objects so the counters can ignore them

In [23]:
import cv2
from ultralytics import YOLO, solutions
import time

# Track objects in the drone clip, estimate each object's speed from its
# displacement between sampling frames, collect the IDs of objects that barely
# moved, and hand that set to the line counters. Annotated frames are written
# to "stationary_object_counting_output.mp4".

# Load the YOLO model
model = YOLO("yolov9e.pt")

# Open the video file
cap = cv2.VideoCapture("resources/DJI_0505.MP4")
assert cap.isOpened(), "Error reading video file"
# w/h are the frame size used by the writer; they are not reused for box sizes below.
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: it is used both by the writer and to convert
# frame indices into video time, so truncating e.g. 29.97 would skew both.
fps = cap.get(cv2.CAP_PROP_FPS)

classes_to_count = list(range(0, 23))  # class indices 0..22 are passed to the tracker

# Define the counting lines (pixel coordinates of the two end points)
line_points_1 = [(300, 600), (700, 550)]
line_points_2 = [(800, 550), (1700, 350)]
line_points_3 = [(174, 1150), (174, 1800)]
line_points_4 = [(640, 1674), (1006, 1578)]


# Video writer
video_writer = cv2.VideoWriter("stationary_object_counting_output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Tracking state
last_positions = {}  # track id -> (center_x, center_y, box_w, box_h, video_time_s) at the last sample
stationary_ids = set()  # track ids whose last sampled displacement was below movement_threshold
frame_interval = 30  # Check position every x frames
frame_count = 0  # Number of frames read so far

movement_threshold = 2  # Minimum pixels an object must move between samples to be considered in motion
pixel_to_meter = 0.5  # Conversion factor: 1 pixel = 0.5 meters

# Everything below runs inside try/finally so the capture and the writer are
# released even when the cell is interrupted (e.g. KeyboardInterrupt) or fails;
# otherwise the output file is left unfinalized.
try:
    # Arguments that are identical for all four counters.
    # NOTE(review): `text_offset` and the third argument to start_counting()
    # below are not part of the stock ultralytics ObjectCounter as far as I can
    # tell — presumably a locally modified build; confirm.
    shared_counter_kwargs = dict(
        view_img=True,
        classes_names=model.names,
        draw_tracks=True,
        line_thickness=2,
    )

    # Init Object Counters: each one differs only in its line, text offset and color.
    counter_1 = solutions.ObjectCounter(
        reg_pts=line_points_1,
        text_offset=(0, 0),
        count_reg_color=(229, 255, 204),
        count_bg_color=(229, 255, 204),
        **shared_counter_kwargs,
    )

    counter_2 = solutions.ObjectCounter(
        reg_pts=line_points_2,
        text_offset=(-250, 0),
        count_reg_color=(153, 204, 255),
        count_bg_color=(153, 204, 255),
        **shared_counter_kwargs,
    )

    counter_3 = solutions.ObjectCounter(
        reg_pts=line_points_3,
        text_offset=(-500, 0),
        count_reg_color=(255, 204, 204),
        count_bg_color=(255, 204, 204),
        **shared_counter_kwargs,
    )

    counter_4 = solutions.ObjectCounter(
        reg_pts=line_points_4,
        text_offset=(-750, 0),
        count_reg_color=(210, 180, 140),
        count_bg_color=(210, 180, 140),
        **shared_counter_kwargs,
    )

    # Applied in this order; each counter draws on the frame returned by the previous one.
    counters = (counter_1, counter_2, counter_3, counter_4)

    # A single model instance does the tracking; its results are shared by all counters.
    tracker = model

    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        frame_count += 1  # Increment frame counter

        # Position of this frame on the video timeline, in seconds. Wall-clock
        # time is deliberately not used: it would measure how long inference
        # took rather than how much video time elapsed.
        video_time = frame_count / fps if fps > 0 else 0.0
        is_sample_frame = frame_count % frame_interval == 0

        tracks = tracker.track(im0, persist=True, show=False, classes=classes_to_count)

        for result in tracks:
            for track in result.boxes:
                # Boxes without a track id cannot be followed across frames; skip them
                # (int(None) would raise). Also skip anything that is not one xyxy box.
                if track.id is None or track.xyxy.numel() != 4:
                    continue

                track_id = int(track.id)
                x_min, y_min, x_max, y_max = track.xyxy[0].tolist()

                # Center coordinates and size of the bounding box
                x = (x_min + x_max) / 2
                y = (y_min + y_max) / 2
                box_w = x_max - x_min
                box_h = y_max - y_min

                # First sighting: store a baseline; there is nothing to compare against yet.
                if track_id not in last_positions:
                    last_positions[track_id] = (x, y, box_w, box_h, video_time)
                    continue

                # Between sampling frames the stored baseline is left untouched, so the
                # comparison below spans the whole interval instead of a single frame.
                if not is_sample_frame:
                    continue

                last_x, last_y, _, _, last_time = last_positions[track_id]
                distance_pixels = ((last_x - x) ** 2 + (last_y - y) ** 2) ** 0.5
                delta_time = video_time - last_time

                if distance_pixels < movement_threshold:
                    stationary_ids.add(track_id)
                    print(f"Object {track_id} is stationary.")
                else:
                    stationary_ids.discard(track_id)

                # delta_time is 0 only when the frame rate is unknown; skip the
                # speed estimate then instead of dividing by zero.
                if delta_time > 0:
                    # Convert distance from pixels to meters
                    distance_meters = distance_pixels * pixel_to_meter
                    # meters per second -> kilometers per hour
                    speed_km_per_hour = distance_meters / delta_time * 3.6
                    print(f"Object {track_id} moved with speed {speed_km_per_hour:.2f} km/h")

                # This sample becomes the baseline for the next interval
                last_positions[track_id] = (x, y, box_w, box_h, video_time)

        # Perform counting and draw tracks for each counter
        for counter in counters:
            im0 = counter.start_counting(im0, tracks, stationary_ids)

        # Write the frame to the output video
        video_writer.write(im0)
finally:
    # Release resources
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.
Line Counter Initiated.

0: 640x384 1 person, 6 cars, 1 truck, 777.4ms
Speed: 1.2ms preprocess, 777.4ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 5 cars, 2 trucks, 745.8ms
Speed: 1.4ms preprocess, 745.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 758.5ms
Speed: 1.5ms preprocess, 758.5ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 745.0ms
Speed: 1.8ms preprocess, 745.0ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 753.1ms
Speed: 1.6ms preprocess, 753.1ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 6 cars, 1 truck, 837.3ms
Speed: 1.2ms preprocess, 837.3ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 7 cars, 1 t

KeyboardInterrupt: 

### Smoother: line counting with supervision (ByteTrack + DetectionsSmoother)

In [1]:
import supervision as sv
from ultralytics import YOLO
import cv2
import numpy as np

# Detect with YOLO, track with ByteTrack, smooth the tracked boxes, count line
# crossings with a supervision LineZone, and write annotated frames to
# "supervision_output.mp4".

source_video = "resources/741755_Kazakhstan Traffic Cars Road_By_Danil_Nevsky_Artlist_HD.mp4"

# Detector and tracker
model = YOLO("yolov9e.pt")
tracker = sv.ByteTrack()

# Frame source and the metadata used to configure the output sink
frames_generator = sv.get_video_frames_generator(source_video)
video_info = sv.VideoInfo.from_video_path(source_video)

# Counting line
start = sv.Point(x=50, y=300)
end = sv.Point(x=1000, y=250)
line_zone = sv.LineZone(start=start, end=end)

# Running totals of crossings in each direction
crossed_count_in = 0
crossed_count_out = 0

smoother = sv.DetectionsSmoother()

bounding_box_annotator = sv.BoundingBoxAnnotator()

# Drawing parameters that do not change from frame to frame
line_start_px = (start.x, start.y)
line_end_px = (end.x, end.y)
line_color = (0, 0, 255)  # Red line
text_color = (0, 255, 0)

with sv.VideoSink("supervision_output.mp4", video_info=video_info) as sink:
    for frame in frames_generator:
        # Detect -> track -> smooth
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = tracker.update_with_detections(detections)
        detections = smoother.update_with_detections(detections)

        # Add this frame's crossings to the running totals
        crossed_in, crossed_out = line_zone.trigger(detections)
        if crossed_in.any():
            crossed_count_in += crossed_in.sum()
        if crossed_out.any():
            crossed_count_out += crossed_out.sum()

        # Draw boxes on a copy so the source frame stays untouched
        annotated_frame = bounding_box_annotator.annotate(frame.copy(), detections)

        # Annotate the line on the frame
        cv2.line(annotated_frame, line_start_px, line_end_px, line_color, 2)

        # Annotate counts on the frame
        overlay_labels = (
            (f"Crossed In: {crossed_count_in}", (50, 50)),
            (f"Crossed Out: {crossed_count_out}", (50, 100)),
        )
        for label_text, label_origin in overlay_labels:
            cv2.putText(annotated_frame, label_text, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)

        sink.write_frame(annotated_frame)



0: 384x640 14 cars, 1 traffic light, 1 potted plant, 792.3ms
Speed: 2.0ms preprocess, 792.3ms inference, 309.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 641.0ms
Speed: 1.9ms preprocess, 641.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 638.0ms
Speed: 1.6ms preprocess, 638.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 1 traffic light, 1 potted plant, 698.2ms
Speed: 1.7ms preprocess, 698.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 cars, 1 traffic light, 1 potted plant, 654.8ms
Speed: 2.0ms preprocess, 654.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 cars, 1 traffic light, 1 potted plant, 638.1ms
Speed: 1.5ms preprocess, 638.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 cars, 1 traffic light, 