#### 1. Real-time object counting and tracking using the BoT-SORT algorithm

1. Detect and track objects in a video
2. Draw a tracker marker (a small circle) at the centre of each detected object
3. Draw a line on the frame and count each tracked object once its centre is below that line


#### 1.1 import required modules and load the YOLO11 large model


In [1]:
import os
import cv2
from ultralytics import YOLO
from collections import defaultdict

# Load the YOLO model from the 'yolo11l.pt' weights file (the YOLO11 large
# model named in the section heading). `model` is reused by the cells below
# to read the class names and to run tracking on each video frame.
model = YOLO('yolo11l.pt')

#### 1.2 load the class names


In [2]:
# model.names maps each integer class index to its label (0 -> 'person',
# 2 -> 'car', 3 -> 'motorcycle', ...). Despite its name, class_list is that
# dict-style mapping, not a list; the tracking cell looks labels up by index.
class_list = model.names 
# Bare expression so the notebook shows the whole mapping as the cell output
class_list

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

#### 1.3 load the video file


In [3]:
video_path = 'test_videos/4.mp4'

# Only try to decode when the path exists and holds at least one byte.
# Short-circuiting means the size lookup is skipped for a missing path.
if os.path.exists(video_path) and os.path.getsize(video_path) != 0:
    # Hand the file to OpenCV, then ask the capture whether it really opened
    cap = cv2.VideoCapture(video_path)
    if cap.isOpened():
        print(f"Successfully opened the video file '{video_path}'.")
    else:
        print(f"Error: Unable to open the video file '{video_path}'.")
else:
    # Missing or zero-byte file: report it; `cap` is not created in this case
    print(f"Error: The file '{video_path}' does not exist or is empty.")

Successfully opened the video file 'test_videos/4.mp4'.


In [4]:
# Create the writer for the annotated output video, matching the input's
# frame size and frame rate. Requires `cap` from the previous cell.
output_video_path = 'test_videos/output.avi'
assert cap.isOpened(), "Error reading video file"

# Frame dimensions must be integers: they form the writer's frame size and
# `w` is also used later to position the count overlay.
w, h = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH,
                                   cv2.CAP_PROP_FRAME_HEIGHT))

# Keep the frame rate as a float. Truncating it (e.g. 29.97 -> 29) would make
# the written video play at a slightly different speed than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # The capture could not report a frame rate; fall back to a sane default
    # instead of handing 0 to the writer.
    fps = 30.0

video_writer = cv2.VideoWriter(output_video_path,
                                cv2.VideoWriter_fourcc(*"mp4v"),
                                fps, (w, h))

# A writer that failed to open silently drops every frame, so say so up front.
if not video_writer.isOpened():
    print(f"Error: Unable to open the output video file '{output_video_path}' for writing.")

In [None]:
# Main loop: track objects frame by frame, annotate each frame, count objects
# whose centre is below the red line, and write/display the annotated frames.
# Requires `cap`, `video_writer`, `w`, `model` and `class_list` from the cells above.

line_y_red = 430  # y-coordinate (pixels) of the red counting line
line_x_start, line_x_end = 690, 1130  # horizontal extent (pixels) of the drawn line

# Dictionary to store object counts by class name
class_counts = defaultdict(int)

# Track IDs that have already been counted, so each object is counted only once
crossed_ids = set()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # no more frames (or the read failed)

        # Run YOLO tracking on the frame. Classes 0, 2, 3 are person, car and
        # motorcycle in the class list shown earlier.
        # Other optional arguments: conf=0.5, iou=0.5, device='mps', show=True
        results = model.track(frame, persist=True, classes=[0, 2, 3])

        # Draw the red line on every frame, whether or not anything was detected
        cv2.line(frame, (line_x_start, line_y_red), (line_x_end, line_y_red), (0, 0, 255), 2)

        detections = results[0].boxes
        # `boxes.id` is None when the frame has no tracked detections; calling
        # .int() on it would raise AttributeError, so skip annotation instead.
        if detections is not None and detections.id is not None:
            # Get the detected boxes, their track IDs and class indices
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.int().cpu().tolist()
            class_indices = detections.cls.int().cpu().tolist()

            # Loop through each tracked object
            for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
                x1, y1, x2, y2 = map(int, box)
                # Centre point of the bounding box
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)

                class_name = class_list[class_idx]  # class name from the index
                cv2.circle(frame, (cx, cy), 3, (0, 0, 255), -1)  # centre point
                cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Count the object the first time its centre is below the line.
                # NOTE(review): an object whose first tracked position is already
                # below the line is counted too, and the line's x-extent is not
                # checked - confirm this is the intended counting rule.
                if cy > line_y_red and track_id not in crossed_ids:
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1

        # Display the running counts in the top-right corner of the frame
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (w-200, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # Write every frame so the output video keeps the input's length
        video_writer.write(frame)

        # Show the frame
        cv2.imshow('YOLO object tracking and counting', frame)

        # Exit the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release resources, even on an error or a kernel interrupt.
    # Releasing the writer is what finalises the output file.
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()


0: 384x640 6 persons, 10 cars, 3 motorcycles, 851.6ms
Speed: 2.9ms preprocess, 851.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 10 cars, 3 motorcycles, 343.1ms
Speed: 3.4ms preprocess, 343.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 10 cars, 3 motorcycles, 343.2ms
Speed: 1.4ms preprocess, 343.2ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 10 cars, 3 motorcycles, 520.3ms
Speed: 1.4ms preprocess, 520.3ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 10 cars, 4 motorcycles, 673.0ms
Speed: 1.7ms preprocess, 673.0ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 10 cars, 4 motorcycles, 735.3ms
Speed: 3.9ms preprocess, 735.3ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 4 motorcycles, 548.8ms
Speed: 2.2ms preprocess, 548

In [6]:
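# # Optional logic (kept commented out): filter out persons sitting on motorbikes.
# # Bicycles are not handled by the code below.
# # Note: boxes here are [x, y, w, h] and the label is 'motorbike', whereas the
# # tracking cell above uses xyxy boxes and the model's label is 'motorcycle';
# # adapt both before enabling this.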

# import cv2
# import numpy as np

# def filter_pedestrians(detections):
#     persons = [d for d in detections if d['class'] == 'person']
#     motorbikes = [d for d in detections if d['class'] == 'motorbike']
    
#     pedestrians = []
    
#     for person in persons:
#         person_box = person['bbox']
#         is_pedestrian = True
        
#         for motorbike in motorbikes:
#             motorbike_box = motorbike['bbox']
            
#             # Check if the person is sitting on the motorbike
#             if is_person_on_motorbike(person_box, motorbike_box):
#                 is_pedestrian = False
#                 break
        
#         if is_pedestrian:
#             pedestrians.append(person)
    
#     return pedestrians

# def is_person_on_motorbike(person_box, motorbike_box):
#     # Define the criteria for a person sitting on a motorbike
#     # For simplicity, we check if the person box is within the motorbike box
#     px, py, pw, ph = person_box
#     mx, my, mw, mh = motorbike_box
    
#     if (px > mx and py > my and px + pw < mx + mw and py + ph < my + mh):
#         return True
#     return False

# # Example detections from YOLO
# detections = [
#     {'class': 'person', 'bbox': [50, 50, 100, 200]},
#     {'class': 'motorbike', 'bbox': [40, 40, 120, 220]},
#     {'class': 'person', 'bbox': [200, 200, 100, 200]},
# ]

# pedestrians = filter_pedestrians(detections)
# print("Pedestrians:", pedestrians)
