In [1]:
import cv2
from ultralytics import YOLO

In [2]:
from collections import defaultdict

In [3]:
# Instantiate the Ultralytics YOLO model from 'yolo11l.pt' (presumably a
# weights checkpoint resolved relative to the working directory — TODO confirm).
# The resulting `model` is used below for `model.names` and `model.track(...)`.
model = YOLO('yolo11l.pt')

In [4]:
# Mapping of integer class index -> class label exposed by the model
# (e.g. 2 -> 'car', 7 -> 'truck'); used later to label tracked objects.
class_list = model.names
# Bare expression so the notebook renders the mapping as the cell output.
class_list

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [18]:
# Track vehicles in '4.mp4' frame by frame and count, per class, every track
# whose box centre is seen below a horizontal red line.
cap = cv2.VideoCapture('4.mp4')
line_y_red = 430 #Red line position

# Class indices passed to the tracker, per `model.names`:
# 1 bicycle, 2 car, 3 motorcycle, 5 bus, 6 train, 7 truck
tracked_class_ids = [1, 2, 3, 5, 6, 7]

# Number of counted objects, keyed by class name
class_counts = defaultdict(int)

# Track IDs that have already been counted, so each object is counted once
crossed_ids = set()

# try/finally guarantees the capture and the preview window are released even
# if tracking or drawing raises part-way through the video.
try:
    while cap.isOpened():
        r, frame = cap.read()
        if not r:
            # End of stream (or read failure): stop processing.
            break

        results = model.track(frame, persist=True, classes=tracked_class_ids)
        detections = results[0].boxes

        # Draw the counting line on every frame, before the boxes.
        cv2.line(frame, (690, line_y_red), (1130, line_y_red), (0,0,255), 3)

        # `boxes.id` is None when the tracker has no confirmed tracks for this
        # frame (see Ultralytics `Boxes.id`); calling `.int()` on it would
        # raise AttributeError, so such frames skip the per-object work.
        if detections is not None and detections.id is not None:
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.int().cpu().tolist()
            class_indices = detections.cls.int().cpu().tolist()

            # Loop through each tracked object of the *current* frame
            for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
                x1, y1, x2, y2 = map(int, box)

                # Calculate the center point
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2

                class_name = class_list[class_idx]

                cv2.circle(frame, (cx,cy), 4, (0,0,255), -1)

                cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,255), 2)
                cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0), 2)

                # Count the object the first time its centre is below the line.
                # NOTE(review): this also counts objects that are *first seen*
                # already below the line, and ignores the line's x-extent
                # (690..1130) — confirm this matches the intended counting rule.
                if cy > line_y_red and track_id not in crossed_ids:
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1

        # Display the running counts on the frame, one class per text row
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,255,0), 2)
            y_offset += 30

        cv2.imshow("YOLOY Object Tracking & Counting", frame)

        # Allow the user to quit early with the 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    


0: 384x640 1 car, 2 buss, 320.1ms
Speed: 3.3ms preprocess, 320.1ms inference, 128.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 2 motorcycles, 2 buss, 441.7ms
Speed: 2.0ms preprocess, 441.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 3 motorcycles, 2 buss, 339.1ms
Speed: 2.0ms preprocess, 339.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 3 motorcycles, 2 buss, 1 truck, 352.4ms
Speed: 2.5ms preprocess, 352.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 4 motorcycles, 2 buss, 1 truck, 307.6ms
Speed: 1.0ms preprocess, 307.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 4 motorcycles, 2 buss, 1 truck, 312.1ms
Speed: 2.0ms preprocess, 312.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 cars, 4 motorcycles, 2 buss, 2 trucks, 325.3ms
Speed: 1.5ms preprocess, 325.3ms