In [1]:
import cv2
from ultralytics import YOLO
from collections import defaultdict

# Load the YOLO model
model = YOLO('yolo11s.pt')

# Class-index -> class-name lookup provided by the model; indexed with the
# integer class index of each detection when labelling boxes.
class_list = model.names

# Video source that the processing loop reads frame by frame.
cap = cv2.VideoCapture('test_videos/4.mp4')

frame_count = 0  # Number of frames read so far (used to skip frames)

# Y coordinate (pixels) of the horizontal red counting line.
line_y_red = 530

# Running count of counted objects, keyed by class name.
class_counts = defaultdict(int)

# Track IDs that have already been counted, so each object is counted once.
crossed_ids = set()

# Main loop: read frames from `cap`, run YOLO tracking on every third frame,
# draw the annotations, and count each track ID once when the centre of its
# box is below the red line. Press 'q' in the window to stop early.
#
# The capture and the display window are released in `finally`, so they are
# cleaned up even if tracking or drawing raises.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read failure

        # frame = cv2.resize(frame, (640, 360))  # optional: downscale to speed up

        frame_count += 1
        if frame_count % 3 != 0:
            continue  # only every third frame is processed and displayed

        # Run YOLO tracking on the frame. `persist=True` keeps tracker state
        # between calls; `conf=0.4` discards low-confidence detections.
        # NOTE(review): classes 2, 3, 5, 7 are presumably the COCO vehicle
        # classes (car, motorcycle, bus, truck) - confirm against model.names.
        results = model.track(frame, persist=True, classes=[2, 3, 5, 7], conf=0.4)
        detections = results[0].boxes

        # The counting line is drawn on every processed frame, whether or not
        # anything was tracked on it.
        cv2.line(frame, (690, line_y_red), (1530, line_y_red), (0, 0, 255), 3)
        cv2.putText(frame, 'Red Line', (690, line_y_red - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # `boxes.id` is None when the tracker has no active tracks for this
        # frame; calling `.int()` on it would raise AttributeError.
        if detections is not None and detections.id is not None:
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.int().cpu().tolist()
            class_indices = detections.cls.int().cpu().tolist()

            for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
                x1, y1, x2, y2 = map(int, box)
                cx = (x1 + x2) // 2
                cy = (y1 + y2) // 2

                class_name = class_list[class_idx]

                cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
                cv2.putText(frame, f"ID: {track_id} {class_name}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Count each track ID at most once, the first time its centre
                # is seen below the line.
                # NOTE(review): this tests position, not an actual crossing, so
                # a track first detected already below the line is counted
                # too, and the line's x extent (690..1530) is not considered.
                if cy > line_y_red and track_id not in crossed_ids:
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1

        # Overlay the running totals, one class per text row.
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # Show the frame
        cv2.imshow("YOLO Object Tracking & Counting", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 3 cars, 2 buss, 250.2ms
Speed: 10.4ms preprocess, 250.2ms inference, 16.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 buss, 143.5ms
Speed: 2.2ms preprocess, 143.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 buss, 151.4ms
Speed: 2.0ms preprocess, 151.4ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 buss, 144.3ms
Speed: 2.7ms preprocess, 144.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 cars, 2 buss, 135.1ms
Speed: 1.9ms preprocess, 135.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 buss, 139.5ms
Speed: 1.8ms preprocess, 139.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 motorcycles, 2 buss, 148.0ms
Speed: 2.2ms preprocess, 148.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 2 motorcycles, 2 buss, 14