In [1]:
import cv2
from ultralytics import YOLO
from collections import defaultdict

In [2]:
# Load the YOLO model from the 'yolo11l.pt' checkpoint; it is reused for
# per-frame tracking in the processing loop below.
model = YOLO('yolo11l.pt')

In [3]:
# Mapping from class index to class name exposed by the loaded model;
# used later to turn predicted class indices into readable labels.
class_list = model.names
# Bare expression so the notebook displays the full mapping
class_list

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [4]:
# Path of the input video to analyse
VIDEO_PATH = 'test_videos/4.mp4'

# Open the video and fail fast if it cannot be read. Without this check an
# unopened capture makes the processing loop exit immediately and silently,
# which looks like "no vehicles detected" instead of a bad path/codec.
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_PATH}")

In [5]:
# Vertical pixel position (y) of the red counting line drawn on each frame;
# a tracked object is counted once its center point lies below this y value.
line_y_red = 430  # Red line position

In [6]:
# Running tally of counted objects keyed by class name; defaultdict(int) lets
# the loop increment a class the first time it is seen without initialising it.
class_counts = defaultdict(int)

In [7]:
# Track ids that have already been counted, so each tracked object
# contributes to class_counts at most once.
crossed_ids = set()

In [8]:
# Number of frames read from the video so far (incremented once per frame
# in the processing loop).
frame_count = 0

In [9]:
# Confidence threshold for filtering weak detections: boxes whose confidence
# is below this value are skipped (not drawn, not tracked for speed, not counted).
CONFIDENCE_THRESHOLD = 0.5

In [10]:
# Speed estimation parameters
PIXEL_TO_METER_RATIO = 0.05  # Example: 1 pixel = 0.05 meters

# Frame rate assumed when the video container does not report one
DEFAULT_FRAME_RATE = 30.0

# Frames per second reported by the video. Some backends/containers report 0
# (or an invalid value) here, which would silently force every speed estimate
# to 0 km/h, so fall back to a default and say so.
FRAME_RATE = cap.get(cv2.CAP_PROP_FPS)
if not FRAME_RATE > 0:  # also catches NaN
    print(f"Warning: video reports no valid FPS; assuming {DEFAULT_FRAME_RATE} fps for speed estimates")
    FRAME_RATE = DEFAULT_FRAME_RATE

# Last known center point (cx, cy) of each tracked object, keyed by track id
prev_positions = {}

In [11]:
# FPS Calculation
# Wall-clock timestamp of the previous loop iteration. It starts at 0, so the
# FPS value computed for the very first frame is not meaningful.
prev_time = 0

In [12]:
# Evaluation buffers for precision/recall.
#   y_true: ground-truth class labels (left empty here; fill in with actual
#           values if they are available)
#   y_pred: class labels produced by the model, appended as objects are counted
y_true, y_pred = [], []

In [13]:
import time
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score,f1_score

In [14]:
# Main processing loop: read frames, run YOLO tracking, annotate every tracked
# object, estimate its speed, count objects below the red line, and display the
# annotated video until it ends or 'q' is pressed.

# Class ids passed to the tracker: bicycle, car, motorcycle, bus, train, truck
TRACKED_CLASS_IDS = [1, 2, 3, 5, 6, 7]

# Horizontal extent (x pixels) of the red counting line drawn on the frame
LINE_X_START, LINE_X_END = 690, 1130

# Frame index at which each track id last had its position recorded. Needed
# because a track can miss frames (e.g. skipped for low confidence), and the
# displacement must then be divided by the real number of elapsed frames.
last_seen_frame = {}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # End of video or read failure

        frame_count += 1

        # FPS calculation (wall-clock time between consecutive iterations).
        # Guard against a zero interval, which coarse timers can produce.
        current_time = time.time()
        elapsed = current_time - prev_time
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_time = current_time

        # YOLO inference with tracking
        results = model.track(frame, persist=True, classes=TRACKED_CLASS_IDS)
        detections = results[0].boxes

        # Draw the counting line on every frame, even when nothing is tracked
        cv2.line(frame, (LINE_X_START, line_y_red), (LINE_X_END, line_y_red), (0, 0, 255), 3)

        # `boxes.id` is None when the tracker assigned no ids for this frame
        # (e.g. no detections); calling .int() on it would raise AttributeError.
        if detections is not None and detections.id is not None:
            boxes = detections.xyxy.cpu()
            track_ids = detections.id.int().cpu().tolist()
            class_indices = detections.cls.int().cpu().tolist()
            confidences = detections.conf.cpu()

            for box, track_id, class_idx, conf in zip(boxes, track_ids, class_indices, confidences):
                if conf < CONFIDENCE_THRESHOLD:
                    continue  # Skip low-confidence detections

                x1, y1, x2, y2 = map(int, box)
                cx = (x1 + x2) // 2  # Center point
                cy = (y1 + y2) // 2

                class_name = class_list[class_idx]

                cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
                cv2.putText(frame, f"ID: {track_id} {class_name} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Track vehicle speed: displacement since the last recorded
                # position, normalised by the number of frames that elapsed.
                if track_id in prev_positions:
                    prev_cx, prev_cy = prev_positions[track_id]
                    frames_elapsed = max(1, frame_count - last_seen_frame.get(track_id, frame_count - 1))
                    distance_pixels = np.sqrt((cx - prev_cx) ** 2 + (cy - prev_cy) ** 2)
                    distance_meters = distance_pixels * PIXEL_TO_METER_RATIO
                    speed_mps = distance_meters * FRAME_RATE / frames_elapsed  # Speed in meters per second
                    speed_kph = speed_mps * 3.6  # Convert to km/h

                    cv2.putText(frame, f"Speed: {speed_kph:.1f} km/h", (x1, y2 + 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

                prev_positions[track_id] = (cx, cy)
                last_seen_frame[track_id] = frame_count

                # Vehicle Counting: each track id is counted once, the first
                # time its center is below the red line.
                # NOTE(review): this also counts objects that are first seen
                # already below the line and ignores the line's x extent —
                # confirm this is the intended counting rule.
                if cy > line_y_red and track_id not in crossed_ids:
                    crossed_ids.add(track_id)
                    class_counts[class_name] += 1
                    y_pred.append(class_name)  # Add detected class for evaluation

        # Display vehicle count
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # Display FPS
        cv2.putText(frame, f"FPS: {fps:.2f}", (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        # Show frame
        cv2.imshow("YOLO Object Tracking & Counting", frame)

        # Press 'q' to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if tracking or drawing raises part-way through,
    # so the video handle and the display window are not leaked.
    cap.release()
    cv2.destroyAllWindows()

# Precision & Recall Calculation
if y_true and y_pred:  # Only calculate if ground truth is available
    if len(y_true) != len(y_pred):
        # The metric functions require one prediction per ground-truth label
        print(f"Skipping precision/recall: {len(y_true)} ground-truth labels "
              f"vs {len(y_pred)} predictions")
    else:
        precision = precision_score(y_true, y_pred, average='macro')
        recall = recall_score(y_true, y_pred, average='macro')
        print(f"Precision: {precision:.2f}, Recall: {recall:.2f}")


0: 384x640 10 cars, 3 motorcycles, 3 buss, 1 truck, 298.9ms
Speed: 4.5ms preprocess, 298.9ms inference, 8.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 motorcycles, 3 buss, 1 truck, 374.6ms
Speed: 1.7ms preprocess, 374.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 motorcycles, 3 buss, 1 truck, 219.9ms
Speed: 1.2ms preprocess, 219.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 motorcycles, 3 buss, 1 truck, 190.3ms
Speed: 1.2ms preprocess, 190.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 4 motorcycles, 3 buss, 1 truck, 193.2ms
Speed: 1.4ms preprocess, 193.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



2025-02-14 21:33:43.947 Python[71191:14543291] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-14 21:33:43.947 Python[71191:14543291] +[IMKInputSession subclass]: chose IMKInputSession_Modern


0: 384x640 10 cars, 4 motorcycles, 3 buss, 1 truck, 206.9ms
Speed: 1.2ms preprocess, 206.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 4 motorcycles, 3 buss, 2 trucks, 199.9ms
Speed: 1.7ms preprocess, 199.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 4 motorcycles, 3 buss, 2 trucks, 243.7ms
Speed: 1.4ms preprocess, 243.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 3 motorcycles, 3 buss, 2 trucks, 188.6ms
Speed: 1.4ms preprocess, 188.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 4 motorcycles, 3 buss, 2 trucks, 206.6ms
Speed: 1.3ms preprocess, 206.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 4 motorcycles, 3 buss, 1 truck, 248.3ms
Speed: 1.6ms preprocess, 248.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 motorcycles, 3 buss, 2 t