In [3]:
import cv2
from ultralytics import YOLO
import numpy as np

# Load YOLOv8 model from a local file
model = YOLO("yolov8n.pt")

# Initialize video capture (use your video file path here)
video_path = 'video.mp4'  # Change this to your video file path
cap = cv2.VideoCapture(video_path)

# VideoCapture does not raise when the source cannot be opened; without this
# check the frame loop would simply exit on its first read with no explanation.
if not cap.isOpened():
    raise IOError(f"Could not open video source: {video_path!r}")

# Tracked objects by ID: object_id -> (class_id, (center_x, center_y))
tracked_objects = {}
# Number of distinct objects registered so far, keyed by class name
object_count = {}

# Function to calculate the distance between two points
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components (index 0 and 1) of each point are used.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return np.sqrt(squared_length)

# Maximum distance (in the same units as the box coordinates) between a
# detection's center and a tracked object's last known center for the two to be
# considered the same object.
DISTANCE_THRESHOLD = 50  # Adjust as needed

# Detections whose confidence is not above this value are ignored.
CONFIDENCE_THRESHOLD = 0.5

# Next unused object ID; incremented each time a new object is registered.
object_id_counter = 0

try:
    while True:
        # Read a frame from the video; stop when no frame is returned
        ret, frame = cap.read()
        if not ret:
            break

        # Perform inference
        results = model(frame)

        # Extract results. Tensors are moved to the CPU first because calling
        # .numpy() directly fails when inference ran on a GPU device.
        detections = results[0].boxes
        boxes = detections.xyxy.cpu().numpy()  # x1, y1, x2, y2
        confidences = detections.conf.cpu().numpy()  # Confidence scores
        class_ids = detections.cls.cpu().numpy().astype(int)  # Class IDs

        # Detections kept for this frame as (class_id, center_x, center_y)
        current_objects = []

        for box, conf, class_id in zip(boxes, confidences, class_ids):
            # Filter out low-confidence predictions
            if not conf > CONFIDENCE_THRESHOLD:
                continue

            x1, y1, x2, y2 = box
            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            center = (center_x, center_y)
            current_objects.append((class_id, center_x, center_y))

            # Find the nearest already-tracked object of the same class that
            # lies within DISTANCE_THRESHOLD of this detection.
            matched_id = None
            matched_dist = DISTANCE_THRESHOLD
            for obj_id, (tracked_class_id, tracked_coords) in tracked_objects.items():
                if tracked_class_id != class_id:
                    continue
                dist = distance(center, tracked_coords)
                if dist < matched_dist:
                    matched_id, matched_dist = obj_id, dist

            if matched_id is not None:
                # Refresh the stored position so a moving object keeps matching
                # its own track instead of being re-counted once it has drifted
                # past the threshold from where it was first seen.
                tracked_objects[matched_id] = (class_id, center)
            else:
                # Not tracked yet: assign a new ID and count it once
                tracked_objects[object_id_counter] = (class_id, center)
                object_name = model.names[class_id]
                object_count[object_name] = object_count.get(object_name, 0) + 1
                object_id_counter += 1

            # Draw bounding boxes and labels on the frame
            label = f'{model.names[class_id]}: {conf:.2f}'
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(frame, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)

        # Display total counts of distinct objects
        count_texts = [f'{name} = {count}' for name, count in object_count.items()]
        cv2.putText(frame, ', '.join(count_texts), (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 2)

        # Show the frame
        cv2.imshow("Object Counting", frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and close windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()



0: 384x640 5 persons, 2 motorcycles, 2 buss, 1 truck, 62.5ms
Speed: 2.5ms preprocess, 62.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 2 motorcycles, 2 buss, 1 truck, 93.2ms
Speed: 1.8ms preprocess, 93.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 2 motorcycles, 2 buss, 1 truck, 116.7ms
Speed: 2.3ms preprocess, 116.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 motorcycle, 2 buss, 1 truck, 86.3ms
Speed: 2.1ms preprocess, 86.3ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 motorcycle, 2 buss, 78.5ms
Speed: 2.3ms preprocess, 78.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 car, 1 bus, 72.2ms
Speed: 2.3ms preprocess, 72.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 motorcycle, 2 buss, 71.2ms
Speed: 2.5ms preproce