In [1]:
import cv2
from ultralytics import YOLO
import numpy as np

# Initialize YOLO model
model = YOLO("yolo11n.pt")

# Load video
video_path = 'empty bottle.mp4'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

# Output video writer
output_path = 'output-empty-bottle-detection.mp4'
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# Counting parameters
bottle_count = 0
line_position = int(frame_width * 0.9)  # Red line at 90% width
tracked_objects = {}  # Stores object ID, centroid, and state
next_id = 1
frame_persistence = {}  # To prevent ID switching

# Function to calculate Euclidean distance
def euclidean_distance(pt1, pt2):
    return np.sqrt((pt1[0] - pt2[0])**2 + (pt1[1] - pt2[1])**2)

print("Processing video...")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    results = model.predict(frame, conf=0.5)
    bottle_detections = [det for det in results[0].boxes if int(det.cls) == 39]

    current_objects = {}

    for det in bottle_detections:
        x1, y1, x2, y2 = map(int, det.xyxy[0])
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        # Try to match with existing tracked objects
        matched_id = None
        for obj_id, (prev_center, crossed) in tracked_objects.items():
            if euclidean_distance((center_x, center_y), prev_center) < 50:  # Distance threshold
                matched_id = obj_id
                break

        # If no match, assign a new ID (but check persistence buffer)
        if matched_id is None:
            if any(euclidean_distance((center_x, center_y), prev_center) < 50 for prev_center in frame_persistence.values()):
                continue  # Avoid counting the same bottle again if it appears quickly
            matched_id = next_id
            next_id += 1

        # Update object tracking
        crossed_before = tracked_objects.get(matched_id, (None, False))[1]
        current_objects[matched_id] = ((center_x, center_y), crossed_before)

        # Store for persistence check
        frame_persistence[matched_id] = (center_x, center_y)

        # Draw bounding box and ID
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(frame, (center_x, center_y), 5, (255, 0, 0), -1)
        cv2.putText(frame, f"ID: {matched_id}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Bottle counting logic (Only count once when moving past the line)
        if center_x >= line_position and not crossed_before:
            bottle_count += 1
            current_objects[matched_id] = ((center_x, center_y), True)  # Mark as counted

    tracked_objects = current_objects  # Update tracked objects

    # Draw red counting line
    cv2.line(frame, (line_position, 0), (line_position, frame_height), (0, 0, 255), 2)

    # Display bottle count
    cv2.putText(frame, f"Bottle Count: {bottle_count}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

    # Show and save frame
    cv2.imshow('Bottle Detection with Counter', frame)
    out.write(frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
cap.release()
out.release()
cv2.destroyAllWindows()

print(f"Detection completed. Output saved to '{output_path}'")
print(f"Total bottles counted: {bottle_count}")


Processing video...

0: 384x640 5 bottles, 130.4ms
Speed: 8.8ms preprocess, 130.4ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 117.4ms
Speed: 14.6ms preprocess, 117.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 111.4ms
Speed: 10.0ms preprocess, 111.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 104.0ms
Speed: 2.0ms preprocess, 104.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 122.8ms
Speed: 3.0ms preprocess, 122.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 101.2ms
Speed: 4.0ms preprocess, 101.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 bottles, 101.3ms
Speed: 4.0ms preprocess, 101.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 bottles, 134.9ms
Speed: 3.0ms preprocess, 134.9ms inference, 