In [1]:
from ultralytics import YOLO
import cv2
import time

# Load YOLO model and get class names
model = YOLO("yolov8n.pt")
names = model.model.names

# Video source
video_path = "LeftObject_1.avi"
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), "Error reading video file"
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# Video writer to save output
output_path = "distance_calculation.avi"
video_writer = cv2.VideoWriter(output_path,
                               cv2.VideoWriter_fourcc(*'mp4v'),
                               fps,
                               (w, h))

# Custom function to check if two bounding boxes overlap
def boxes_overlap(box1, box2):
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2
    return not (x2_1 < x1_2 or x1_1 > x2_2 or y2_1 < y1_2 or y1_1 > y2_2)

# Timer variables
timer_started = False
start_time = None

while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Perform object detection/tracking
    results = model(im0)

    # Prepare lists for person and bag bounding boxes
    person_boxes = []
    bag_boxes = []

    if results[0].boxes:
        # Get bounding boxes and classify them
        for box in results[0].boxes:
            cls_idx = int(box.cls)  # Class index
            bbox = box.xyxy[0].tolist()  # Get bounding box coordinates
            if names[cls_idx] == 'person':
                person_boxes.append(bbox)
            elif names[cls_idx] in ('suitcase', 'handbag', 'backpack', 'briefcase'):
                bag_boxes.append(bbox)

    # Initialize variables for object counts
    num_people = len(person_boxes)
    num_bags = len(bag_boxes)

    # Calculate distance between overlapping person and bag bounding boxes
    for person_box in person_boxes:
        for bag_box in bag_boxes:
            if boxes_overlap(person_box, bag_box):
                # Calculate Euclidean distance between centers of boxes
                person_center = ((person_box[0] + person_box[2]) / 2, (person_box[1] + person_box[3]) / 2)
                bag_center = ((bag_box[0] + bag_box[2]) / 2, (bag_box[1] + bag_box[3]) / 2)
                distance_cm = ((person_center[0] - bag_center[0]) ** 2 + (person_center[1] - bag_center[1]) ** 2) ** 0.5 * 100  # Convert to cm

                if distance_cm > 10:
                    if not timer_started:
                        timer_started = True
                        start_time = time.time()
                    else:
                        elapsed_time = time.time() - start_time
                        cv2.putText(im0, f"Timer: {int(elapsed_time)} s", (w - 150, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # Optional: draw a line between the boxes
                cv2.line(im0, (int(person_center[0]), int(person_center[1])), (int(bag_center[0]), int(bag_center[1])), (0, 255, 0), 2)

    # Display object counts
    cv2.putText(im0, f"People: {num_people}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    cv2.putText(im0, f"Bags: {num_bags}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Display the frame
    cv2.imshow('Frame', im0)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    video_writer.write(im0)

# Release resources
cap.release()
video_writer.release()
cv2.destroyAllWindows()



0: 384x640 3 persons, 168.6ms
Speed: 8.0ms preprocess, 168.6ms inference, 1751.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 171.3ms
Speed: 3.0ms preprocess, 171.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 200.0ms
Speed: 3.2ms preprocess, 200.0ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 1 handbag, 204.6ms
Speed: 4.0ms preprocess, 204.6ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 190.7ms
Speed: 2.9ms preprocess, 190.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 184.0ms
Speed: 2.9ms preprocess, 184.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 handbag, 175.5ms
Speed: 4.1ms preprocess, 175.5ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 184.8ms
Speed: 3.4ms preprocess, 184.8ms inferenc

In [2]:
cap.release()
video_writer.release()
cv2.destroyAllWindows()