In [12]:
import cv2
import time
import json
from ultralytics import YOLO

# Person-detection script: runs YOLO on every frame of a video file, draws a
# box around each detected person, and records the box centroids per frame
# into 'output.json'.

# Set the video file path directly
VIDEO_PATH = r"C:\Users\nishi\store-analytics\yolov8\20250118_123057.mp4"  # 🔹 Change this to your actual video file

# Class 0 is 'person' in the COCO dataset
PERSON_CLASS_ID = 0

# Load the YOLO model
model = YOLO('yolov8n.pt')

# Open the video file
cap = cv2.VideoCapture(VIDEO_PATH)

# One entry per frame that contained at least one person:
#   {"centroids": [[x, y], ...], "time": <timestamp>}
data = []

try:
    # Fail loudly instead of silently writing an empty output file when the
    # path is wrong or the file cannot be decoded.
    if not cap.isOpened():
        raise OSError(f"Could not open video file: {VIDEO_PATH}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read failure): stop processing.
            break

        # Perform inference with stream=True
        results = model(frame, stream=True)

        # NOTE(review): this is the wall-clock time at which the frame is
        # processed, not the frame's position inside the video. If positions
        # within the recording are wanted, cap.get(cv2.CAP_PROP_POS_MSEC)
        # is presumably the right source - confirm with consumers of the
        # JSON before changing it.
        timestamp = time.time()

        # Centroids of all persons detected in the current frame
        centroids = []

        for result in results:
            for detection in result.boxes:
                # Keep only 'person' detections.
                if int(detection.cls) != PERSON_CLASS_ID:
                    continue

                # Convert to plain Python ints so the values are safe to hand
                # to OpenCV drawing calls and to the JSON encoder.
                x1, y1, x2, y2 = detection.xyxy[0].cpu().numpy().astype(int).tolist()
                centroids.append([(x1 + x2) // 2, (y1 + y2) // 2])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Frames without any person are not recorded.
        if centroids:
            data.append({
                "centroids": centroids,
                "time": timestamp
            })

        # Display the annotated frame
        cv2.imshow('Frame', frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the preview window, even when
    # inference or drawing raises part-way through the video.
    cap.release()
    cv2.destroyAllWindows()

# Write the data to a JSON file
with open('output.json', 'w') as f:
    json.dump(data, f, indent=4)


0: 640x384 12 persons, 1 bottle, 1 chair, 1 dining table, 2 laptops, 114.4ms
Speed: 4.0ms preprocess, 114.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 persons, 1 bottle, 1 chair, 1 dining table, 2 laptops, 111.1ms
Speed: 2.0ms preprocess, 111.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 persons, 1 bottle, 2 cups, 3 chairs, 1 dining table, 4 laptops, 110.8ms
Speed: 3.0ms preprocess, 110.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 12 persons, 1 bottle, 1 cup, 2 chairs, 1 dining table, 4 laptops, 99.9ms
Speed: 3.0ms preprocess, 99.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 10 persons, 2 bottles, 3 chairs, 1 dining table, 5 laptops, 94.8ms
Speed: 2.5ms preprocess, 94.8ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 12 persons, 2 bottles, 1 cup, 4 chairs, 1 dining table, 5 laptops, 1 cell phone, 98.5ms
Speed