In [18]:
import cv2
import numpy as np
from ultralytics import YOLO


### Mouse callback: print the cursor coordinates while it moves over the frame, to help define the ROI area
def RGB(event, x, y, flags, param):
    """Mouse callback that prints the cursor position as ``[x, y]`` on every move.

    The signature follows what ``cv2.setMouseCallback`` passes to a handler;
    only ``event``, ``x`` and ``y`` are used here, ``flags`` and ``param`` are
    ignored. Events other than mouse movement produce no output.
    """
    # Guard clause: clicks, wheel events, etc. are of no interest here.
    if event != cv2.EVENT_MOUSEMOVE:
        return
    print([x, y])

# Create the display window first so the mouse callback can be attached to it.
cv2.namedWindow('vid')
cv2.setMouseCallback('vid', RGB)


### YOLO model and its class-id -> class-name lookup
model = YOLO("models/yolo11n.pt")
names = model.names

### Video source (file path; swap in the commented line to read from a capture device)
cap = cv2.VideoCapture("media/vid_1.mp4")
#cap = cv2.VideoCapture(1)

### ROI polygon vertices as (x, y) pixel coordinates on the frame
ROI = np.array(
    [
        (680, 317),
        (220, 334),
        (257, 393),
        (801, 382),
    ],
    dtype=np.int32,
)

### Main loop: track objects on every FRAME_STRIDE-th frame and count persons inside the ROI
FRAME_STRIDE = 2  # run the tracker on every 2nd frame only; the frames in between are dropped

persons_counter = 0  # persons whose box centre lies strictly inside the ROI (current frame)
people = 0           # number of tracked objects (any class) in the current frame
count = 0            # frames read since the last processed frame

try:
    # Fail loudly on a bad path / missing device instead of exiting silently.
    if not cap.isOpened():
        raise RuntimeError("Could not open the video source")

    while True:
        # Stop when the source has no more frames.
        ret, frame = cap.read()
        if not ret:
            break

        # Skip frames so that only every FRAME_STRIDE-th one is processed.
        count += 1
        if count % FRAME_STRIDE != 0:
            continue
        count = 0

        # The ROI polygon is used on the resized frame, so the size is fixed here.
        frame = cv2.resize(frame, (1000, 500))

        # Run YOLO with tracking state persisted between frames.
        results = model.track(frame, persist=True)
        boxes = results[0].boxes

        persons_counter = 0
        people = 0

        # `boxes.id` is None when the tracker reports no tracks for this frame,
        # so everything that touches it must stay behind this check.
        if boxes is not None and boxes.id is not None:
            people = len(boxes.id)

            bboxes = boxes.xyxy.int().cpu().tolist()
            class_ids = boxes.cls.int().cpu().tolist()
            track_ids = boxes.id.int().cpu().tolist()
            confidences = boxes.conf.cpu().tolist()

            for bbox, class_id, track_id, conf in zip(bboxes, class_ids, track_ids, confidences):
                class_name = names[class_id]
                if class_name != "person":
                    continue

                # Centre of the bounding box decides ROI membership.
                x1, y1, x2, y2 = bbox
                xc = (x1 + x2) // 2
                yc = (y1 + y2) // 2

                # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside;
                # only points strictly inside the polygon are counted.
                inside = cv2.pointPolygonTest(contour=ROI, pt=(xc, yc), measureDist=False)
                if inside > 0:
                    persons_counter += 1

                    cv2.circle(frame, center=(xc, yc), radius=3, color=(255, 255, 0), thickness=-1)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, class_name, (x1, y1 - 2), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0))

        # Draw the overlay and show the frame even when nothing was tracked, so the
        # window keeps updating and the 'q' key is still polled.
        cv2.putText(frame, f'people = {persons_counter}', (50, 50), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 255))
        cv2.polylines(frame, [ROI], True, (255, 0, 255), 2)

        cv2.imshow('vid', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, including on errors and
    # kernel interrupts.
    cap.release()
    cv2.destroyAllWindows()


0: 320x640 21 persons, 60.0ms
Speed: 2.0ms preprocess, 60.0ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)
[157, 42]

0: 320x640 21 persons, 55.5ms
Speed: 1.0ms preprocess, 55.5ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 48.4ms
Speed: 1.7ms preprocess, 48.4ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 46.5ms
Speed: 1.0ms preprocess, 46.5ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 19 persons, 43.3ms
Speed: 1.0ms preprocess, 43.3ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 43.7ms
Speed: 2.0ms preprocess, 43.7ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 19 persons, 44.5ms
Speed: 1.0ms preprocess, 44.5ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 45.7ms
Speed: 1.0ms preprocess, 45.7ms inference, 0.0ms postprocess pe

In [7]:
# Number of tracked objects in the last processed frame.
# Relies on `results` left behind by the processing loop; `boxes.id` is None
# when that frame had no tracks, in which case the count is reported as 0.
last_track_ids = results[0].boxes.id
print(0 if last_track_ids is None else len(last_track_ids))

16
