In [None]:
import numpy as np
import cv2
from ultralytics import YOLO
from IPython.display import clear_output


def mouseXY(event, x, y, flags, param):
    """Mouse callback: print the cursor position whenever the mouse moves.

    Only ``cv2.EVENT_MOUSEMOVE`` is handled; every other event is ignored.
    The previous notebook output is cleared first so that only the latest
    ``[x, y]`` pair is shown. ``flags`` and ``param`` are accepted because
    the callback signature requires them, but they are not used.
    """
    if event != cv2.EVENT_MOUSEMOVE:
        return
    clear_output(wait=True)
    print([x, y])

def capture_ent(frame, x1, y1, x2, y2,track_id, foot):
    """Crop a tracked person out of ``frame``, preview it and save it as an entry snapshot.

    Args:
        frame: Image indexed as ``frame[row, col]`` (assumed to be a NumPy
            array, as returned by OpenCV).
        x1, y1: Top-left corner of the bounding box.
        x2, y2: Bottom-right corner of the bounding box.
        track_id: Tracker id of the person; becomes part of the file name.
        foot: ``(x, y)`` point; becomes part of the file name.

    The crop is shown in the "person" window and written to
    ``images/entery/ID_<track_id>_<foot x>-<foot y>.jpg``. Nothing is shown
    or written when the bounding box yields an empty crop.
    """
    import os

    # Negative indices would wrap around in NumPy slicing and select the
    # wrong region, so clamp the box to the top/left image border.
    x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
    person_img = frame[y1:y2, x1:x2]
    if person_img.size == 0:
        # imshow/imwrite cannot handle an empty image.
        return

    cv2.imshow("person", person_img)
    imgName = f"images/entery/ID_{track_id}_{foot[0]}-{foot[1]}.jpg"
    # imwrite does not create missing directories; it just fails to write.
    os.makedirs(os.path.dirname(imgName), exist_ok=True)
    cv2.imwrite(filename=imgName, img=person_img)

def capture_ext(frame, x1, y1, x2, y2,track_id, foot):
    """Crop a tracked person out of ``frame``, preview it and save it as an exit snapshot.

    Args:
        frame: Image indexed as ``frame[row, col]`` (assumed to be a NumPy
            array, as returned by OpenCV).
        x1, y1: Top-left corner of the bounding box.
        x2, y2: Bottom-right corner of the bounding box.
        track_id: Tracker id of the person; becomes part of the file name.
        foot: ``(x, y)`` point; becomes part of the file name.

    The crop is shown in the "person" window and written to
    ``images/exit/ID_<track_id>_<foot x>-<foot y>.jpg``. Nothing is shown
    or written when the bounding box yields an empty crop.
    """
    import os

    # Negative indices would wrap around in NumPy slicing and select the
    # wrong region, so clamp the box to the top/left image border.
    x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
    person_img = frame[y1:y2, x1:x2]
    if person_img.size == 0:
        # imshow/imwrite cannot handle an empty image.
        return

    cv2.imshow("person", person_img)
    imgName = f"images/exit/ID_{track_id}_{foot[0]}-{foot[1]}.jpg"
    # imwrite does not create missing directories; it just fails to write.
    os.makedirs(os.path.dirname(imgName), exist_ok=True)
    cv2.imwrite(filename=imgName, img=person_img)

# Create the display window first so the mouse callback can be attached to it;
# mouseXY then receives the mouse events raised over the "vid" window.
cv2.namedWindow("vid")
cv2.setMouseCallback("vid", mouseXY)


### Load YOLO model
model = YOLO("models/yolo11n.pt")
# mapping used below to turn a predicted class id into its class name
classes_names = model.names

### Load video/camera
cap = cv2.VideoCapture("media/vid_1.mp4")

### Define ROI areas elements
# area contours: four (x, y) vertices each, tested against points of the
# resized 1200x700 frame
areaIn = np.array([[379, 549], [329, 554], [561, 671], [616, 661]], dtype=np.int32)
areaOut = np.array([[316, 556], [278, 560], [477, 682], [541, 676]], dtype=np.int32)

# track ids seen inside each ROI area, and track ids counted as entered/exited
areaOut_list, areaIn_list = [], []
entry_list, exit_list = [], []

### Run the video frames
# Reads frames from `cap`, runs YOLO tracking on every third frame, updates the
# entry/exit lists from the positions of each person's bottom bbox corners
# relative to the two ROI polygons, and shows the annotated frame in "vid".
# Press 'q' in the window to stop. The capture and all windows are released
# even if the loop is interrupted by an exception.
frames_count = 0
try:
    while True:
        # read frames; stop when the source has no more frames
        ret, frame = cap.read()
        if not ret:
            break

        # skip some frames: only every third frame is processed
        frames_count += 1
        if frames_count != 3:
            continue
        frames_count = 0

        # resizing frames (the ROI polygons are tested against this size)
        frame = cv2.resize(frame, (1200, 700))

        # run yolo on frames with persist tracking between frames
        objects = model.track(frame, persist=True)

        # process bounding boxes only when the tracker assigned ids
        boxes = objects[0].boxes
        if boxes is not None and boxes.id is not None:
            bboxes = boxes.xyxy.int().cpu().tolist()
            class_ids = boxes.cls.int().cpu().tolist()
            track_ids = boxes.id.int().cpu().tolist()

            # bbox, class and track id of each object in the frame
            for bbox, class_id, track_id in zip(bboxes, class_ids, track_ids):
                class_name = classes_names[class_id]
                if class_name != "person":
                    continue

                x1, y1, x2, y2 = bbox

                ### Define object keypoints
                # Bottom left corner (drives the entering test)
                corner_BL = (x1, y2)
                # Bottom right corner (drives the exiting test)
                corner_BR = (x2, y2)
                # Middle of the bbox bottom edge (drawn and used in file names)
                foot = ((x1 + x2) // 2, y2)

                ### test if the object keypoints are inside areaOut/areaIn areas
                # pointPolygonTest result is >= 0 when the point is inside or
                # on the edge of the polygon
                isInside_areaOut_ent = cv2.pointPolygonTest(contour=areaOut, pt=(corner_BL), measureDist=False)
                isInside_areaIn_ent = cv2.pointPolygonTest(contour=areaIn, pt=(corner_BL), measureDist=False)
                isInside_areaOut_ext = cv2.pointPolygonTest(contour=areaOut, pt=(corner_BR), measureDist=False)
                isInside_areaIn_ext = cv2.pointPolygonTest(contour=areaIn, pt=(corner_BR), measureDist=False)

                # Entering detection: bottom-left corner seen in areaOut first,
                # then in areaIn
                if (isInside_areaOut_ent >= 0) and (track_id not in areaOut_list):
                    areaOut_list.append(track_id)
                if (track_id in areaOut_list) and (isInside_areaIn_ent >= 0) and (track_id not in entry_list):
                    entry_list.append(track_id)
                    capture_ent(frame, x1, y1, x2, y2,track_id, foot)
                    # an id is counted in at most one of the two counters
                    if track_id in exit_list:
                        exit_list.remove(track_id)

                # Exiting detection: bottom-right corner seen in areaIn first,
                # then in areaOut
                if (isInside_areaIn_ext >= 0) and (track_id not in areaIn_list):
                    areaIn_list.append(track_id)
                if (track_id in areaIn_list) and (isInside_areaOut_ext >= 0) and (track_id not in exit_list):
                    exit_list.append(track_id)
                    capture_ext(frame, x1, y1, x2, y2,track_id, foot)
                    # an id is counted in at most one of the two counters
                    if track_id in entry_list:
                        entry_list.remove(track_id)

                ### Draw object annotations (after the captures above, so the
                ### saved crops do not contain the overlays of this object)
                # foot point
                cv2.circle(img=frame, center=foot, radius=3, color=(0,0,255), thickness=-1)
                # object bbox
                cv2.rectangle(img=frame, pt1=(x1, y1), pt2=(x2, y2), color=(0,255,0), thickness=1)
                # object class name
                txt1 = f'{class_name}'
                cv2.putText(img=frame, text=txt1, org=(x1, y1-2), fontFace=1, fontScale=0.9,thickness=2, color=(0,255,255))
                # object track id
                txt2 = f"{track_id}"
                cv2.rectangle(img=frame, pt1=(x2-20, y1), pt2=(x2, y1+20), color=(255,255,255), thickness=-1)
                cv2.putText(img=frame, text=txt2, org=(x2-17, y1+15), fontFace=1, fontScale=0.9,thickness=1, color=(0,0,255))

        # Everything below runs for every processed frame, including frames
        # without tracked objects, so the window keeps updating and the quit
        # key is always polled.

        ### Drawing ROI areas on the frame
        cv2.polylines(img=frame, pts=[areaIn], isClosed=True, color=(255,0,255), thickness=2)
        cv2.polylines(img=frame, pts=[areaOut], isClosed=True, color=(255,0,255), thickness=2)

        # Display people In/Out counters on the frame
        txt3 = f"People In = {len(entry_list)}"
        txt4 = f"People Out = {len(exit_list)}"
        cv2.rectangle(img=frame, pt1=(30, 20), pt2=(320, 120), color=(255,255,255), thickness=-1)
        cv2.putText(img=frame, text=txt3, org=(50, 50), fontFace=2, fontScale=1,thickness=1, color=(0,0,255))
        cv2.putText(img=frame, text=txt4, org=(50, 100), fontFace=2, fontScale=1,thickness=1, color=(0,0,255))

        ### Show the frame and poll the quit key
        cv2.imshow('vid', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # keep the notebook output from growing with each processed frame
        clear_output(wait=True)
finally:
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 1 banana, 60.3ms
Speed: 1.5ms preprocess, 60.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
