In [1]:
import sys

import cv2 as cv
import numpy as np

from motion_detector.motion_detector import MotionDetector
from people_detector.people_detector import PeopleDetector
from face_recognizer.face_recognizer import FaceRecognizer
from utils.bbox_utils import crop_bboxes
from utils.view import view

from PIL import Image

import utils.logger as logger
from logging import DEBUG, INFO, CRITICAL

# Initialise the project logger with the CRITICAL level imported above.
# NOTE(review): presumably this silences everything below CRITICAL so the cell
# output only shows the pipeline's own print() lines — confirm in utils.logger.
logger.init_logger(CRITICAL)

  from .autonotebook import tqdm as notebook_tqdm


## PROCESS VIDEO
*This is where the magic happens.*


In [2]:
def process_video(video_path: str, motion_detector: MotionDetector, people_detector: PeopleDetector,
                  face_recognizer: FaceRecognizer, scale: float = 0.5,
                  early_stop=None, starts_from=0):
    """
    Run the motion -> people -> face pipeline over a video and print the faces found.

    Every processed frame is resized by ``scale`` and handed to ``motion_detector``.
    The merged motion boxes are cropped out of the frame, ``people_detector`` is run
    on the crops, and each crop that has at least one detection and is large enough
    is passed to ``face_recognizer``. Results are reported with ``print``; nothing is
    returned.

    The capture and any OpenCV windows are always released, even when processing
    raises or the cell is interrupted (KeyboardInterrupt).

    Parameters:
        video_path: Path to the video file.
        motion_detector: Called as ``motion_detector(frame, draw=False)``; must return
            ``(detections, merged_detections, frame)``.
        people_detector: Must expose ``detect_on_frames(frames)`` yielding
            ``(probs, bboxes, result)`` triples.
        face_recognizer: Must expose ``min_face_size`` and ``recognize_faces(image)``.
        scale: Scaling factor for resizing frames.
        early_stop: Stop once this many frames have been read (``None`` = whole video).
            Skipped frames count towards this limit.
        starts_from: Number of leading frames to read and discard.
    """
    capture = cv.VideoCapture(video_path)
    iterations = 0
    try:
        while early_stop is None or iterations < early_stop:
            iterations += 1
            ret, frame = capture.read()

            # Check for end-of-video first: otherwise a `starts_from` beyond the end of
            # the file keeps issuing failed reads until the skip counter is exhausted.
            if not ret:
                break
            if iterations <= starts_from:
                print("skipping frame {}".format(iterations))
                continue

            frame = cv.resize(frame, None, fx=scale, fy=scale, interpolation=cv.INTER_AREA)

            # Only the merged boxes are used; the raw per-contour detections are ignored.
            _, merged_detections, frame = motion_detector(frame, draw=False)
            if not merged_detections:
                continue

            # Drop the trailing column of each row (the area, per the original note)
            # so that only the box coordinates reach crop_bboxes.
            merged_detections = np.array(merged_detections)
            cropped_frames = crop_bboxes(frame, bboxes=merged_detections[:, :-1])

            # Detect people on the motion crops.
            people_detections = people_detector.detect_on_frames(cropped_frames)
            for probs, _, result in people_detections:
                if len(probs) == 0:
                    continue
                # NOTE(review): orig_img is presumably the crop that was fed to the
                # detector, not the person box inside it — confirm in PeopleDetector.
                detected_person_image = result.orig_img

                # Skip images too small for the face recognizer.
                height, width = detected_person_image.shape[:2]
                if height < face_recognizer.min_face_size or width < face_recognizer.min_face_size:
                    continue

                faces = face_recognizer.recognize_faces(detected_person_image)
                if len(faces) > 0:
                    print(f"Frame {iterations}: {len(faces)} faces detected.")

                    for detected_face in faces:
                        if detected_face['label'] is not None:
                            print(f"Detected face: {detected_face['label']} with confidence {detected_face['confidence']}")
                        else:
                            print("Detected unrecognized face! 😭")
        else:
            # Reached only when the loop condition fails, i.e. early_stop was hit
            # (a `break` at end-of-video skips this branch).
            print(f"Stopped after {iterations} frames due early stopping condition.")
    finally:
        capture.release()
        cv.destroyAllWindows()

        # On macOS the windows are only torn down once the GUI event loop is pumped.
        if sys.platform == 'darwin':
            for _ in range(4):
                cv.waitKey(1)

        cv.destroyAllWindows()



## Main

In [3]:
# People detector: loads the "yolo11<yolosize>.pt" weights file and moves the
# model to the CPU via .to('cpu').
yolosize = 'n'
yolo11 = PeopleDetector(f"yolo11{yolosize}.pt", verbose=False, )
yolo11.to('cpu')

# Motion-detector tuning values, passed straight through to MotionDetector.
# NOTE(review): presumably a minimum bounding-box area and an overlap ratio used
# when merging boxes — units and semantics live in MotionDetector; confirm there.
overlap_threshold = 0.0005
area_threshold = 700
motion_detector = MotionDetector(area_threshold=area_threshold, overlap_threshold=overlap_threshold)

# Face recognizer with a single enrolled identity ('Michael Scott').
# NOTE(review): the saved output of the run cell reports the label "Marini", which
# is not enrolled here — the stored output appears to come from an earlier state
# of this cell; re-run with a fresh kernel to confirm.
face_recognizer = FaceRecognizer(threshold=0.5)
face_5 = Image.open('demo_images/face_5.jpg')
face_recognizer.enroll_face(face_5, 'Michael Scott')

# Optional sanity check of the detector on a still image (disabled):
# probs, bboxes, _ = yolo11.detect("The-Office-HD-Background.jpg")
# print('confidence scores', probs)
# print('bboxes', bboxes)

In [4]:
# Run the full pipeline on the demo clip, naming every argument explicitly.
# To process only a window of frames, additionally pass e.g.
# starts_from=3000, early_stop=3500.
video_path = 'datasets/new_video.mp4'
process_video(
    video_path=video_path,
    motion_detector=motion_detector,
    people_detector=yolo11,
    face_recognizer=face_recognizer,
    scale=0.5,
)

Frame 70: 1 faces detected.
Detected unrecognized face! 😭
Frame 73: 1 faces detected.
Detected unrecognized face! 😭
Frame 74: 1 faces detected.
Detected unrecognized face! 😭
Frame 75: 1 faces detected.
Detected unrecognized face! 😭
Frame 76: 1 faces detected.
Detected unrecognized face! 😭
Frame 77: 1 faces detected.
Detected unrecognized face! 😭
Frame 78: 1 faces detected.
Detected unrecognized face! 😭
Frame 79: 1 faces detected.
Detected unrecognized face! 😭
Frame 80: 1 faces detected.
Detected unrecognized face! 😭
Frame 81: 1 faces detected.
Detected unrecognized face! 😭
Frame 82: 1 faces detected.
Detected face: Marini with confidence 0.5553990006446838
Frame 83: 1 faces detected.
Detected unrecognized face! 😭
Frame 84: 1 faces detected.
Detected face: Marini with confidence 0.5688936114311218
Frame 85: 1 faces detected.
Detected unrecognized face! 😭
Frame 86: 1 faces detected.
Detected unrecognized face! 😭
Frame 87: 1 faces detected.
Detected unrecognized face! 😭
Frame 88: 1 faces 

KeyboardInterrupt: 

# People Detection TEST


Define the base functions for people detection. <br/>
It takes as input an array of frames containing the motion detected by MOG2 above.

In [None]:
# imports

In [None]:
# Load the detector used by test_yolo11, which reads this module-level `model`
# directly instead of receiving it as an argument — this cell must run first.
# NOTE(review): "./yolo11x.pt" is a local path; whether PeopleDetector can also
# resolve official weights by name should be confirmed in its implementation.
model = PeopleDetector("./yolo11x.pt")  # load an official model, or use local path

In [None]:
def test_yolo11(video_path: str, scale: float = 0.5):
    """
    Play a video with the detector's annotations drawn on every frame.

    Each frame is passed at full resolution to the module-level ``model``; the
    annotated result is resized by ``scale`` for display only. Playback stops at the
    end of the video or when 'q' is pressed. The capture and the display window are
    always released, even if inference or display raises or the cell is interrupted.

    Parameters:
        video_path: Path to the video file.
        scale: Scaling factor applied to the annotated frame before it is shown.
    """
    cap = cv.VideoCapture(video_path)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video (or a failed read).
                break

            # Run inference on the unscaled frame and draw the first result.
            results = model(frame)
            annotated_frame = results[0].plot()
            annotated_frame = cv.resize(annotated_frame, None, fx=scale, fy=scale, interpolation=cv.INTER_LINEAR)
            cv.imshow("YOLO Inference", annotated_frame)

            # Stop when 'q' is pressed.
            if cv.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv.destroyAllWindows()

        # Same teardown as process_video: on macOS the window only closes once
        # the GUI event loop has been pumped a few times.
        if sys.platform == 'darwin':
            for _ in range(4):
                cv.waitKey(1)

        cv.destroyAllWindows()

## Main test YOLO11

In [None]:
# Preview detections on the 360-degree clip, shown at a quarter of the original size.
test_yolo11(video_path="datasets/SamsungGear360.mp4", scale=0.25)