In [1]:
import sys
from logging import CRITICAL, DEBUG

import cv2 as cv
import numpy as np

import utils.logger as logger
from camera.video_frame_initializer import initializer
from face_recognizer.face_recognizer import FaceRecognizer
from motion_detector.motion_detector import MotionDetector
from people_detector.people_detector import PeopleDetector
from utils.bbox_utils import crop_bboxes
from utils.logger import get_logger

# Set up project logging once, passing DEBUG as the level, before any pipeline
# object is created (the cell outputs further down include [DEBUG] records).
logger.init_logger(DEBUG)

  from .autonotebook import tqdm as notebook_tqdm


## PROCESS VIDEO
*This is where the magic happens*


In [2]:
from utils.view import view


def process_video(videos: list[str], motion_detector: MotionDetector, people_detector: PeopleDetector,
                  face_recognizer: FaceRecognizer, scale: float = 0.5,
                  early_stop=None, starts_from=0):
    """
    Run the motion -> people -> face pipeline over the frames of the given videos.

    Frames are pulled in batches from a frame controller. Every processed frame is
    resized, passed to the motion detector, cropped around the merged motion boxes,
    and the crops are handed to the people detector. Crops in which a person was
    found are displayed and passed to the face recognizer; results are logged.

    Parameters:
        videos: Paths to the video files.
        motion_detector: Called as ``motion_detector(frame, draw=False)``; must return
            ``(detections, merged_detections, frame)``.
        people_detector: Detector whose ``detect_on_frames`` is run on the motion crops.
        face_recognizer: Recognizer providing ``min_face_size`` and ``recognize_faces``.
        scale: Scaling factor for resizing frames.
        early_stop: Stop once this many frames have been read from the controller
            (frames skipped through ``starts_from`` are counted too). ``None`` means
            there is no frame limit and the loop runs until it is interrupted.
        starts_from: Number of leading frames to read and discard before processing
            starts. Counted in the order the controller delivers frames.

    OpenCV windows are closed on every exit path, including ``KeyboardInterrupt``.
    """
    controller = initializer(videos, timeout=-1, max_queue_size=100)
    controller.start()
    # Named `log` so the `utils.logger` module imported as `logger` is not shadowed.
    log = get_logger(__name__)

    frames_seen = 0

    try:
        while early_stop is None or frames_seen < early_stop:
            frames = controller.get_frames()

            # NOTE(review): an empty batch is retried immediately; nothing visible here
            # signals end-of-stream, so with early_stop=None the loop only ends when
            # interrupted — confirm against the controller API.
            if len(frames) == 0:
                continue

            for frame_id, frame in frames:
                if early_stop is not None and frames_seen >= early_stop:
                    break

                frames_seen += 1
                if frames_seen <= starts_from:
                    continue

                _process_frame(frame_id, frame, motion_detector, people_detector,
                               face_recognizer, scale, log)
        else:
            log.critical("Stopped due to early stopping condition.")
    finally:
        # NOTE(review): the controller is started above but never stopped; no stop
        # method is visible in this notebook — confirm whether one should be called here.
        _close_windows()


def _process_frame(frame_id, frame, motion_detector, people_detector, face_recognizer, scale, log):
    """Run motion, people and face detection on one frame and log recognized faces."""
    # INTER_AREA is the interpolation OpenCV recommends for shrinking (default scale is 0.5).
    frame = cv.resize(frame, None, fx=scale, fy=scale, interpolation=cv.INTER_AREA)

    _, merged_detections, frame = motion_detector(frame, draw=False)
    if not merged_detections:
        return

    merged_detections = np.array(merged_detections)
    # The last column of every detection row is dropped before cropping.
    cropped_frames = crop_bboxes(frame, bboxes=merged_detections[:, :-1])

    for probs, _, result in people_detector.detect_on_frames(cropped_frames):
        if len(probs) == 0:
            continue

        detected_person_image = result.orig_img
        view(detected_person_image, winname='frame_person')

        # Face detection + recognition; crops smaller than the minimum face size are skipped.
        height, width = detected_person_image.shape[:2]
        if min(height, width) < face_recognizer.min_face_size:
            continue

        faces = face_recognizer.recognize_faces(detected_person_image)
        if len(faces) == 0:
            continue

        log.debug(f"Frame {frame_id}: {len(faces)} faces detected.")
        view(detected_person_image, winname='frame_face')
        for detected_face in faces:
            if detected_face['label'] is not None:
                log.debug(f"Detected face: {detected_face['label']} with confidence {detected_face['confidence']}")
            else:
                log.debug("Detected unrecognized face! 😭")


def _close_windows():
    """Close every OpenCV window, with the extra event-loop pumps used on macOS."""
    cv.destroyAllWindows()

    # On macOS a few waitKey calls are issued before destroying again —
    # presumably so HighGUI gets to process the pending close events.
    if sys.platform == 'darwin':
        for _ in range(4):
            cv.waitKey(1)

    cv.destroyAllWindows()



## Main

In [3]:
# Tunable settings for this run.
yolosize = 'n'  # suffix inserted into the weights file name: yolo11<size>.pt
overlap_threshold = 0.0005
area_threshold = 700

# People detector, moved to the CPU.
yolo11 = PeopleDetector(f"yolo11{yolosize}.pt", verbose=False)
yolo11.to('cpu')

# Motion detector configured with the thresholds above.
motion_detector = MotionDetector(
    area_threshold=area_threshold,
    overlap_threshold=overlap_threshold,
)

# Face recognizer.
face_recognizer = FaceRecognizer(threshold=0.5)


In [4]:
# One video path per line of the literal; whitespace around each entry is stripped.
videos = """./datasets/WiseNET/wisenet_dataset/video_sets/set_1/video1_5.avi""".split('\n')
videos_path = list(map(str.strip, videos))

process_video(
    videos_path,
    motion_detector,
    yolo11,
    face_recognizer=face_recognizer,
    scale=0.5,
)  # optional extras kept for reference: starts_from=3000, early_stop=3500

13:01:04 [INFO] FrameController: starting
13:01:04 [INFO] VideoSource-video1_5.avi: starting video
13:01:04 [DEBUG] MotionDetector: number of detections: 1; number of merged detections: 1
13:01:05 [DEBUG] yolo11n.pt: 2 people found with accuracy 0.786, 0.614


  return torch._C._cuda_getDeviceCount() > 0


13:01:07 [DEBUG] FaceRecognizer: No faces enrolled in the system!
13:01:07 [DEBUG] MotionDetector: number of detections: 2; number of merged detections: 2
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] MotionDetector: number of detections: 2; number of merged detections: 2
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] MotionDetector: number of detections: 2; number of merged detections: 2
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] MotionDetector: number of detections: 2; number of merged detections: 2
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] yolo11n.pt: no person has been detected
13:01:07 [DEBUG] MotionDetector: number of detections: 2; number of merged detections: 2
13:01:08 [DEBUG] yolo11n.pt: no p

KeyboardInterrupt: 

# People Detection TEST


Define the base functions for people detection. <br/>
It takes as input an array of frames containing the motion detected by MOG2 above.

In [None]:
# Load a model from ./yolo11x.pt.
# `test_yolo11` below reads `model` as a notebook global, so this cell must run first.
model = PeopleDetector("./yolo11x.pt")  # load an official model, or use local path

In [None]:
def test_yolo11(video_path: str, scale: float = 0.5):
    """
    Play a video while drawing the detections of the global ``model`` on every frame.

    Inference runs on the frame exactly as read from the file; only the annotated
    copy is resized by ``scale`` before being displayed. Playback stops at the end
    of the video, when a frame cannot be read, or when 'q' is pressed.

    Parameters:
        video_path: Path to the video file to open.
        scale: Scaling factor applied to the annotated frame for display.

    The capture and the display window are released on every exit path, including
    an exception raised by the model or a ``KeyboardInterrupt``.
    """
    cap = cv.VideoCapture(video_path)

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video, or the frame could not be read.
                break

            # `model` is the notebook-level detector; the first result is plotted.
            results = model(frame)
            annotated_frame = results[0].plot()
            annotated_frame = cv.resize(annotated_frame, None, fx=scale, fy=scale,
                                        interpolation=cv.INTER_LINEAR)
            cv.imshow("YOLO Inference", annotated_frame)

            # Stop when 'q' is pressed.
            if cv.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv.destroyAllWindows()

## Main test YOLO11