# In [None]:
from facenet_pytorch import MTCNN
import torch
import cv2
import time

# --- Loop bookkeeping -------------------------------------------------------
# Counter of frames that have gone through face detection so far.
frame_number = 0

# Upper bound on how many frames per second are run through the detector.
fps_limit = 15

# Wall-clock time of the last processed frame; starting at 0 makes the very
# first elapsed-time check in the loop succeed immediately.
start_time = 0

# --- Detector and video source ----------------------------------------------
# Use the first CUDA GPU when PyTorch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# MTCNN face detector, configured with keep_all=True and placed on `device`.
mtcnn = MTCNN(device=device, keep_all=True)

# Open the input video; frames are pulled from it one at a time in the loop.
capture = cv2.VideoCapture("face-demographics-walking-and-pause.mp4")

print("Control taken from frame difference test and passed to facial detection.")

# Main detection loop: read frames from `capture`, run MTCNN on at most
# `fps_limit` frames per second, report frames that contain zero or several
# confident faces, annotate processed frames and show every frame in a window.
# Press "q" in the window to stop early.

# A detection counts as a face only if its confidence exceeds this value.
confidence_threshold = 0.95

try:
    while True:
        # Seconds elapsed since the last frame that went through detection.
        now_time = time.time() - start_time
        ret, frame = capture.read()

        # Stop as soon as no frame could be read. Doing this before the
        # throttle check guarantees a missing frame never reaches cv2.imshow.
        if not ret:
            break

        # Resize every frame (not only the processed ones) so the preview
        # window keeps a constant 600x400 size.
        frame = cv2.resize(frame, (600, 400))

        if now_time > 1.0 / fps_limit:
            start_time = time.time()  # restart the throttle interval

            # OpenCV decodes frames as BGR while MTCNN works on RGB images,
            # so convert a copy for detection and keep drawing on the BGR frame.
            boxes, conf = mtcnn.detect(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # `conf` holds one confidence per detection; entries may be None
            # when nothing was found, in which case `boxes` is None as well.
            # Each box is consumed below as two opposite corners
            # (x1, y1) and (x2, y2) of the face rectangle.

            n_faces = sum(
                1
                for prob in conf
                if prob is not None and prob > confidence_threshold
            )
            multiple_faces = n_faces > 1
            no_face_detected = n_faces == 0

            # Position of the capture in milliseconds, read once per frame.
            timestamp = str(capture.get(cv2.CAP_PROP_POS_MSEC))

            if multiple_faces:
                print(
                    f"{n_faces} faces detected! for frame: {frame_number}"
                    " whose timestamp is: ",
                    timestamp,
                )

            if no_face_detected:
                print(
                    f"No face detected!for frame: {frame_number}"
                    " whose timestamp is: ",
                    timestamp,
                )

            # NOTE: the console messages above use the pre-increment counter,
            # the on-frame overlay below uses the post-increment one.
            frame_number += 1

            if boxes is not None:
                # Pair every box with its own confidence so each rectangle is
                # labelled with the score of that detection.
                for (x1, y1, x2, y2), prob in zip(boxes, conf):
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                    cv2.putText(
                        frame,
                        f"{prob * 100:.2f}%",
                        (x1, y1 - 20),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 255, 0),
                        1,
                    )
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)

            # Status lines stacked in the top-left corner, 15 px apart.
            overlay_lines = (
                "Frame No. = " + str(frame_number),
                "Timestamp = " + timestamp,
                "No. of faces = " + str(n_faces),
            )
            for row, label in enumerate(overlay_lines, start=1):
                cv2.putText(
                    frame,
                    label,
                    (0, 15 * row),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 255, 0),
                    1,
                )

        # Show the frame (annotated if it was processed on this iteration).
        cv2.imshow("Frame", frame)
        if cv2.waitKey(25) & 0xFF == ord("q"):
            break
finally:
    # Always free the video handle and close the preview window, even when
    # the loop is interrupted by an exception.
    capture.release()
    cv2.destroyAllWindows()

print(
    "Control given away from face detection to facial detection to facial recogntion."
)
