# Face Detection - YOLOv8n
# Face Embedding and Recognition - FaceNet

In [None]:
# Installation (notebook shell commands):
#   deepface    -> provides DeepFace, used below for the FaceNet embeddings
#   ultralytics -> provides the YOLO class used below for face detection
!pip install deepface
!pip install ultralytics

In [5]:
# Import libraries
import os
import cv2
import numpy as np
from ultralytics import YOLO
from deepface import DeepFace
from google.colab import drive
import matplotlib.pyplot as plt

In [6]:
# Mount Google Drive at /content/drive so the videos and the model weights
# stored under MyDrive can be read by the cells below.
# force_remount=True asks Colab for a fresh mount even if one already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [7]:
# Get videos info
# For Vid1 and Vid2, print the frame width, height, FPS and frame count,
# then print the combined number of frames of both videos.
total = 0
for i in range(1, 3):
    input_video = f'/content/drive/MyDrive/Videos/Vid{i}.mp4'
    cap = cv2.VideoCapture(input_video)  # input video
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)  # input video FPS
        W = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        H = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_number_of_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture handle before opening the next video so the
        # loop does not leak one open file per iteration.
        cap.release()
    print(f'Vid{i}:' ,W,H,fps,total_number_of_frames)
    total += total_number_of_frames
print(total)

Vid1: 1280 720 13.057954427218812 251
Vid2: 1280 720 11.926205536601675 7687
7938


### Extract Face Embeddings and Matching

> First, read the video and detect the faces in the first frame,
> then save them in the FaceBank.

> For the following frames, check the similarity with the previous frame;
> if the face matches, no action is needed.

> If the face didn't match, then check the similarity with the FaceBank:
> ---> If it matches the FaceBank, no action is needed.
> ---> If it doesn't match, it is a new face: it should be saved in the FaceBank and given an ID.

> **Hint: For the final code we should save several face areas for each person and tune the threshold parameter! Fine-tuning the detection model can enhance the performance as well.**

In [8]:
# Compute the cosine distance between two vectors
# (cosine distance = 1 - cosine similarity).
def comp_cosine_distance(source_representation, test_representation):
    """Return the cosine distance between two 1-D vectors.

    Args:
        source_representation: First vector (list or array of numbers).
        test_representation: Second vector, same length as the first.

    Returns:
        ``1 - cos(angle between the vectors)``: 0 for vectors pointing the
        same way, 1 for orthogonal vectors, 2 for opposite vectors.
        If either vector has zero norm the cosine similarity is undefined;
        1.0 is returned in that case instead of ``nan`` so that comparisons
        against a threshold behave predictably.
    """
    dot_product = np.dot(source_representation, test_representation)
    norm_product = (np.linalg.norm(source_representation)
                    * np.linalg.norm(test_representation))

    # Guard against division by zero for all-zero vectors.
    if norm_product == 0:
        return 1.0

    return 1 - dot_product / norm_product

In [9]:
def show_face_bank(face_bank):
    """Plot every face crop of `face_bank` in its own figure.

    Args:
        face_bank: Iterable of BGR face images; each one is converted to RGB
            for display and titled with its index in the bank (``ID: <i>``).
    """
    for i, face in enumerate(face_bank):
        # Open the figure BEFORE drawing: creating it after the plot calls
        # leaves an extra, empty figure behind once the last face is drawn.
        plt.figure()
        plt.imshow(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
        plt.title(f'ID: {i}')
        plt.axis('off')

In [11]:
# The main function: Face Detection (YOLOv8n-face) --> Face Embedding (FaceNet)
# --> Check Similarity (cosine distance) --> Face Matching.

def _crop_faces(frame, boxes_xywh):
    """Return the non-empty face crops of `frame` for center-format boxes.

    `boxes_xywh` is an (N, 4) array of (center_x, center_y, width, height).
    """
    frame_h, frame_w = frame.shape[:2]
    crops = []
    for x, y, w, h in boxes_xywh.astype(int):
        # Clamp to the frame: a negative start index would wrap around in
        # numpy slicing and yield an empty or wrong crop.
        top, bottom = max(y - h // 2, 0), min(y + h // 2, frame_h)
        left, right = max(x - w // 2, 0), min(x + w // 2, frame_w)
        if bottom > top and right > left:
            # Copy so a stored crop does not keep the whole frame alive.
            crops.append(frame[top:bottom, left:right, :].copy())
    return crops


def _embed_face(face_area):
    """Return the FaceNet embedding of one face crop."""
    embedding_objs = DeepFace.represent(
        img_path=cv2.resize(face_area, (160, 160)),
        model_name='Facenet', enforce_detection=False)
    return embedding_objs[0]['embedding']


def _matches_any(embedding, candidate_embeddings, thresh):
    """Tell whether `embedding` is within `thresh` of any candidate embedding."""
    return any(
        comp_cosine_distance(embedding, candidate) <= thresh
        for candidate in candidate_embeddings)


def find_unique_persons(thresh, video_path):
    """Collect one face crop per distinct person seen in a video.

    Every face detected in the first frame is stored in the FaceBank. For each
    later frame, a detected face is first compared with the faces of the
    previous frame, then with the FaceBank; a face matching neither is
    considered new and is appended to the FaceBank.

    Args:
        thresh: Maximum cosine distance between two embeddings for the faces
            to be considered the same person.
        video_path: Path of the video file to process.

    Returns:
        The FaceBank: a list of face crops (image arrays cut from the video
        frames), one per face judged to be new. Empty if no frame is read.
    """
    # Load YOLO face detection model
    model = YOLO("/content/drive/MyDrive/yolov8n-face.pt", task='pose')

    # Open the video file
    cap = cv2.VideoCapture(video_path)

    frame_counter = 0
    face_bank = []
    # Embeddings are cached next to the crops so each face is embedded once,
    # instead of re-embedding the FaceBank / previous frame for every face.
    bank_embeddings = []
    prev_embeddings = []

    try:
        # Loop through the video frames
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video (or read failure)
                break

            frame_counter += 1

            # Run YOLOv8 inference on the frame; move the boxes to host
            # memory so this also works when inference runs on a GPU.
            results = model(frame)
            face_areas = _crop_faces(
                frame, results[0].boxes.xywh.cpu().numpy())
            current_embeddings = [_embed_face(face) for face in face_areas]

            if frame_counter == 1:
                # Every face of the FIRST FRAME goes straight to the FaceBank
                face_bank.extend(face_areas)
                bank_embeddings.extend(current_embeddings)
            else:
                for face_area, embedding in zip(face_areas,
                                                current_embeddings):
                    # Known face: seen in the previous frame ...
                    if _matches_any(embedding, prev_embeddings, thresh):
                        continue
                    # ... or already stored in the FaceBank
                    if _matches_any(embedding, bank_embeddings, thresh):
                        continue
                    # New face is recognised: add it to the FaceBank
                    face_bank.append(face_area)
                    bank_embeddings.append(embedding)

            # Keep this frame's faces for the similarity check of the next one
            prev_embeddings = current_embeddings

            if frame_counter % 50 == 0:
                print('*****************', frame_counter)
                print('*****************', len(face_bank))
    finally:
        # Release the video capture object even if processing fails
        cap.release()

    return face_bank

In [1]:
# Build the FaceBank of Vid1, report how many faces it holds, then plot them.
face_bank_vid1 = find_unique_persons(
    video_path="/content/drive/MyDrive/Videos/Vid1.mp4",
    thresh=0.3,
)
print(len(face_bank_vid1))
show_face_bank(face_bank_vid1)

In [2]:
# Build the FaceBank of Vid2, report how many faces it holds, then plot them.
face_bank_vid2 = find_unique_persons(
    video_path="/content/drive/MyDrive/Videos/Vid2.mp4",
    thresh=0.3,
)
print(len(face_bank_vid2))
show_face_bank(face_bank_vid2)

## Problems:
1- The same person is sometimes recognized as several different people!

2- False Positive and False Negative Detections!
### Primary Solutions
- Set Threshold properly
 - Vid1 Results:
    -  thresh = 0.15 >>> 29 persons
    -  thresh = 0.3 >>> 8 persons
 - Vid2 Results:
    -  thresh = 0.15 >>>
    -  thresh = 0.3 >>>
- Save several face areas per person (TODO)
- Fine-tune the face detection model (TODO)