<a href="https://colab.research.google.com/github/Jasonhcwong/face-recognition-system/blob/main/process_video_file.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone the repo and install the required libraries

In [None]:
!git clone https://github.com/Jasonhcwong/face-recognition-system.git
%cd face-recognition-system

!pip install mtcnn

# Import the required libraries

In [2]:
import os
import cv2
import numpy as np
import tensorflow as tf
import face_preprocess

from mtcnn import MTCNN

# Helper functions

In [3]:
def euclidian_distance(embeddings1, embeddings2):
    """Return the Euclidean distance between two L2-normalised embeddings.

    Each argument is treated as ONE embedding vector: it is flattened before
    being normalised, so a 1-D vector and a ``(1, dim)`` array holding the
    same values give the same result. Normalising along ``axis=0`` was only
    correct for 1-D input; for a ``(1, dim)`` array it divided every element
    by its own absolute value.

    Args:
        embeddings1: Array-like embedding vector.
        embeddings2: Array-like embedding vector with the same number of
            elements as ``embeddings1``.

    Returns:
        The distance as a NumPy scalar. An all-zero embedding has no
        direction, so the result is NaN in that case.
    """
    vec1 = np.asarray(embeddings1).ravel()
    vec2 = np.asarray(embeddings2).ravel()
    # Scale both vectors to unit length so only their direction is compared.
    vec1 = vec1 / np.linalg.norm(vec1)
    vec2 = vec2 / np.linalg.norm(vec2)
    return np.sqrt(np.sum(np.square(vec1 - vec2)))

def calc_confidence(distance, threshold=1.2333):
    """Convert a distance into a confidence score.

    Returns 0 when ``distance`` is at or beyond ``threshold``; otherwise
    returns ``1.0 - distance / threshold``, which reaches 1.0 at distance 0.
    """
    if distance >= threshold:
        return 0
    return 1.0 - distance / threshold

def compare_faces(embeddings1, embeddings2):
    """Compare two face embeddings.

    Returns a ``(distance, confidence)`` tuple, where the distance comes from
    ``euclidian_distance`` and the confidence from ``calc_confidence`` with
    its default threshold.
    """
    distance = euclidian_distance(embeddings1, embeddings2)
    return distance, calc_confidence(distance)

def find_nearest_person(embedding, face_db, threshold):
    """Find the closest known face to ``embedding``.

    ``face_db`` is iterated as ``(embedding, name)`` pairs. Only entries whose
    distance is strictly below ``threshold`` are considered; the nearest of
    those wins.

    Returns a ``(name, distance)`` tuple, or ``("Unknown", 999)`` when no
    entry is close enough.
    """
    best_name, best_distance = "Unknown", 999
    for known_embedding, known_name in face_db:
        candidate = compare_faces(known_embedding, embedding)[0]
        if not (candidate < threshold and candidate < best_distance):
            continue
        best_name, best_distance = known_name, candidate
    return (best_name, best_distance)

# Initialize MTCNN, load face embedding model and create known-faces database

In [None]:
##### face detector (MTCNN)
# detections with a confidence below this value are ignored later on
mtcnn_min_conf = 0.9
detector = MTCNN()

##### face recognition model, restored from a saved_model checkpoint
ckpt_dir = './ckpt/epoch_13'
train_model = tf.keras.models.load_model(ckpt_dir)
# Build an inference model that runs from the layer named 'input' up to the
# layer named 'embedding' of the loaded training model.
model = tf.keras.Model(
    inputs=train_model.get_layer('input').input,
    outputs=train_model.get_layer('embedding').output,
    trainable=False,
)
# uncomment to inspect the layers:
#model.summary()

# load known faces from folder
# Layout: <known_faces_mtcnn_dir>/<person name>/<image files, possibly nested>
known_faces_mtcnn_dir = './known_faces_mtcnn/'
known_face_db = []

# Only the immediate sub-directories are persons. Taking just the first
# os.walk() entry stops nested folders from being visited again as extra
# "persons" (which also added their images to the database twice).
# A missing folder yields an empty database, as before.
_, person_dirs, _ = next(os.walk(known_faces_mtcnn_dir), (known_faces_mtcnn_dir, [], []))
for person in person_dirs:
    print('processing person: ' + person)
    person_dir = os.path.join(known_faces_mtcnn_dir, person)
    for subroot, _, files in os.walk(person_dir):
        for file in files:
            full_path = os.path.join(subroot, file)
            print('processing file: ' + full_path)
            img = cv2.imread(full_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode
                # (e.g. stray non-image files); skip instead of crashing.
                print('skipping unreadable file: ' + full_path)
                continue
            # same pre-processing as the video frames: 112x112 RGB,
            # pixel values scaled with (value - 127.5) * 0.0078125
            img = cv2.resize(img, (112, 112))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img - 127.5
            img = img * 0.0078125
            img_tensor = np.expand_dims(img, axis=0)
            result = model.predict(img_tensor, batch_size=1, verbose=0)
            # store a flat embedding vector together with the folder name
            known_face_db.append((result.flatten(), person))

#print(known_face_db)

# Process video file and run face recognition

In [None]:
# read video and start processing loop
# the video used in this repo comes from https://www.youtube.com/watch?v=R32qWdOWrTo
# it can be downloaded using a Youtube downloader
video_input_file = './nosedive.mp4'
video_output_file = "./output.mp4"

# maximum embedding distance for a detection to be labelled with a known name
dist_threshold = 1.0
# keypoints in the order they are handed to face_preprocess.preprocess
landmark_names = ("left_eye", "right_eye", "nose", "mouth_left", "mouth_right")

vidcap = cv2.VideoCapture(video_input_file)

# Configure the writer from the source video instead of assuming
# 1920x1080 @ 30 fps, so the written frames match the writer's frame size.
# The previous hard-coded values are kept as a fallback when the capture
# does not report a property.
fps = vidcap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30
frame_width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 1920
frame_height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 1080
vidout = cv2.VideoWriter(video_output_file, cv2.VideoWriter_fourcc(*'MPEG'), fps, (frame_width, frame_height))

count = 0
try:
    success, img = vidcap.read()
    while success:
        print("processing frame: " + str(count))

        # keep a BGR copy to draw on; detection and embedding use the RGB frame
        img_with_dets = img.copy()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # MTCNN
        detections = detector.detect_faces(img)

        for det in detections:
            if det['confidence'] < mtcnn_min_conf:
                continue

            x, y, width, height = det['box']
            keypoints = det['keypoints']
            cv2.rectangle(img_with_dets, (x, y), (x + width, y + height), (0, 155, 255), 2)

            # pre-process the face image:
            # box as [x1, y1, x2, y2], landmarks as five (x, y) rows
            bbox = np.array([x, y, x + width, y + height])
            landmarks = np.array([keypoints[key] for key in landmark_names])
            nimg = face_preprocess.preprocess(img, bbox, landmarks, image_size='112,112')

            # calculate face embedding (same scaling as the known-face images)
            nimg = nimg - 127.5
            nimg = nimg * 0.0078125
            nimg_tensor = np.expand_dims(nimg, axis=0)
            result = model.predict(nimg_tensor, batch_size=1, verbose=0)
            embedding = result.flatten()

            # find nearest face in db; unmatched faces come back with a
            # distance that fails this check, so they get a box but no label
            name, dist = find_nearest_person(embedding, known_face_db, dist_threshold)
            if dist < dist_threshold:
                img_with_dets = cv2.putText(img_with_dets, name, (x, y+height+50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                img_with_dets = cv2.putText(img_with_dets, str(dist), (x, y+height+100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # write video frame
        vidout.write(img_with_dets)

        success, img = vidcap.read()
        count += 1
finally:
    # always release the capture and finalise the output file,
    # even if a frame fails part-way through
    vidcap.release()
    vidout.release()

print('Done processing frames from file!')

# play a video file
from IPython.display import HTML
from base64 import b64encode
 
def show_video(video_path, video_width = 600):
    """Return an HTML ``<video>`` element with the file embedded inline.

    The file at ``video_path`` is read fully and embedded as a base64
    ``data:video/mp4`` URL, so the returned HTML needs no external file.

    Args:
        video_path: Path of the video file to embed.
        video_width: Value for the ``width`` attribute of the player.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<video>`` markup.
    """
    # Read-only binary mode is enough (the old "r+b" also required write
    # permission), and the context manager closes the file handle.
    with open(video_path, "rb") as video_file:
        encoded_video = b64encode(video_file.read()).decode()

    video_url = f"data:video/mp4;base64,{encoded_video}"
    return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")
 
# embed the annotated output video; as the last expression of the cell, the returned HTML object is what the notebook displays
show_video(video_output_file)