This code was published as part of a 24-hour hackathon.<br>
Author : **Pawan Jain**

## Load Libraries

In [22]:
import warnings
warnings.filterwarnings("ignore")

from keras.models import Sequential
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform
from keras.engine.topology import Layer
from keras import backend as K
from keras.models import load_model
K.set_image_data_format('channels_first')

import time
import h5py
import pickle
import cv2
import os.path
import os
import numpy as np
from numpy import genfromtxt #data to string and than string to datatype
import pandas as pd
import tensorflow as tf
from utility import *
from webcam_utility import *
# Print arrays in full instead of abbreviating long ones with "...".
# threshold=np.nan is rejected by current NumPy releases; sys.maxsize is the
# documented value for an untruncated representation.
import sys
np.set_printoptions(threshold=sys.maxsize)

## Model
The model produces an encoding vector of 128 numbers for each input image. Two encodings are compared: if they are similar, we say that the two images show the same person; otherwise, they show different people. 
The model uses **Triplet loss function**. The aim is to minimize this function.

In [2]:
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """Triplet loss for an (anchor, positive, negative) group of encodings.

    Args:
        y_true: Not used by the computation; kept so the function has the
            (y_true, y_pred) signature it is registered with.
        y_pred: Indexable object whose items 0, 1 and 2 are the anchor,
            positive and negative encodings respectively.
        alpha: Margin added to the difference of the two distances.

    Returns:
        max(pos_dist - neg_dist + alpha, 0), where each distance is the sum
        of squared differences over every element of the two tensors.
    """
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    # squared distance anchor <-> positive (should become small)
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)))
    # squared distance anchor <-> negative (should become large)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)))

    # the loss is zero once the negative is at least `alpha` further away
    # than the positive
    basic_loss = pos_dist - neg_dist + alpha
    loss = tf.maximum(basic_loss, 0.0)

    return loss

### Loading the Model
The model outputs a vector of 128 numbers which represent encoding for the given input image. We will be using this encoding vector for comparing two images.

In [3]:
# load the saved Keras model from disk; triplet_loss is handed over through
# custom_objects so that load_model can resolve that name
# (presumably the model was compiled with this loss - confirm against the training code)
FRmodel = load_model('models/model.h5', custom_objects={'triplet_loss': triplet_loss})

### Face detection part

In [4]:
def detect_face(database, model):
    """Watch the webcam for a few seconds and save the detected face crop.

    The default camera is polled for 3 seconds (or until 'q' is pressed).
    Every detected face is cropped with some margin and written to
    'saved_image/1.jpg', so the file ends up holding the last face seen.

    Args:
        database: Not used by this function (kept for interface compatibility).
        model: Not used by this function (kept for interface compatibility).

    Returns:
        True if a face crop was written during this call, False otherwise.
    """
    save_loc = r'saved_image/1.jpg'
    # run the webcam for this many seconds
    req_sec = 3

    capture_obj = cv2.VideoCapture(0)
    capture_obj.set(3, 640)  # WIDTH
    capture_obj.set(4, 480)  # HEIGHT

    # classifier to detect faces
    face_cascade = cv2.CascadeClassifier(r'haarcascades/haarcascade_frontalface_default.xml')

    # only becomes True when a crop is written in THIS call, so an image left
    # over from an earlier run can no longer be reported as a fresh detection
    face_found = False
    loop_start = time.time()

    try:
        while time.time() - loop_start < req_sec:
            # capture frame-by-frame
            ret, frame = capture_obj.read()
            if not ret or frame is None:
                # no frame delivered; keep trying until the time budget is used up
                continue

            # mirror the frame (flipCode=1 flips around the vertical axis)
            frame = cv2.flip(frame, 1)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

            for (x, y, w, h) in faces:
                # Clamp the top/left margin at 0: a negative start index would
                # be interpreted as "count from the end" and yield a wrong or
                # empty crop. End indices past the border are truncated by
                # slicing itself.
                top = max(y - 90, 0)
                left = max(x - 50, 0)
                roi_color = frame[top:y + h + 70, left:x + w + 50]

                # save the detected face
                if roi_color.size and cv2.imwrite(save_loc, roi_color):
                    face_found = True

                # draw a rectangle bounding the face
                cv2.rectangle(frame, (x-10, y-70), (x+w+20, y+h+40), (15, 175, 61), 4)

            # display the frame with bounding rectangle
            cv2.imshow('frame', frame)

            # close the webcam when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # always give the camera and the window back, even after an error
        capture_obj.release()
        cv2.destroyAllWindows()

    return face_found

In [6]:
def detect_face_realtime(database, model, threshold=0.7):
    """Show the webcam feed and try to recognise the visible face every 3 seconds.

    Each detected face is cropped and written to 'saved_image/1.jpg'. Once at
    least 3 seconds have passed since the previous attempt, the saved crop is
    resized and compared against `database`; the outcome is printed and drawn
    on the video feed. The loop ends when 'q' is pressed or the camera stops
    delivering frames.

    Args:
        database: Mapping of user name -> stored face encoding.
        model: Model handed to find_face_realtime to encode the crop.
        threshold: Largest distance that still counts as a match.
    """
    text = ''
    font = cv2.FONT_HERSHEY_SIMPLEX
    save_loc = r'saved_image/1.jpg'
    capture_obj = cv2.VideoCapture(0)
    capture_obj.set(3, 640)  # WIDTH
    capture_obj.set(4, 480)  # HEIGHT

    face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
    print('**************** Enter "q" to quit **********************')
    prev_time = time.time()

    try:
        while True:
            # capture frame-by-frame
            ret, frame = capture_obj.read()
            if not ret or frame is None:
                # this loop has no time limit, so stop instead of spinning
                # forever on a camera that delivers nothing
                print('Could not read a frame from the webcam.')
                break

            # mirror the frame (flipCode=1 flips around the vertical axis)
            frame = cv2.flip(frame, 1)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 5)

            for (x, y, w, h) in faces:
                # Clamp the top/left margin at 0: a negative start index would
                # wrap around and produce a wrong or empty crop.
                top = max(y - 90, 0)
                left = max(x - 50, 0)
                roi_color = frame[top:y + h + 70, left:x + w + 50]

                # save the detected face
                if roi_color.size:
                    cv2.imwrite(save_loc, roi_color)

                # keeps track of waiting time for face recognition
                curr_time = time.time()

                if curr_time - prev_time >= 3:
                    img = cv2.imread(save_loc)
                    if img is not None:
                        resize_img(save_loc)

                        min_dist, identity, registered = find_face_realtime(
                            save_loc, database, model, threshold)

                        if min_dist <= threshold and registered:
                            # for putting text overlay on webcam feed
                            text = 'You Found ' + identity
                            print('You Found ' + identity + '!')
                        else:
                            text = 'Unknown user'
                            print('Unknown user' + ' detected !')
                        print('distance:' + str(min_dist))
                    # save the time when the last face recognition task was done
                    prev_time = time.time()

                # draw a rectangle bounding the face
                cv2.rectangle(frame, (x-10, y-70),
                              (x+w+20, y+h+40), (15, 175, 61), 4)
                cv2.putText(frame, text, (50, 50), font, 1.8, (158, 11, 40), 3)

            # display the frame with bounding rectangle
            cv2.imshow('frame', frame)

            # close the webcam when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # always give the camera and the window back, even after an error
        capture_obj.release()
        cv2.destroyAllWindows()

In [7]:
# looks up the closest stored encoding for the face in the given image
def find_face_realtime(image_path, database, model, threshold):
    """Find the database entry closest to the face stored at `image_path`.

    Args:
        image_path: Path of the image to encode.
        database: Mapping of user name -> stored encoding.
        model: Model passed on to img_to_encoding.
        threshold: Largest L2 distance that still counts as registered.

    Returns:
        (min_dist, identity, registered). With no entry closer than the
        initial 99999, identity stays 'Unknown Person'.
    """
    query = img_to_encoding(image_path, model)

    best_dist, best_name = 99999, 'Unknown Person'
    for candidate in database:
        # L2 norm of the difference between stored and queried encodings
        gap = np.linalg.norm(np.subtract(database[candidate], query))
        if gap < best_dist:
            best_dist, best_name = gap, candidate

    # registered unless the best match is further away than the threshold
    is_registered = not best_dist > threshold
    return best_dist, best_name, is_registered

In [8]:
def img_to_encoding(image_path, model):
    """Read an image from disk and return the model's output for it.

    Args:
        image_path: Path of the image file to encode.
        model: Object providing predict_on_batch().

    Returns:
        Whatever model.predict_on_batch returns for a batch holding this
        single image.

    Raises:
        IOError: If the image cannot be read from `image_path`.
    """
    img1 = cv2.imread(image_path, 1)
    if img1 is None:
        # cv2.imread signals failure by returning None rather than raising
        raise IOError('Could not read image: ' + str(image_path))

    # reverse the channel axis (OpenCV loads colour images as BGR)
    img = img1[..., ::-1]
    # move channels to the front and scale pixel values to [0, 1]
    img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=12)
    # wrap into a batch of one
    x_train = np.array([img])
    embedding = model.predict_on_batch(x_train)
    return embedding

In [9]:
# resizes the image file in place
def resize_img(image_path):
    """Overwrite the image at `image_path` with a 96x96 version of itself."""
    target_size = (96, 96)
    resized = cv2.resize(cv2.imread(image_path, 1), target_size)
    cv2.imwrite(image_path, resized)

### Add or delete user 

We will create a database of registered users. For this we will use a simple dictionary that maps each registered user to his/her face encoding.

In [13]:
# we use a dict for keeping track of the mapping of each person with his/her face encoding
# NOTE: ini_user_database() is defined in a later cell (In [10]); that cell has
# to be executed before this one.
user_db = ini_user_database()

In [10]:
# load the stored user database, or start with an empty one
def ini_user_database():
    """Return the user database dict.

    Reads 'database/user_dict.pickle' when that file exists; otherwise a new
    empty dict is returned. The dict maps each person to his/her face encoding.
    """
    db_file = 'database/user_dict.pickle'

    # nothing stored yet -> start from scratch
    if not os.path.exists(db_file):
        return {}

    with open(db_file, 'rb') as handle:
        return pickle.load(handle)

In [11]:
# adds a new user face to the database using his/her image stored on disk using the image path
def add_user_img_path(user_db, FRmodel, name, img_path):
    """Register `name` with the encoding of the image at `img_path`.

    Args:
        user_db: Dict of user name -> encoding; updated in place.
        FRmodel: Model passed on to img_to_encoding.
        name: Key under which the new user is stored.
        img_path: Path of the user's image on disk.

    An already registered name is left untouched and only a message is
    printed. Otherwise the updated dict is pickled to
    'database/user_dict.pickle'.
    """
    if name in user_db:
        print('The name is already registered! Try a different name.........')
        return

    user_db[name] = img_to_encoding(img_path, FRmodel)

    # On first use there may be no 'database' directory yet; create it so
    # saving does not fail after the in-memory dict was already changed.
    os.makedirs('database', exist_ok=True)
    # save the database
    with open('database/user_dict.pickle', 'wb') as handle:
        pickle.dump(user_db, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('User ' + name + ' added successfully')

In [12]:
# deletes a registered user from database
def delete_user(user_db, name):
    """Remove `name` from the user database and persist the change.

    Args:
        user_db: Dict of user name -> encoding; updated in place.
        name: Key of the user to remove.

    When `name` is not registered, only a message is printed and nothing is
    written to disk. Otherwise the updated dict is pickled to
    'database/user_dict.pickle'.
    """
    # membership test instead of pop-and-compare, so an entry is found
    # regardless of the value stored for it
    if name not in user_db:
        print('No such user !!')
        return

    del user_db[name]
    print('User ' + name + ' deleted successfully')

    # make sure the target directory exists before saving
    os.makedirs('database', exist_ok=True)
    # save the database
    with open('database/user_dict.pickle', 'wb') as handle:
        pickle.dump(user_db, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Putting everything together
For making this face recognition system we are going to take the input image, find its encoding and then see if there is any similar encoding in the database or not. We define a threshold value to decide whether the two images are similar or not based on the similarity of their encodings.

In [14]:
# recognize the input user face encoding by checking for it in the database
def find_face(image_path, database, model, threshold = 0.6):
    """Identify the face stored at `image_path` and print the outcome.

    Args:
        image_path: Path of the image to encode.
        database: Mapping of user name -> stored encoding.
        model: Model passed on to img_to_encoding.
        threshold: Largest L2 distance that still counts as a match.

    Returns:
        (min_dist, identity); identity is 'Unknown Person' when no stored
        encoding lies within `threshold`.
    """
    # find the face encodings for the input image
    encoding = img_to_encoding(image_path, model)

    min_dist = 99999
    # stays None when the database is empty or nothing beats the initial
    # distance, so the name is always bound before it is used below
    identity = None
    # loop over all the recorded encodings in database
    for name in database:
        # L2 norm between the stored encoding and the input encoding
        dist = np.linalg.norm(np.subtract(database[name], encoding))
        # keep the closest entry seen so far
        if dist < min_dist:
            min_dist = dist
            identity = name

    if identity is None or min_dist > threshold:
        print("User not in the database.")
        identity = 'Unknown Person'
    else:
        print ("Hi! " + str(identity) + ", L2 distance: " + str(min_dist))

    return min_dist, identity

In [17]:
# captures a face with the webcam and runs recognition on the saved image
def do_face_recognition(user_db, FRmodel, threshold, save_loc):
    """Capture a face via detect_face and look it up in `user_db`.

    Args:
        user_db: Mapping of user name -> stored encoding.
        FRmodel: Model used to encode the captured image.
        threshold: Largest distance that still counts as a match.
        save_loc: Path of the image that is resized and then recognised.
    """
    # bail out early when the webcam capture produced no face
    if not detect_face(user_db, FRmodel):
        print('There was no face found in the visible frame. Try again...........')
        return

    resize_img(save_loc)
    find_face(save_loc, user_db, FRmodel, threshold)

### User Manual

**To add image via path** : add_user_img_path(user_db, FRmodel, "Pj", "saved_image/1.jpg") <br>
**To delete user data** : delete_user(user_db, "Pawan")<br>
**To recognise face** : do_face_recognition(user_db, FRmodel, 0.6, "saved_image/1.jpg") (the last argument is the image location)<br>
**To detect face** : detect_face_realtime(user_db, FRmodel, threshold = 0.7)

### Simple GUI 

In [21]:
from tkinter import *

# main window with one frame for the buttons and one (empty) frame below it
root = Tk()

topFrame = Frame(root)
topFrame.pack()
bottomFrame = Frame(root)
bottomFrame.pack(side=BOTTOM)

# every button triggers one of the actions described in the user manual
button1 = Button(
    topFrame,
    text="Recognise",
    command=lambda: do_face_recognition(user_db, FRmodel, 0.6, "saved_image/1.jpg"),
)
button2 = Button(
    topFrame,
    text="Detect",
    command=lambda: detect_face_realtime(user_db, FRmodel, threshold=0.7),
)
button3 = Button(
    topFrame,
    text="Add new missing",
    command=lambda: add_user_img_path(user_db, FRmodel, "Pj", "saved_image/1.jpg"),
)

# pack in creation order; each one is stacked from the bottom of topFrame
for _button in (button1, button2, button3):
    _button.pack(padx=5, pady=10, side=BOTTOM)

# hand control to tkinter until the window is closed
root.mainloop()

### Further Work
<ul><li> Add a more convenient method to input image data</li>
    <li> Form for user data in tkinter gui</li>
    <li> Add graphs of loss, cross entropy etc.</li></ul>

### References:
- Convolutional Neural Networks Specialization by Deeplearning.ai on Coursera.
https://www.coursera.org/learn/convolutional-neural-networks/home/welcome 
- Florian Schroff, Dmitry Kalenichenko, James Philbin (2015). [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/pdf/1503.03832.pdf)
- The pretrained model used is inspired by Victor Sy Wang's implementation and was loaded using his code: https://github.com/iwantooxxoox/Keras-OpenFace.
- A lot of explanation from http://llcao.net/cu-deeplearning17/pp/class10_FaceNet.pdf by Florian Schroff, Dmitry Kalenichenko, James Philbin