In [1]:
#import libraries
import tensorflow as tf
import numpy as np
import cv2
import copy
import PIL
import pickle
import time 
import scipy
from scipy import stats
import matplotlib.pyplot as plt
import os
###################
################
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from imageio import imread
from skimage.transform import resize
from scipy.spatial import distance
from keras.models import load_model

In [2]:
#load FaceNet model
FaceNet=tf.keras.models.load_model('facenet_keras.h5',compile=False)

#load trained YOLO model
config_path = "yolov4-tiny.cfg"
weights_path = "yolov4-tiny_2000.weights"
YOLO = cv2.dnn.readNetFromDarknet(config_path, weights_path)
ln = YOLO.getLayerNames()
ln = [ln[i - 1] for i in YOLO.getUnconnectedOutLayers()]

#specifiy the image size
image_size = 160


In [3]:
def prewhiten(x):
    #predefine some varabiles
    size = x.size
    axis = (0, 1, 2)
    
    #standardization
    mean = np.mean(x, axis=axis, keepdims=True)
    std = np.std(x, axis=axis, keepdims=True)
    std_adj = np.maximum(std, 1.0/np.sqrt(size))
    y = (x - mean) / std_adj
    return y

def l2_normalize(x, axis=-1, epsilon=1e-10):
    #l2 normalization
    output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
    return output

def load_and_align_images(img):
    #to fix the bug of no detection
    nms_boxes=[[0,0,0,0]]
    
    #empty the buffers
    boxes, confidences, class_ids,names, aligned_images = [], [], [], [], []

    #keep the original height and width, Caffe model require resizing to 300*300
    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, 1/255, (160, 160), swapRB=True, crop=False)

    #pass the image to the model
    YOLO.setInput(blob)

    #extract
    faces = YOLO.forward(ln)
    
    # loop over each of the layer outputs
    for output in faces:
        # loop over each of the object detections
        for detection in output:
            # extract the confidence (as a probability) the current object detection
            confidence = detection[5]
            # discard weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            if confidence > CONFIDENCE:
                # scale the bounding box coordinates back relative to the
                # size of the image, keeping in mind that YOLO actually
                # returns the center (x, y)-coordinates of the bounding
                # box followed by the boxes' width and height
                box = detection[:4] * np.array([w, h, w, h])
                (centerX, centerY, width, height) = box.astype("int")

                # use the center (x, y)-coordinates to derive the top and
                # and left corner of the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                x,y,width,height=abs(x)+2,abs(y)+2,abs(width)+2,abs(height)+2
                
                # update our list of bounding box coordinates, confidences
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                
    # perform the non maximum suppression given the scores defined before
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, IOU_THRESHOLD)
    
    #loop over the suppressed boxes
    if(len(idxs)>0):
        nms_boxes=[]
        for i in idxs.flatten():
            x, y = boxes[i][0], boxes[i][1]
            w, h = boxes[i][2], boxes[i][3]
            #append new suppressed boxes to draw it
            nms_boxes.append([x, y, int(w), int(h)])
            face_cropped=img[y:y+h,x:x+w]
            aligned = resize(face_cropped, (image_size, image_size), mode='reflect')
            aligned_images.append(aligned)
    return np.array(aligned_images), nms_boxes

def calc_embs(img, batch_size=1):
    cropped_images, boxes=load_and_align_images(img)
    #to fix the bug of no detection
    if(cropped_images.shape[0]!=0):
        aligned_images = tf.image.per_image_standardization(cropped_images)
        embs = []
        #calculate the emeddings
        for start in range(0, len(aligned_images), batch_size):
            pd=FaceNet.predict_on_batch(aligned_images[start:start+batch_size])
            embs.append((tf.math.l2_normalize(pd, axis=-1, epsilon=1e-10)))
    else:
        embs=[np.zeros((128,1))]
    return embs, boxes

def calc_dist(img0, img1):
    #calculate the distances
    return distance.euclidean(img0, img1)

In [6]:
#pre definied varaiables
CONFIDENCE = 0.7
SCORE_THRESHOLD = 0.7
IOU_THRESHOLD = 0.6

#load ref embeddings
ref_embeddings=np.load('ref_embeddings.npy')

#load ref names dict
with open('ref_embeddings_names.pkl', 'rb') as f:
    ref_embeddings_names = pickle.load(f)

# define a video capture object
vid = cv2.VideoCapture(0)

#define a timer to measure the performance
start_time=time.time()

while(True):
    # Capture the video frame by frame    
    ret, frame = vid.read()
    
    #calculate the embeddings and boxes
    embeddings,boxes=calc_embs(frame)
    
    #loop over every embedding to calculate the distances
    for i,embedding in enumerate(embeddings):
        distances=[]
        for j in range(len(ref_embeddings)):
            dist=calc_dist(embedding,ref_embeddings[j])
            distances.append(dist)
    
        #choose the minimum distance and user
        thersold_value=0.9
        indexes=np.argmin(distances)
        decision_value=distances[indexes]    
        if decision_value > thersold_value:
            name='Unknown'
        else:      
            name=ref_embeddings_names[indexes]

        #draw the frame
        x, y = boxes[i][0], boxes[i][1]
        w, h = boxes[i][2], boxes[i][3]
        cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2,)          
        cv2.putText(frame, name, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,0.5, (255,0,0), 2)

    #measure the performance
    cv2.putText(frame, str(int(1/(time.time()-start_time))), (0, 25), cv2.FONT_HERSHEY_SIMPLEX,1, (255,0,0), 2)
    start_time=time.time()
    print(distances)
    
    # Display the resulting frame
    cv2.imshow('frame', frame)

    # the 'q' button is set as the
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# After the loop release the cap object
vid.release()
# Destroy all the windows
cv2.destroyAllWindows()

[1.0385243892669678, 1.1049963235855103, 1.165015697479248, 1.2523627281188965, 1.2557990550994873, 1.076358675956726]
[1.0702455043792725, 1.2043282985687256, 1.2057147026062012, 1.2863237857818604, 1.3319413661956787, 1.155860424041748]
[0.9999999239258534, 1.0000000828898676, 0.9999999293831231, 0.9999999534350931, 1.0000000106383855, 1.0000000057865295]
[0.9999999239258534, 1.0000000828898676, 0.9999999293831231, 0.9999999534350931, 1.0000000106383855, 1.0000000057865295]
[0.9999999239258534, 1.0000000828898676, 0.9999999293831231, 0.9999999534350931, 1.0000000106383855, 1.0000000057865295]
[0.9999999239258534, 1.0000000828898676, 0.9999999293831231, 0.9999999534350931, 1.0000000106383855, 1.0000000057865295]
[1.2957464456558228, 1.2237937450408936, 1.2564276456832886, 1.4910542964935303, 1.3595117330551147, 1.4024776220321655]
[1.2200437784194946, 1.2648179531097412, 1.3167457580566406, 1.4794310331344604, 1.4039798974990845, 1.3653631210327148]
[1.3362654447555542, 1.217182159423

[0.7912642359733582, 1.1501537561416626, 1.152875542640686, 1.2699356079101562, 1.2648321390151978, 1.065006136894226]
[1.43741774559021, 1.288033127784729, 1.378855586051941, 1.3681973218917847, 1.5483546257019043, 1.3248066902160645]
[1.3891850709915161, 1.3018124103546143, 1.3581839799880981, 1.0563929080963135, 1.385908603668213, 1.1954567432403564]
[1.3887537717819214, 1.308037519454956, 1.287247896194458, 0.9273898601531982, 1.2991012334823608, 1.1929991245269775]
[1.3828014135360718, 1.3407284021377563, 1.3009392023086548, 0.9555172324180603, 1.3411566019058228, 1.1848562955856323]
[1.3151028156280518, 1.3417065143585205, 1.274398922920227, 0.8710038661956787, 1.3086981773376465, 1.085534691810608]
[1.4036445617675781, 1.3714736700057983, 1.3005807399749756, 0.9068684577941895, 1.377225399017334, 1.2696764469146729]
[1.3544269800186157, 1.3331272602081299, 1.3494417667388916, 0.8326912522315979, 1.3373520374298096, 1.126704216003418]
[1.4325731992721558, 1.3909180164337158, 1.29

[0.8387383818626404, 1.095045566558838, 1.191616177558899, 1.245112657546997, 1.2394521236419678, 1.018071174621582]
[0.7854403853416443, 1.0740206241607666, 1.1852242946624756, 1.2883096933364868, 1.2846728563308716, 1.0753674507141113]
[0.7776628136634827, 1.011732816696167, 1.1460212469100952, 1.287802815437317, 1.2463467121124268, 1.0557106733322144]
[0.7563064694404602, 1.0707162618637085, 1.1752455234527588, 1.3033403158187866, 1.281935214996338, 1.1020357608795166]
[0.7378789782524109, 1.0872743129730225, 1.109643578529358, 1.2778164148330688, 1.2738018035888672, 1.0114027261734009]
[0.7317547798156738, 1.054666519165039, 1.1491520404815674, 1.3031667470932007, 1.2432923316955566, 0.9936358332633972]
[0.7707051038742065, 1.0488582849502563, 1.1326913833618164, 1.254537582397461, 1.2753863334655762, 1.0232267379760742]
[0.7712316513061523, 1.0700477361679077, 1.1543937921524048, 1.2966550588607788, 1.2373560667037964, 1.0025458335876465]
[0.7825276851654053, 1.097999930381775, 1.

In [5]:
plt.imshow(dummy2)

NameError: name 'dummy2' is not defined