### This Jupyter notebook recognizes faces in a live camera feed

In [1]:
# importing libraries

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import cv2
import time
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set up the two MTCNN face detectors and the InceptionResnetV1 embedding network

# Arguments shared by both detectors; they differ only in keep_all.
_MTCNN_SHARED_KWARGS = dict(image_size=240, margin=0, min_face_size=40)

mtcnn0 = MTCNN(keep_all=False, **_MTCNN_SHARED_KWARGS)  # keep_all=False: applied to the photos folder
mtcnn = MTCNN(keep_all=True, **_MTCNN_SHARED_KWARGS)  # keep_all=True: applied to webcam frames
resnet = InceptionResnetV1(pretrained='vggface2').eval()  # embedding model, switched to eval mode

In [3]:
# Load the reference photos from disk

dataset = datasets.ImageFolder('photos')  # photos folder path

# Invert class_to_idx so a numeric label maps back to the folder (person) name
idx_to_class = dict(zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys()))

def collate_fn(x):
    """Return the first element of the batch ``x`` instead of a collated batch.

    The DataLoader that uses this is created without a batch_size, so each
    batch presumably holds a single (image, label) sample - verify if a
    batch_size is ever passed.
    """
    first_sample = x[0]
    return first_sample

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = []  # person name for each stored embedding (same order as embedding_list)
embedding_list = []  # embedding produced by resnet for each accepted face crop

# Inference only: run without autograd so no graph is built for each photo
# (the stored embeddings therefore need no .detach()).
with torch.no_grad():
    for img, idx in loader:
        print(idx)
        face, prob = mtcnn0(img, return_prob=True)
        # Skip photos where no face was returned or the detection probability is 0.92 or lower
        if face is not None and prob > 0.92:
            emb = resnet(face.unsqueeze(0))  # add a batch dimension before embedding
            embedding_list.append(emb)
            name_list.append(idx_to_class[idx])

if not embedding_list:
    # An empty database leaves the recognition step with nothing to compare against.
    print("warning: no face passed the detection threshold; data.pt will contain no embeddings")

# save data
data = [embedding_list, name_list]
torch.save(data, 'data.pt')  # saving data.pt file

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1


In [4]:
# Inspect what was saved: data is [embedding_list, name_list].
# NOTE: this prints every embedding tensor in full, so the output grows with the number of photos.
print(data)

[[tensor([[-3.7118e-02,  6.4068e-04, -4.1864e-02,  9.3099e-03, -2.5859e-02,
          2.8985e-02,  1.6316e-02,  7.4955e-03, -4.6818e-02, -5.2490e-02,
          4.4620e-02,  2.4296e-02,  7.1447e-03, -6.7977e-02,  2.7779e-02,
         -5.7301e-03,  3.3642e-03,  8.8947e-03,  6.5585e-02, -7.3146e-02,
         -9.1732e-02, -2.2472e-02,  6.2999e-02,  9.9672e-03,  2.8147e-02,
         -2.0385e-02,  2.5368e-02,  1.3075e-03, -1.6299e-02,  6.0465e-02,
         -4.2731e-02, -6.3864e-02, -3.4208e-02,  1.2286e-02, -2.4901e-02,
          5.7637e-02,  4.3302e-02,  5.9796e-02,  1.2610e-02,  1.5443e-02,
          6.3564e-03,  6.2554e-02,  6.1343e-02, -6.6706e-03,  5.6121e-02,
          5.1941e-02, -1.4201e-02, -3.9378e-02,  3.3473e-02, -5.6704e-02,
         -5.1326e-02,  9.4176e-02, -8.4700e-02, -3.5991e-02, -5.3182e-02,
          5.5047e-02,  2.2908e-02,  3.7715e-02,  4.8122e-03,  4.0224e-02,
         -3.9982e-02, -1.1158e-02,  1.5151e-02, -3.2337e-02,  6.7025e-02,
          5.5866e-02, -4.5879e-02, -

In [5]:
# Recognize faces from the webcam

DETECTION_PROB_MIN = 0.90  # detections with a probability at or below this are ignored
MATCH_DIST_MAX = 0.90  # a face is only labelled if its nearest stored embedding is closer than this

# loading data.pt file (saved as [embedding_list, name_list])
# NOTE: torch.load unpickles the file - only load a data.pt that you created yourself.
load_data = torch.load('data.pt')
embedding_list = load_data[0]
name_list = load_data[1]

# Stack the stored embeddings once so every face is compared against all of them in a
# single vectorised operation. None means nothing is enrolled: faces are boxed but not named.
known_embeddings = torch.cat(embedding_list) if len(embedding_list) > 0 else None


def _closest_match(face_tensor):
    """Return (name, distance) of the stored embedding nearest to face_tensor.

    Returns None when no embeddings are stored. The distance is the Euclidean
    distance between the face's embedding and the closest stored embedding.
    """
    if known_embeddings is None:
        return None
    with torch.no_grad():  # inference only, no autograd graph needed
        emb = resnet(face_tensor.unsqueeze(0))
    dists = (known_embeddings - emb).norm(dim=1)
    best_idx = int(torch.argmin(dists))
    return name_list[best_idx], dists[best_idx].item()


cam = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("fail to grab frame, try again")
            break

        # OpenCV frames are BGR, while the reference photos were loaded as RGB images.
        # Convert before detection so live faces are embedded with the same channel order.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_cropped_list, prob_list = mtcnn(img, return_prob=True)

        if img_cropped_list is not None:
            # NOTE(review): this re-runs detection just to obtain the boxes; presumably they
            # come back in the same order as the crops above - confirm against the library.
            boxes, _ = mtcnn.detect(img)
            # print(boxes)
            for i, prob in enumerate(prob_list):
                if not prob > DETECTION_PROB_MIN:
                    continue

                box = boxes[i]
                top_left = (int(box[0]), int(box[1]))
                bottom_right = (int(box[2]), int(box[3]))

                match = _closest_match(img_cropped_list[i])
                if match is not None:
                    name, min_dist = match  # name corresponding to the minimum distance
                    if min_dist < MATCH_DIST_MAX:
                        frame = cv2.putText(frame, name+' '+str(min_dist), top_left, cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 1, cv2.LINE_AA)

                # Drawing is done on the original BGR frame, which is what imshow displays.
                frame = cv2.rectangle(frame, top_left, bottom_right, (255,0,0), 2)

        cv2.imshow("IMG", frame)

        k = cv2.waitKey(1)
        if k % 256 == 27:  # ESC
            print('Esc pressed, closing...')
            break
finally:
    # Always free the camera and close the window, even if the loop raised
    # or the kernel was interrupted.
    cam.release()
    cv2.destroyAllWindows()
    

Esc pressed, closing...
