In [1]:
# importing libraries

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import cv2
import time
import os

In [2]:
# Build the two face detectors and the embedding network once, up front.
# Both detectors share every setting except keep_all, so the common
# arguments are written down a single time.
_detector_settings = dict(image_size=240, margin=0, min_face_size=40)

# mtcnn0 (keep_all=False) is the detector applied to the stored dataset photos.
mtcnn0 = MTCNN(keep_all=False, **_detector_settings)
# mtcnn (keep_all=True) is the detector applied to live webcam frames.
mtcnn = MTCNN(keep_all=True, **_detector_settings)

# Embedding network loaded with the 'vggface2' weights, switched to eval mode.
resnet = InceptionResnetV1(pretrained='vggface2').eval()

In [3]:
def check_path(path):
    """Make sure the directory component of ``path`` exists.

    The directory part is taken with ``os.path.dirname``, so pass a trailing
    separator (``"dataset/"``) to create the directory itself, or a file path
    (``"dataset/img.jpg"``) to create its parent.

    Args:
        path: A directory path ending in a separator, or a file path.

    Returns:
        None. Missing directories (including parents) are created as a side
        effect; nothing happens when ``path`` has no directory component.
    """
    directory = os.path.dirname(path)
    if not directory:
        # A bare filename (or empty string) lives in the current directory;
        # os.makedirs('') would raise FileNotFoundError.
        return
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
# Capture grayscale face crops from the webcam into dataset/Dima/.
# O(1)
vid_cam = cv2.VideoCapture(0)  # Start video capturing
# Use the Haar cascade bundled with the installed OpenCV package rather than a
# machine-specific absolute path (whose unescaped backslashes were also
# invalid escape sequences).
face_cascade = cv2.CascadeClassifier(
    os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
)
if face_cascade.empty():
    vid_cam.release()
    raise IOError("Could not load haarcascade_frontalface_default.xml")
face_id = 1  # For each person, there will be one face id
count = 0    # Number of face samples written so far
# cv2.imwrite does not create directories (it just returns False), so the
# per-person folder has to exist before the first sample is saved.
check_path("dataset/Dima/")
start_time = time.time()
# O(nm) n-number of frames m-number of faces
try:
    while True:
        # read() returns (success flag, frame); the frame is None on failure.
        ret, image_frame = vid_cam.read()
        if not ret:
            print("fail to grab frame, try again")
            break
        gray = cv2.cvtColor(image_frame, cv2.COLOR_BGR2GRAY)  # Convert frame to grayscale
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)   # Detect faces using the cascade classifier
        for (x, y, w, h) in faces:
            count += 1  # One more sample
            # Save the grayscale face crop into the dataset folder
            cv2.imwrite("dataset/Dima/User." + str(face_id) + '.' + str(count) + ".jpg", gray[y:y+h, x:x+w])
            # Mark the detected face on the preview frame
            cv2.rectangle(image_frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        # Show every frame (not only frames with a face); waitKey is required
        # for the HighGUI window to actually repaint.
        cv2.imshow('Creating Dataset!!!', image_frame)
        if cv2.waitKey(1) % 256 == 27:  # ESC aborts the capture early
            break
        if count > 1000:  # Stop once more than 1000 samples have been taken
            break
    print(time.time() - start_time)
finally:
    # O(1) -- always free the camera and close the preview, even on error
    vid_cam.release()        # Stop video
    cv2.destroyAllWindows()  # Close all windows


52.350488901138306


In [4]:
# Load the enrolled photos from the 'dataset' folder; the sub-folder names
# serve as the people's names.
dataset = datasets.ImageFolder('dataset')

# Invert the name -> index mapping so a class index can be turned back into a name.
idx_to_class = dict(
    (class_index, person) for person, class_index in dataset.class_to_idx.items()
)


def collate_fn(x):
    """Unwrap a batch by returning its first sample unchanged.

    The DataLoader that uses this function is created without a
    ``batch_size`` argument, so each batch is expected to hold exactly one
    sample -- TODO confirm if a batch size is ever set.

    Args:
        x: The batch, an indexable sequence of samples.

    Returns:
        ``x[0]``, the first sample of the batch.
    """
    sample = x[0]
    return sample

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = []       # names corresponding to the accepted face crops
embedding_list = []  # embedding produced by resnet for each accepted face crop


# O(n) n number of pictures
start_time = time.time()
# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    for img, idx in loader:
        face, prob = mtcnn0(img, return_prob=True)
        # Skip photos with no detected face or a low detection probability.
        if face is not None and prob > 0.92:
            emb = resnet(face.unsqueeze(0))  # unsqueeze adds the batch dimension
            embedding_list.append(emb.detach())
            name_list.append(idx_to_class[idx])


# save data
print(time.time() - start_time)
if not embedding_list:
    # Refuse to overwrite data.pt with an empty database; recognition cannot
    # pick a nearest neighbour from zero embeddings.
    raise RuntimeError("No face passed the detection threshold; data.pt was not written")
data = [embedding_list, name_list]
torch.save(data, 'data.pt')  # saving data.pt file

75.53178143501282


In [5]:
# Using webcam recognize face

# loading data.pt file: [list of embeddings, list of matching names]
# NOTE(review): torch.load unpickles the file -- only load a data.pt you created yourself.
load_data = torch.load('data.pt')
embedding_list = load_data[0]
name_list = load_data[1]
if len(embedding_list) == 0:
    raise RuntimeError("data.pt contains no embeddings; run the enrollment cell first")
# Stack the database once so every query is a single vectorised distance
# computation instead of a Python loop over all stored embeddings.
embedding_db = torch.stack([stored.flatten() for stored in embedding_list])

ll = []          # best (minimum) distance of every accepted detection
count_TRUE = 0   # detections whose best distance is under the match threshold
count_TOTAL = 0  # frames processed

cam = cv2.VideoCapture(0)
start_time = time.time()
# O(frame*m*n*A/freq) A - unknown complexity(MTCNN, MTCNN.detect, resnet)
try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("fail to grab frame, try again")
            break
        if count_TOTAL > 500:  # stop after the fixed evaluation budget of frames
            break

        count_TOTAL += 1
        # OpenCV delivers BGR frames; convert to RGB before handing the image
        # to PIL and the face models.
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img_cropped_list, prob_list = mtcnn(img, return_prob=True)

        if img_cropped_list is not None:
            # NOTE(review): this runs the detector a second time just to get the
            # boxes; assumes boxes[i] lines up with img_cropped_list[i] -- confirm.
            boxes, _ = mtcnn.detect(img)

            for i, prob in enumerate(prob_list):
                if prob > 0.90:
                    with torch.no_grad():  # inference only
                        emb = resnet(img_cropped_list[i].unsqueeze(0)).detach()
                        # Euclidean distance from this face to every stored embedding.
                        dists = (embedding_db - emb.flatten()).norm(dim=1)
                        best_dist, best_idx = torch.min(dists, dim=0)

                    min_dist = best_dist.item()  # minimum distance identifies the person
                    ll.append(min_dist)
                    min_dist_idx = best_idx.item()
                    name = name_list[min_dist_idx]  # name corresponding to the minimum distance

                    box = [int(coord) for coord in boxes[i]]

                    if min_dist < 0.90:
                        text = f'{name} {min_dist:.2f}'
                        cv2.putText(frame, text, (box[0], box[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1,
                                    cv2.LINE_AA)
                        count_TRUE += 1

                    frame = cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 2)

        cv2.imshow("IMG", frame)

        k = cv2.waitKey(1)
        if k % 256 == 27:  # ESC
            print('Esc pressed, closing...')
            break
    print(time.time() - start_time)
finally:
    # Always free the camera and close the preview window, even on error.
    cam.release()
    cv2.destroyAllWindows()

if count_TOTAL:
    print(f'True/Total {count_TRUE/count_TOTAL}')
else:
    # No frame was ever read; avoid dividing by zero.
    print('True/Total n/a (no frames captured)')
with open("torch_result.txt", "w") as f:  # "a" would append instead of overwrite
    f.write(str(ll))
    

63.15678524971008
True/Total 0.8882235528942116
