In [10]:

# importing libraries

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import cv2
import time
import os
import numpy as np

In [2]:
# initializing MTCNN and InceptionResnetV1 
# We use mtcnn0 to detect faces in our data, 
mtcnn0 = MTCNN(image_size=240, margin=0, keep_all=False, min_face_size=40) # keep_all=False
# We use mtcnn to detect multiple faces in our cam
mtcnn = MTCNN(image_size=240, margin=0, keep_all=True, min_face_size=40) # keep_all=True
# This return a pretrained model that is vggface2
resnet = InceptionResnetV1(pretrained='vggface2').eval()

In [3]:
# Read data from folder

dataset = datasets.ImageFolder('data2') # photos folder path 
idx_to_class = {i:c for c,i in dataset.class_to_idx.items()} # accessing names of peoples from folder names

def collate_fn(x):
    return x[0]

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = [] # list of names corrospoing to cropped photos
embedding_list = [] # list of embeding matrix after conversion from cropped faces to embedding matrix using resnet

for img, idx in loader:
    print(img.size)
    #Unlike other implementations, calling a facenet-pytorch MTCNN object directly with an image (i.e., using the forward method for those familiar with pytorch) will return torch tensors containing the detected face(s), rather than just the bounding boxes. This is to enable using the module easily as the first stage of a facial recognition pipeline, in which the faces are passed directly to an additional network or algorithm.
    face, prob = mtcnn0(img, return_prob=True)
    print("face: ",face," prob:",prob)
    if face is not None and prob>0.92:
        emb = resnet(face.unsqueeze(0)) 
        embedding_list.append(emb.detach()) 
        name_list.append(idx_to_class[idx])        

# save data
data = [embedding_list, name_list] 
torch.save(data, 'data.pt') # saving data.pt file

.6758,  ..., -0.6445, -0.6523, -0.6602],
         [-0.6680, -0.6758, -0.6758,  ..., -0.6602, -0.6602, -0.6680],
         [-0.6602, -0.6602, -0.6602,  ..., -0.6680, -0.6680, -0.6758],
         ...,
         [ 0.6523,  0.6523,  0.6523,  ..., -0.9258, -0.9258, -0.9180],
         [ 0.6523,  0.6523,  0.6523,  ..., -0.9336, -0.9258, -0.9258],
         [ 0.6523,  0.6523,  0.6523,  ..., -0.9180, -0.9102, -0.9180]],

        [[-0.5508, -0.5586, -0.5664,  ..., -0.5508, -0.5586, -0.5664],
         [-0.5586, -0.5664, -0.5664,  ..., -0.5664, -0.5664, -0.5742],
         [-0.5508, -0.5508, -0.5508,  ..., -0.5742, -0.5742, -0.5820],
         ...,
         [ 0.6367,  0.6367,  0.6367,  ..., -0.8477, -0.8320, -0.8164],
         [ 0.6367,  0.6367,  0.6367,  ..., -0.8242, -0.8008, -0.7930],
         [ 0.6367,  0.6367,  0.6367,  ..., -0.7930, -0.7930, -0.7930]]])  prob: 0.9999368
(1200, 1600)
face:  tensor([[[-0.3555, -0.3633, -0.4258,  ..., -0.6211, -0.5586, -0.5195],
         [-0.3164, -0.3477, -0.4258,  

In [19]:
# Using webcam recognize face

# loading data.pt file
load_data = torch.load('data.pt') 
embedding_list = load_data[0] 
name_list = load_data[1] 

cam = cv2.VideoCapture("./personas.mp4") 

while not cam.isOpened():
    cam = cv2.VideoCapture("./personas.mp4")
    cv2.waitKey(1000)
    print ("Wait for the header")

post_frame = cam.get(cv2.CAP_PROP_POS_FRAMES)

while True:
    ret, frame = cam.read()
    if not ret:
        print("fail to grab frame, try again")
        # The next frame is not ready, so we try to read it again
        cap.set(cv2.CV_CAP_PROP_POS_FRAMES, pos_frame-1)
        print("frame is not ready")
        # It is better to wait for a while for the next frame to be ready
        cv2.waitKey(1000)
        break
        
    img = Image.fromarray(frame)
    img_cropped_list, prob_list = mtcnn(img, return_prob=True) 
    
    if img_cropped_list is not None:
        #return boxed faces
        boxes, _ = mtcnn.detect(img)
                
        for i, prob in enumerate(prob_list):
            if prob>0.90:
                emb = resnet(img_cropped_list[i].unsqueeze(0)).detach() 
                
                dist_list = [] # list of matched distances, minimum distance is used to identify the person
                
                for idx, emb_db in enumerate(embedding_list):
                    dist = torch.dist(emb, emb_db).item()
                    dist_list.append(dist)

                min_dist = min(dist_list) # get minumum dist value
                min_dist_idx = dist_list.index(min_dist) # get minumum dist index
                name = name_list[min_dist_idx] # get name corrosponding to minimum dist
                
                box = boxes[i]
                
                original_frame = frame.copy() # storing copy of frame before drawing on it
                
                if min_dist<0.90:
                    #bgr
                    frame = cv2.putText(frame, str(name)+' '+str(min_dist), (box[0],box[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (63, 0, 252),1, cv2.LINE_AA)
                    #frame = cv2.putText(frame, 'Hola ' +name, (box[0],box[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (63, 0, 252),1, cv2.LINE_AA)

                frame = cv2.rectangle(frame, (box[0].astype(int),box[1].astype(int)) , (box[2].astype(int),box[3].astype(int)), (13,214,53), 2)

    cv2.imshow("IMG", frame)

    if cv2.waitKey(10) == 27:
        break
    #if cam.get(cv2.CAP_PROP_POS_FRAMEScv2.CV_CAP_PRcv2.CAP_PROP_POS_FRAMESv2.CV_CAP_PROP_FRAME_COUNT):
        # If the number of captured frames is equal to the total number of frames,
#        # we stop
        break
        
    
    k = cv2.waitKey(1)
    if k%256==27: # ESC
        print('Esc pressed, closing...')
        break
        
    elif k%256==32: # space to save image
        print('Enter your name :')
        name = input()
        
        # create directory if not exists
        if not os.path.exists('data2/'+name):
            os.mkdir('data2/'+name)
            
        img_name = "data2/{}/{}.jpg".format(name, int(time.time()))
        cv2.imwrite(img_name, original_frame)
        print(" saved: {}".format(img_name))
        
        
cam.release()
cv2.destroyAllWindows()

error: OpenCV(4.5.2) :-1: error: (-5:Bad argument) in function 'putText'
> Overload resolution failed:
>  - Can't parse 'org'. Sequence item with index 0 has a wrong type
>  - Can't parse 'org'. Sequence item with index 0 has a wrong type


In [18]:
type(box[1].astype(int))

numpy.int32