In [27]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import pandas as pd
import os

# Number of DataLoader worker processes: none on Windows (os.name == 'nt'),
# four everywhere else.
# NOTE(review): presumably 0 on Windows to avoid multiprocessing issues with
# DataLoader workers there -- confirm.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

In [28]:
# Use the first CUDA GPU when PyTorch reports one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


In [29]:
# MTCNN face detector, used below both to align the dataset images and to
# find face boxes in webcam frames. Parameter notes follow the
# facenet_pytorch MTCNN documentation.
mtcnn = MTCNN(
    image_size=160,               # side length (pixels) of the returned face crop
    margin=0,                     # extra margin added around the detected box
    min_face_size=20,             # smallest face size to search for
    thresholds=[0.6, 0.7, 0.7],   # detection thresholds, one per MTCNN stage
    factor=0.709,                 # scale factor of the image pyramid
    post_process=True,            # post-process the returned face tensors
    device=device,                # run the detector on the selected device
)

#### Define Inception Resnet V1 module

Set classify=True for pretrained classifier. For this example, we will use the model to output embeddings/CNN features. Note that for inference, it is important to set the model to `eval` mode.

See `help(InceptionResnetV1)` for more details.

In [30]:
# Build the Inception-ResNet V1 model with the 'vggface2' pretrained weights,
# switch it to eval mode for inference, then move it to the selected device.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet.eval()
resnet = resnet.to(device)

#### Define a dataset and data loader

We add the `idx_to_class` attribute to the dataset to enable easy recoding of label indices to identity names later on.

In [31]:
def collate_fn(x):
    """Return the first item of the batch list ``x`` without any stacking.

    The loader in this notebook is created without a ``batch_size`` argument,
    so each batch is expected to hold a single sample; this hands that
    sample back as-is instead of collating it into tensors.
    """
    first_sample = x[0]
    return first_sample

# Load the test images from disk with torchvision's ImageFolder.
dataset = datasets.ImageFolder('../data/test_images')

# Invert class_to_idx (name -> index) so label indices can be mapped back to
# identity names.
dataset.idx_to_class = dict(
    (class_idx, class_name)
    for class_name, class_idx in dataset.class_to_idx.items()
)

# collate_fn returns the first sample of each batch, so iterating the loader
# yields one sample at a time.
loader = DataLoader(
    dataset,
    collate_fn=collate_fn,
    num_workers=workers,
)

In [32]:
# Run the face detector over every dataset image, collecting the aligned
# face returned by MTCNN together with the matching identity name.
aligned, names = [], []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is None:
        # MTCNN returned no face for this image; skip it.
        continue
    print('Face detected with probability: {:8f}'.format(prob))
    aligned.append(x_aligned)
    names.append(dataset.idx_to_class[y])

Face detected with probability: 0.999983
Face detected with probability: 0.999934
Face detected with probability: 0.999733
Face detected with probability: 0.999876
Face detected with probability: 0.999992


In [36]:
# open camera
import cv2
import time
# Live webcam demo: run MTCNN face detection on every frame, draw the detected
# boxes, and show the annotated frame plus a crop of the most recently
# detected face. Press 'q' in one of the OpenCV windows to stop.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The read below will fail and end the loop; say why instead of exiting silently.
    print('Could not open camera 0')

# Most recent face crop (RGB). Stays None until a face has been detected, so
# the crop window is only shown once there is something valid to display.
crop = None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing, unplugged, or stream ended).
            break

        # OpenCV delivers BGR frames; convert to RGB before detection.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_h, frame_w = frame.shape[:2]

        # Time only the detector call. perf_counter is monotonic and has a
        # finer resolution than time.time(); the guard avoids a division by
        # zero if the measured interval is still 0.
        start = time.perf_counter()
        boxes, probs = mtcnn.detect(frame)
        elapsed = time.perf_counter() - start
        fps = int(1 / elapsed) if elapsed > 0 else 0
        print("FPS: ", fps)

        # facenet_pytorch returns None for `boxes` when no face is found, so
        # check explicitly instead of hiding the TypeError (and every other
        # error) behind a bare except.
        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = (int(b) for b in box)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Label the box with the current detector FPS.
                cv2.putText(frame, str(fps), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 0), 2, cv2.LINE_AA)

                # Clamp the box to the frame before slicing: negative
                # coordinates would otherwise be read as "from the end" and
                # give a wrong or empty crop.
                cx1, cy1 = max(x1, 0), max(y1, 0)
                cx2, cy2 = min(x2, frame_w), min(y2, frame_h)
                if cx2 > cx1 and cy2 > cy1:
                    crop = frame[cy1:cy2, cx1:cx2]

        # Convert back to BGR for display with imshow.
        cv2.imshow('frame', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        if crop is not None:
            cv2.imshow('crop', cv2.cvtColor(crop, cv2.COLOR_RGB2BGR))

        # A single short wait both services the GUI event loop and polls for
        # 'q'. (A preceding waitKey(0) would block on every frame and swallow
        # the key press.)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if the loop raised
    # or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()


FPS:  17.0
FPS:  14.0
FPS:  15.0
FPS:  17.0
FPS:  17.0
FPS:  17.0
FPS:  16.0
FPS:  17.0
FPS:  19.0
