In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import pandas as pd
import os
# Prefer the first CUDA GPU when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

# DataLoader worker count: none when os.name is 'nt', four otherwise.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

Running on device: cpu


In [2]:
def calculate_dist(embeddings, labels=None):
    """Print the pairwise Euclidean distance table for a set of embeddings.

    Args:
        embeddings: Sized, re-iterable collection of tensors (for example a
            2-D tensor with one embedding per row). Every pair is compared
            with ``(e1 - e2).norm()``.
        labels: Optional sequence with one label per embedding, used as both
            the row and column headers of the printed table. When omitted,
            the notebook-level ``names`` list is used, which keeps existing
            ``calculate_dist(embeddings)`` calls working unchanged.

    Returns:
        None. The table is printed as a pandas DataFrame.

    Raises:
        ValueError: If the number of labels differs from the number of
            embeddings.
    """
    if labels is None:
        # Backward-compatible fallback to the global filled by the detection loop.
        labels = names
    labels = list(labels)
    if len(labels) != len(embeddings):
        raise ValueError(
            'Got {} labels for {} embeddings'.format(len(labels), len(embeddings))
        )

    # The distances are only reported, so there is no need to record an
    # autograd graph for every pairwise subtraction.
    with torch.no_grad():
        dists = [
            [(e1 - e2).norm().item() for e2 in embeddings]
            for e1 in embeddings
        ]
    print(pd.DataFrame(dists, columns=labels, index=labels))

def im_test(im_path,embedder):
    """Print the distance from one image's face embedding to every stored embedding.

    The image at ``im_path`` is cropped with the notebook-level ``mtcnn``
    detector, embedded with ``embedder``, and compared against each entry of
    the notebook-level ``embeddings``; one ``distance name`` line is printed
    per entry, paired with the matching item of the global ``names``.

    Args:
        im_path: Path of the image file to test.
        embedder: Callable model mapping a batch of cropped faces to a batch
            of embeddings. It should be the model that produced the global
            ``embeddings`` for the distances to be meaningful.

    Raises:
        ValueError: If the detector returns ``None`` for the image (the
            detection loops in this notebook treat ``None`` as "no face").
    """
    # Local import so the helper does not depend on a later cell having
    # imported PIL before it is called.
    from PIL import Image

    # The context manager closes the file handle. convert('RGB') loads the
    # pixels and gives the detector a 3-channel image whatever the file's
    # mode (grayscale, RGBA, palette).
    with Image.open(im_path) as img:
        img_cropped = mtcnn(img.convert('RGB'))

    if img_cropped is None:
        # Without this guard torch.stack([None]) fails with an unrelated TypeError.
        raise ValueError('No face detected in {!r}'.format(im_path))

    aligned = torch.stack([img_cropped]).to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        img_embedding = embedder(aligned)

    for embedding, name in zip(embeddings,names):
        print((embedding-img_embedding[0]).norm().item(),name)

In [3]:
# Face detector used to crop faces before they are embedded.
# NOTE(review): image_size=160 is presumably the crop size expected by the
# InceptionResnetV1 model created in the next cell -- confirm in the
# facenet-pytorch documentation.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)


In [4]:
# Build the embedding network with its 'vggface2' pretrained weights, switch
# it to evaluation mode and place it on the selected device.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval()
resnet = resnet.to(device)

In [5]:
from PIL import Image
def collate_fn(x):
    """Return the first element of the batch list ``x`` unchanged.

    Passed to the DataLoader as ``collate_fn`` so that each iteration yields
    a single raw sample rather than a collated batch.
    """
    first_sample = x[0]
    return first_sample

dataset = datasets.ImageFolder('./images')

# Invert class_to_idx once so the integer labels yielded by the loader can be
# mapped back to class names; show the mapping for reference.
dataset.idx_to_class = {
    class_idx: class_name
    for class_name, class_idx in dataset.class_to_idx.items()
}
print(dataset.idx_to_class)

# collate_fn hands back one raw sample per iteration.
loader = DataLoader(
    dataset,
    collate_fn=collate_fn,
    num_workers=workers,
)

{0: 'Dwight', 1: 'Michael'}


In [6]:
# Detect and crop the face in every image from the loader. Images for which
# the detector returns None are skipped, so `names` stays aligned with the
# rows of `aligned`.
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

# torch.stack raises an opaque error on an empty list; fail with a message
# that names the actual problem instead.
if not aligned:
    raise RuntimeError('No faces were detected in any image from the loader')

aligned = torch.stack(aligned).to(device)

# Inference only: no_grad avoids recording an autograd graph for the batch.
with torch.no_grad():
    embeddings = resnet(aligned)

Face detected with probability: 0.999847
Face detected with probability: 0.999954
Face detected with probability: 0.999962
Face detected with probability: 0.999998


In [7]:
# Print the pairwise distance table for the embeddings produced by `resnet`.
calculate_dist(embeddings)

           Dwight    Dwight   Michael   Michael
Dwight   0.000000  0.496898  1.555670  1.505317
Dwight   0.496898  0.000000  1.494655  1.475376
Michael  1.555670  1.494655  0.000000  0.511582
Michael  1.505317  1.475376  0.511582  0.000000


In [8]:
# Embed one image from ./images with `resnet` and print its distance to every
# stored embedding.
im_test("images/Michael/Michael2.jpg",resnet)


1.5053170919418335 Dwight
1.4753758907318115 Dwight
0.5115817785263062 Michael
2.8422968512131774e-07 Michael


In [9]:
import torch
from imageio import imread
from torchvision import transforms

import insightface

In [10]:
# Re-create the detector with image_size=112 for the second embedder. This
# rebinds the global `mtcnn`, which im_test() also reads, so later im_test()
# calls use these crops too.
mtcnn = MTCNN(
    image_size=112, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

# The batch below is moved to `device`, so the model has to live there too;
# otherwise the forward pass fails with a device mismatch when CUDA is used.
embedder = insightface.iresnet100(pretrained=True).to(device)
embedder.eval()

mean = [0.5] * 3
std = [0.5 * 256 / 255] * 3
# NOTE(review): `preprocess` is defined but deliberately not applied to the
# crops below. MTCNN(post_process=True) presumably already standardises its
# output -- confirm against the facenet-pytorch docs before enabling it. If it
# is ever applied, it must run after the `is not None` check.
preprocess = transforms.Compose([
    transforms.Normalize(mean, std)
])

# Detect and crop the face in every image; images for which the detector
# returns None are skipped so `names` stays aligned with the rows of `aligned`.
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

# torch.stack raises an opaque error on an empty list; fail with a message
# that names the actual problem instead.
if not aligned:
    raise RuntimeError('No faces were detected in any image from the loader')

aligned = torch.stack(aligned).to(device)

# Inference only: no_grad avoids recording an autograd graph for the batch.
with torch.no_grad():
    embeddings = embedder(aligned)


Face detected with probability: 0.999847
Face detected with probability: 0.999954
Face detected with probability: 0.999962
Face detected with probability: 0.999998


In [11]:
# Print the pairwise distance table for the embeddings produced by `embedder`.
calculate_dist(embeddings)

            Dwight     Dwight    Michael    Michael
Dwight    0.000000  16.780432  33.451622  33.817474
Dwight   16.780432   0.000000  31.507275  31.206282
Michael  33.451622  31.507275   0.000000  20.835302
Michael  33.817474  31.206282  20.835302   0.000000


In [12]:
# Embed one image from ./images with `embedder` and print its distance to
# every stored embedding.
im_test("images/Michael/Michael2.jpg",embedder)

33.81747817993164 Dwight
31.206281661987305 Dwight
20.83530044555664 Michael
6.191942247824045e-06 Michael
