## Improve using facenet

In [None]:
!pip install facenet-pytorch

In [1]:
from facenet_pytorch import MTCNN, InceptionResnetV1
from sklearn.neighbors import NearestNeighbors
import numpy as np
import tensorflow_datasets as tfds
import torch

## Get the LFW dataset

In [3]:
# Load the 'train' split of LFW. File shuffling is left off (the default) so
# that every pass over `ds` yields the examples in the same order; later cells
# rely on that to pair embedding rows with labels.
ds = tfds.load(name='lfw', split='train', shuffle_files=False)

[1mDownloading and preparing dataset lfw/0.1.0 (download: 172.20 MiB, generated: Unknown size, total: 172.20 MiB) to /root/tensorflow_datasets/lfw/0.1.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]






0 examples [00:00, ? examples/s]

Shuffling and writing examples to /root/tensorflow_datasets/lfw/0.1.0.incompleteT5WO82/lfw-train.tfrecord


  0%|          | 0/13233 [00:00<?, ? examples/s]

[1mDataset lfw downloaded and prepared to /root/tensorflow_datasets/lfw/0.1.0. Subsequent calls will reuse this data.[0m


## Use FaceNet to generate embedding vectors

In [4]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# MTCNN produces the face crop; InceptionResnetV1 (vggface2 weights, eval mode)
# maps that crop to an embedding vector.
mtcnn = MTCNN(device=device)
model = InceptionResnetV1(pretrained='vggface2', device=device).eval()

# Open with 'wb' rather than 'ab': the loop always starts from the first
# example, so appending on a re-run would duplicate every row and break the
# row <-> label alignment used during evaluation.
# torch.no_grad() avoids building an autograd graph for pure inference, which
# also removes the need to detach the output or flush the CUDA cache per step.
with open('vectors.csv', 'wb') as f, torch.no_grad():
  for idx, example in enumerate(ds):
    aligned = mtcnn(example['image'].numpy())

    # MTCNN returns None when it finds no face. Fail loudly with the example
    # index instead of an opaque AttributeError; silently skipping the image
    # would shift every following row relative to its label.
    if aligned is None:
      raise RuntimeError(f'MTCNN detected no face in example {idx}')

    # Add a leading batch dimension of 1, embed, and write one CSV row.
    embedding = model(aligned.to(device)[None, :]).cpu()
    np.savetxt(f, embedding.numpy(), delimiter=',')

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [6]:
# Read the embeddings written to vectors.csv back into a float array
# (comma-separated, one row per saved vector).
vectors = np.loadtxt('vectors.csv', dtype=float, delimiter=',')

## KNN search

In [7]:
# Build the neighbour index over all embeddings (n_jobs=-1: use every core),
# then query it with the same embeddings, keeping only the indices of the
# 5 nearest neighbours of each row.
knn = NearestNeighbors(n_jobs=-1).fit(vectors)
neigh = knn.kneighbors(X=vectors, n_neighbors=5, return_distance=False)

In [14]:
# Collect the label of every LFW example, in dataset iteration order.
labels = [example['label'].numpy() for example in ds]

In [15]:
# Count how many images each identity has (5749 people in total).
faces_per_person = {}
for label in labels:
  faces_per_person[label] = faces_per_person.get(label, 0) + 1

# Number of neighbours returned per query (includes the query itself when the
# query is its own nearest neighbour).
k = neigh.shape[1]

acc = 0        # running sum of per-query scores
evaluated = 0  # number of queries that were actually scored

for query_idx, neighbours in enumerate(neigh):
  # Row i of `neigh` belongs to image i, so take the query label from the row
  # index. Relying on neighbours[0] being the query breaks when two images
  # have identical or tied embeddings.
  query_label = labels[query_idx]
  faces_cur_person = faces_per_person[query_label]

  # we don't want person with only 1 image during evaluation
  # but we can use them as noise so skip them
  if faces_cur_person == 1:
    continue

  # we only calculate k knn neighbours, so at most k - 1 other images of the
  # same person can ever be retrieved
  max_hits = min(faces_cur_person, k) - 1

  # Drop the query itself from its neighbour list, then count how many of the
  # remaining (at most k - 1) neighbours share the query's label.
  others = [j for j in neighbours if j != query_idx][:k - 1]
  hits = sum(1 for j in others if labels[j] == query_label)

  acc += hits / max_hits
  evaluated += 1

# Average over the scored queries instead of a hard-coded count
# (9164 images of 1680 people with 2 or more images in the original run).
print(acc / evaluated)

0.9206496435326649
