## Using Pre-trained ResNet50 Network

In [None]:
from sklearn.datasets import fetch_lfw_people
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import numpy as np

# Load the Labeled Faces in the Wild dataset as full-size RGB images
# (color=True, resize=1) and apply ResNet50's input preprocessing.
lfw = fetch_lfw_people(color=True, resize=1)
# NOTE(review): per the Keras docs, preprocess_input may overwrite its input
# in place, so lfw['images'] should not be reused as raw pixels afterwards.
images = preprocess_input(lfw['images'])
labels = lfw['target']

# ImageNet-pretrained ResNet50 without the classification head; with
# pooling='avg' the network outputs one feature vector per image.
# The input shape is read from the data (125 x 94 x 3 for the settings above)
# so it stays in sync with the `resize` argument instead of being hard-coded.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=images.shape[1:],
    pooling='avg',
)
pred = model.predict(images)

## Using KNN for Vector Search

In [None]:
from sklearn.neighbors import NearestNeighbors

# Index the ResNet50 feature vectors, then query the index with those same
# vectors: `neigh` holds, per image, the indices of its 5 nearest neighbours
# (distances are not needed for the evaluation).
knn = NearestNeighbors(n_jobs=-1).fit(pred)
neigh = knn.kneighbors(X=pred, n_neighbors=5, return_distance=False)

## Evaluate Search Results

In [None]:
# Evaluate retrieval quality: for every query image, measure which fraction
# of the retrievable same-person images appears among its nearest neighbours,
# then average that fraction over all evaluated queries.

# Number of images per person, indexed by label id. Derived from the labels
# themselves (5749 people for the full LFW set) rather than a hard-coded size.
faces_per_person = np.bincount(labels)

# Neighbours retrieved per query (5 in this notebook), query included.
top_k = neigh.shape[1]

acc = 0.0
evaluated = 0
for n in neigh:
    # n[0] is treated as the query image itself.
    # NOTE(review): assumes every point comes back as its own nearest
    # neighbour; confirm this holds if embeddings can tie exactly.
    query_label = labels[n[0]]
    faces_cur_person = faces_per_person[query_label]

    # People with a single image have nothing to retrieve: they stay in the
    # index as noise but are skipped as queries.
    if faces_cur_person < 2:
        continue

    # Only top_k neighbours (query included) are retrieved, so at most
    # top_k - 1 other images of the same person can be found.
    retrievable = min(faces_cur_person, top_k) - 1

    # Same-person hits among the neighbours, minus the query's own entry.
    hits = np.count_nonzero(labels[n] == query_label) - 1

    acc += hits / retrievable
    evaluated += 1

# Average over the queries actually evaluated (9164 images of 1680 people
# with 2 or more images for the full LFW set); 0.0 if nothing was evaluated.
print(acc / evaluated if evaluated else 0.0)

0.10859522770260449
