# Face Recognition using KNN
In this notebook, we apply k-nearest neighbours (KNN) for face recognition, using cosine-similarity distances on InceptionV3 feature embeddings and the distance metrics provided by DeepFace models, as the culmination of the semester project.

In [34]:
# imports
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import cv2
from collections import Counter

import keras
import tensorflow as tf

In [15]:
from deepface import DeepFace
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# File paths: the base path is the working directory with its last four characters
# removed (presumably the name of the notebook's own sub-folder — confirm for your
# checkout); the data folder sits directly under that base path.
path = os.getcwd()[:-4]
data_dir = f"{path}/data/"

### I. Load InceptionV3 Feature Embeddings from "/data/dataset_features.csv"

In [4]:
# Load the pre-computed feature table from the data folder.
# The `image` and `v3_features` columns are read back as plain strings.
dataset = pd.read_csv(data_dir + "dataset_features.csv")

In [5]:
dataset

Unnamed: 0.1,Unnamed: 0,image,person,split,v3_features
0,0,[[[ 0 1 0]\n [ 0 1 0]\n [ 0 1 0]\n .....,Ann_Veneman,train,[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0...
1,1,[[[0 1 1]\n [0 1 1]\n [0 1 1]\n ...\n [1 2...,Ann_Veneman,train,[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0...
2,2,[[[0 0 0]\n [0 0 0]\n [1 0 0]\n ...\n [0 1...,Ann_Veneman,train,[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0...
3,3,[[[0 0 0]\n [0 0 0]\n [0 0 0]\n ...\n [0 0...,Ann_Veneman,train,[0.00000000e+00 0.00000000e+00 0.00000000e+00 ...
4,4,[[[129 163 176]\n [129 163 176]\n [129 163 1...,Ann_Veneman,test,[0.00000000e+00 0.00000000e+00 0.00000000e+00 ...
...,...,...,...,...,...
5090,5090,[[[0 0 0]\n [0 0 0]\n [0 0 0]\n ...\n [0 0...,Zhu_Rongji,train,[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0...
5091,5091,[[[0 0 0]\n [0 0 0]\n [0 0 0]\n ...\n [0 0...,Zhu_Rongji,train,[0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0...
5092,5092,[[[0 0 0]\n [0 0 0]\n [0 0 0]\n ...\n [0 0...,Zhu_Rongji,test,[0.00000000e+00 0.00000000e+00 0.00000000e+00 ...
5093,5093,[[[0 0 0]\n [0 0 0]\n [0 0 0]\n ...\n [0 0...,Zhu_Rongji,train,[0.00000000e+00 0.00000000e+00 0.00000000e+00 ...


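Let's fix the image column: the CSV only holds a truncated text dump of each image, so we replace every value with the actual image, reloaded from disk with getImg() using the row's running image number within its person.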

In [6]:
def getImg(name, num):
    """Read one image of a person from the lfw-deepfunneled folder under `data_dir`.

    The file is looked up at ``<name>/<name>_<num zero-padded to 4 digits>.jpg``.

    Parameters:
        name: folder / file-name prefix of the person.
        num:  integer image number.

    Returns:
        Whatever ``cv2.imread`` returns for that path; the image is not resized.
    """
    filename = f"{name}_{num:04d}.jpg"
    return cv2.imread(data_dir + "lfw-deepfunneled/" + name + '/' + filename)

In [7]:
# Rebuild the image column from disk. Within each consecutive run of rows that share
# the same person, rows are numbered 1, 2, 3, ... and that number selects the file.
# NOTE(review): this assumes row order matches the image numbering on disk — confirm.
p_ref = ""
i_ref = 1
image_col = []
for person in dataset["person"]:
    if person != p_ref:
        # A new person starts: restart the image counter.
        p_ref, i_ref = person, 1
    image_col.append(getImg(p_ref, i_ref))
    i_ref += 1
dataset["image"] = image_col

In [8]:
def fix_feature_column(X, col):
    """Parse a column of stringified numeric vectors into lists of floats.

    Each cell of ``X[col]`` is expected to be a string with one enclosing
    bracket pair around numbers separated by whitespace (spaces and/or line
    breaks), e.g. ``"[0.0 1.5\\n 2.0]"``.

    Parameters:
        X:   table indexable by column name (e.g. a pandas DataFrame).
        col: name of the column to parse.

    Returns:
        A list with one inner list of floats per row, in row order.

    Raises:
        ValueError: if a token inside the brackets is not a valid float literal.
    """
    # str.split() without arguments splits on any whitespace, newlines included.
    # Newlines must not be deleted beforehand: that would fuse two numbers that
    # are separated only by a line break into one invalid token.
    return [[float(token) for token in cell[1:-1].split()] for cell in X[col]]

In [9]:
# Replace the stringified feature vectors with lists of floats (overwrites the column).
# NOTE: this cell cannot be re-run on its own — once the cells hold lists, the string
# methods used by fix_feature_column fail. Reload `dataset` from the CSV first.
dataset["v3_features"] = fix_feature_column(dataset, "v3_features")

In [10]:
# Drop the leftover index column written by an earlier to_csv round-trip.
# errors="ignore" keeps this cell re-runnable: a second run finds the column
# already gone and would otherwise raise KeyError.
dataset = dataset.drop(columns="Unnamed: 0", errors="ignore")

In [11]:
dataset

Unnamed: 0,image,person,split,v3_features
0,"[[[0, 1, 0], [0, 1, 0], [0, 2, 0], [0, 1, 0], ...",Ann_Veneman,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,"[[[0, 1, 1], [0, 1, 1], [0, 1, 1], [0, 1, 1], ...",Ann_Veneman,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,"[[[0, 0, 0], [0, 0, 0], [2, 0, 0], [2, 0, 0], ...",Ann_Veneman,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Ann_Veneman,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,"[[[129, 163, 176], [129, 163, 176], [129, 163,...",Ann_Veneman,test,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...
5090,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Zhu_Rongji,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.462..."
5091,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Zhu_Rongji,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5092,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Zhu_Rongji,test,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5093,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Zhu_Rongji,train,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.076..."


In [12]:
# Partition the rows by their `split` label; the label column itself is no longer
# needed afterwards and is removed from both parts.
split_label = dataset["split"]
dataset_train = dataset.loc[split_label == "train"].drop(columns="split")
dataset_test = dataset.loc[split_label == "test"].drop(columns="split")

In [13]:
dataset_test

Unnamed: 0,image,person,v3_features
4,"[[[129, 163, 176], [129, 163, 176], [129, 163,...",Ann_Veneman,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9,"[[[0, 1, 0], [0, 1, 0], [2, 0, 0], [2, 0, 0], ...",Ann_Veneman,"[0.0, 3.26216136e-23, 0.0, 0.0, 2.97151225e-38..."
11,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Bill_Frist,"[0.0, 4.66922686e-35, 0.0, 0.0, 0.0, 1.0250380..."
29,"[[[4, 0, 0], [4, 0, 0], [4, 0, 0], [4, 0, 0], ...",Edmund_Stoiber,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
30,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Edmund_Stoiber,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...
5064,"[[[0, 3, 2], [0, 1, 1], [1, 0, 5], [12, 0, 7],...",Pierce_Brosnan,"[0.0, 8.59265492e-36, 0.0, 0.0, 0.0, 1.0918716..."
5065,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Pierce_Brosnan,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5072,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",Ralf_Schumacher,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5081,"[[[4, 0, 0], [6, 7, 11], [0, 0, 11], [0, 0, 17...",Robert_Blake,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.469..."


### II. Get Predictions

In [14]:
# Number of nearest training images that vote on each prediction.
k = 5
# NOTE(review): presumably a cosine-similarity threshold; it is not used in any of the
# cells shown in this notebook — confirm whether it is still needed.
csim_thr = 0.4

In [52]:
def deep_dist(model, img1, img2):
    """Run ``DeepFace.verify`` on two images with the given model.

    Parameters:
        model: model name forwarded as ``model_name``.
        img1, img2: the two images to compare.

    Returns:
        The result of ``DeepFace.verify``, or the string ``"None"`` when the call
        raises ``ValueError`` (presumably when no face can be detected — confirm
        against the DeepFace documentation). Callers test for that string.
    """
    try:
        return DeepFace.verify(img1, img2, model_name=model)
    except ValueError:
        return "None"

In [45]:
# Candidate model names passed to DeepFace via deep_dist; the prediction cell uses models[0].
models = ["Facenet", "Facenet512", "OpenFace", "DeepID", "SFace", "GhostFaceNet"]

In [54]:
# Classify test images by k-nearest-neighbour majority vote over DeepFace distances.
# Every test image is compared with every training image, which is very slow, so the
# run is capped at `max_test_rows` test images (set to None for the whole test split).
# NOTE(review): computing each image's embedding once (e.g. with DeepFace.represent)
# and comparing vectors would avoid re-running the model for every pair — consider it.
max_test_rows = 1

# Loop-invariant: training rows without the InceptionV3 features, which are unused here.
train_base = dataset_train.drop('v3_features', axis=1)

preds = []
for _ , entry_test in dataset_test.iloc[:max_test_rows].iterrows():
    dists = []
    for __ , entry_train in train_base.iterrows():
        dist = deep_dist(models[0], entry_test.image, entry_train.image)
        # deep_dist signals a failed comparison with the string "None"; store NaN
        # instead so those rows can be dropped and the column stays numeric.
        dists.append(np.nan if isinstance(dist, str) else dist["distance"])

    # New frame per test image; train_base itself is never modified.
    tr = train_base.assign(dists=dists).dropna(subset=["dists"])
    if tr.empty:
        # No training image could be compared with this test image.
        pred = None
    else:
        top_k = tr.sort_values(by=['dists']).head(k).person.values
        pred = Counter(top_k).most_common(1)[0][0]
    # Record the prediction for every processed test row.
    preds.append(pred)
    print(pred)
# Once every test row has been classified (max_test_rows = None) the predictions
# can be attached to the test table:
#dataset_test["preds"] = preds

KeyboardInterrupt: 

In [None]:
dataset_test

In [43]:
# NOTE(review): `csim_pred` is not assigned in any cell shown in this notebook, so this
# cell presumably fails on a fresh kernel; the output below likely comes from an
# earlier session — confirm or remove.
csim_pred

['George_W_Bush',
 'George_W_Bush',
 'Pierce_Brosnan',
 'Pierce_Brosnan',
 'George_W_Bush',
 'Pete_Sampras',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'Yao_Ming',
 'Tony_Blair',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'Pierce_Brosnan',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'Pierce_Brosnan',
 'George_W_Bush',
 'George_W_Bush',
 'Andre_Agassi',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'Yao_Ming',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'George_W_Bush',
 'G

### III. Get Results

Scrapped: brute-force KNN over DeepFace distances takes too long and requires too much processing to finish.