In [None]:
from model import create_model
# Instantiate the network through the project-local create_model() factory;
# no weights are loaded into this instance in this cell.
nn4_small2 = create_model()

In [None]:
# Show the model's summary() output (presumably the layer listing — confirm
# against what create_model() returns).
nn4_small2.summary()

In [None]:
# Second model instance, populated from the weights file on disk. This is the
# instance used by generate_embedding() to compute the face embeddings.
nn4_small2_pretrained = create_model()
nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')

In [None]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from align import AlignDlib

%matplotlib inline

def load_image(path):
    """Read an image from disk and return it with channels in RGB order.

    Args:
        path: Path of the image file to read.

    Returns:
        The image array produced by ``cv2.imread`` with its last axis
        reversed (OpenCV loads colour images as BGR).

    Raises:
        FileNotFoundError: If OpenCV could not read ``path``.
    """
    # Flag 1 asks OpenCV for a colour image.
    img = cv2.imread(path, 1)
    # cv2.imread signals failure by returning None instead of raising, which
    # previously surfaced as an opaque "'NoneType' object is not
    # subscriptable" error on the slice below.
    if img is None:
        raise FileNotFoundError('Image could not be read: ' + str(path))
    # OpenCV loads images with color channels
    # in BGR order. So we need to reverse them
    return img[..., ::-1]

# Face detector/aligner from the project-local align module, initialised with
# the landmarks model file. Used below for getAllFaceBoundingBoxes() and align().
alignment = AlignDlib('models/landmarks.dat')

In [None]:
import numpy as np
import os.path

class IdentityMetadata:
    """Location of one image file inside a dataset directory.

    Attributes:
        base: Dataset base directory.
        name: Identity name (a sub-directory of ``base``), or ``None`` when
            the image lies directly inside ``base``.
        file: Image file name.
    """

    def __init__(self, base, name, file):
        # dataset base directory
        self.base = base
        # identity name
        self.name = name
        # image file name
        self.file = file

    def __repr__(self):
        """Return the image path so that printed metadata is readable."""
        return self.image_path()

    def image_path(self):
        """Return the path of the image, with or without an identity folder."""
        # Identity comparison is the correct test for None; ``==`` can be
        # overridden by the compared object.
        if self.name is None:
            return os.path.join(self.base, self.file)
        return os.path.join(self.base, self.name, self.file)

def load_metadata(path, is_classified):
    """Collect metadata for every JPEG image found under ``path``.

    Args:
        path: Dataset directory to scan.
        is_classified: When true, ``path`` is expected to contain one
            sub-directory per identity; the images inside each one are
            collected with that directory name as the identity. When false,
            the images directly inside ``path`` are collected with no identity.

    Returns:
        A NumPy array of ``IdentityMetadata`` objects.
    """
    def is_jpeg(file_name):
        # Case-insensitive so that '.JPG' / '.JPEG' files are not silently dropped.
        return os.path.splitext(file_name)[1].lower() in ('.jpg', '.jpeg')

    metadata = []
    # Sorted so that the result does not depend on the listing order of the
    # file system.
    for entry in sorted(os.listdir(path)):
        if is_classified:
            identity_dir = os.path.join(path, entry)
            # A stray file next to the identity folders would make
            # os.listdir raise NotADirectoryError, so skip anything that is
            # not a directory.
            if not os.path.isdir(identity_dir):
                continue
            for f in sorted(os.listdir(identity_dir)):
                if is_jpeg(f):
                    metadata.append(IdentityMetadata(path, entry, f))
        elif is_jpeg(entry):
            metadata.append(IdentityMetadata(path, None, entry))

    return np.array(metadata)

In [None]:
# Index the JPEG images located directly inside 'Batch_Images' (unclassified
# mode: no identity sub-folders are read).
metadata = load_metadata('Batch_Images', False)

In [None]:
import random
import string

# Default output directory of save_image(); later re-read as unclassified faces.
FACES_DIR = './Batch_Images/Faces/'
# Directory read in classified mode (one sub-folder per identity).
LABELED_FACES_DIR = './Batch_Images/Labeled_Faces/'
# Directory that receives the matched faces under their generated names.
BATCH_LABELED_FACES_DIR = './Batch_Images/Batch_Labeled_Faces/'

def generate_random_hex(path):
    """Return a random 10-character name that is unused inside ``path``.

    The characters are drawn from ``string.hexdigits``.

    Args:
        path: Directory in which ``<name>.jpg`` must not already exist.

    Returns:
        The generated name, without extension.
    """
    letters = string.hexdigits
    while True:
        rand_name = ''.join(random.choice(letters) for _ in range(10))
        # Check the target directory rather than the current working
        # directory, and retry in a loop: the previous recursive retry
        # omitted the required ``path`` argument and raised TypeError on a
        # collision.
        if not os.path.exists(os.path.join(path, rand_name + '.jpg')):
            return rand_name
    
    
def save_image(img, path = FACES_DIR, image_name = '', extension = '.jpg'):
    """Write an image to ``path``, creating the directory when needed.

    Args:
        img: Image array; its last axis is reversed before writing, i.e. the
            inverse of the channel flip done by ``load_image``.
        path: Destination directory.
        image_name: File name without extension; when empty, a random unused
            name is generated with ``generate_random_hex``.
        extension: File extension including the leading dot.

    Raises:
        Exception: If ``cv2.imwrite`` reports that the image was not written.
    """
    # makedirs also creates missing parent directories (os.mkdir failed for
    # nested targets such as '<labelled dir>/<identity>' when the parent did
    # not exist yet) and does nothing when the directory is already there.
    os.makedirs(path, exist_ok=True)

    if image_name == '':
        image_name = generate_random_hex(path)

    # os.path.join copes with ``path`` given with or without a trailing slash.
    full_path = os.path.join(path, image_name + extension)

    if not cv2.imwrite(full_path, img[..., ::-1]):
        raise Exception('Image could not be written')

In [None]:
from progressbar import ProgressBar

# Detect the faces of every batch image, align each one and store the result
# in the default faces directory.
progress = ProgressBar(len(metadata))
progress.start()

for i, m in enumerate(metadata):
    original = load_image(m.image_path())
    bounding_boxes = alignment.getAllFaceBoundingBoxes(original)

    # Images without any detection only advance the progress bar.
    if len(bounding_boxes) == 0:
        progress.update(i)
        continue

    for bb in bounding_boxes:
        aligned_face = alignment.align(
            96, original, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
        ## Maybe change to send to specific folder
        save_image(aligned_face)

    progress.update(i)
    

In [None]:
# Labelled faces: one sub-folder per identity.
metadata_labels = load_metadata(LABELED_FACES_DIR, True)
# Extracted faces: flat folder, no identity attached.
metadata_faces = load_metadata(FACES_DIR, False)

In [None]:
from progressbar import ProgressBar

def generate_embedding(metadata):
    """Compute one 128-dimensional embedding per metadata entry.

    Every image is loaded, divided by 255, cast to float32 and passed through
    ``nn4_small2_pretrained`` as a batch of one.

    Args:
        metadata: Sequence of objects exposing ``image_path()``.

    Returns:
        Array of shape ``(len(metadata), 128)`` holding one embedding per row.
    """
    total = len(metadata)
    bar = ProgressBar(total)
    bar.start()

    vectors = np.zeros((total, 128))

    for row, entry in enumerate(metadata):
        pixels = load_image(entry.image_path())
        scaled = (pixels / 255.).astype(np.float32)
        # The model is fed a batch, hence the extra leading axis.
        batch = np.expand_dims(scaled, axis=0)
        vectors[row] = nn4_small2_pretrained.predict(batch)[0]
        bar.update(row)

    return vectors

In [None]:
# Embeddings of the labelled faces and of the extracted faces; row i of each
# array corresponds to entry i of the metadata array it was computed from.
label_embedded = generate_embedding(metadata_labels)
faces_embedded = generate_embedding(metadata_faces)

In [None]:
# Map the raw bytes of each labelled embedding to the metadata of its image,
# so that an embedding can be looked up by exact value.
embedding_mapper = {}

# The loop variable must not be called ``metadata``: at cell level that would
# overwrite the notebook-wide ``metadata`` array loaded earlier.
for embedding, labeled_meta in zip(label_embedded, metadata_labels):
    embedding_mapper[embedding.tobytes()] = labeled_meta

In [None]:
import requests

# Endpoint queried by generate_random_name(); its JSON response is read for a 'name' field.
NAME_URL = 'https://api.namefake.com/'

def generate_random_name():
    """Fetch a random name from ``NAME_URL`` and strip its spaces.

    Returns:
        The ``name`` field of the JSON response, without spaces.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    # NOTE(review): verify=False disables TLS certificate validation, so the
    # response could be tampered with in transit. Kept because the reason for
    # disabling it is not visible here — confirm and remove if possible.
    # The timeout keeps the notebook from hanging forever on a stalled server.
    response = requests.get(NAME_URL, verify = False, timeout = 10)
    # Surface HTTP errors directly rather than as an unrelated JSON/KeyError.
    response.raise_for_status()

    return response.json()['name'].replace(' ', '')

In [None]:
# Original identity name -> randomly generated replacement name.
new_name = {}

for m in embedding_mapper.values():
    # One replacement per identity is enough.
    if m.name in new_name:
        continue

    rand_name = generate_random_name()
    # Draw again until the name is not already assigned to another identity.
    while rand_name in new_name.values():
        rand_name = generate_random_name()

    new_name[m.name] = rand_name

In [None]:
# Copy every extracted face whose embedding exactly matches a labelled one
# into the folder of that identity's generated name.
for f_emb in faces_embedded:
    key = f_emb.tobytes()
    if key not in embedding_mapper:
        continue

    ## TODO verify if imgs that are not in are faces
    m = embedding_mapper[key]
    img = load_image(m.image_path())
    end_path = BATCH_LABELED_FACES_DIR + new_name[m.name]
    save_image(img, end_path)

In [None]:
import numpy as np
# Reload the faces, now grouped per generated identity name.
metadata_faces = load_metadata(BATCH_LABELED_FACES_DIR, True)
# NOTE(review): the shuffle is in place and unseeded, so the sample order (and
# everything derived from it) differs between runs.
np.random.shuffle(metadata_faces)
# Row i of `embedded` corresponds to metadata_faces[i] in the shuffled order.
embedded = generate_embedding(metadata_faces)

In [None]:
def distance(emb1, emb2):
    """Return the sum of squared element-wise differences of two embeddings."""
    delta = emb1 - emb2
    return np.square(delta).sum()

def show_pair(idx1, idx2):
    """Plot two faces side by side, titled with the distance of their embeddings.

    Args:
        idx1: Index of the first face in ``metadata_faces`` / ``embedded``.
        idx2: Index of the second face in ``metadata_faces`` / ``embedded``.
    """
    plt.figure(figsize=(10,5))
    pair_distance = distance(embedded[idx1], embedded[idx2])
    plt.suptitle('Distance = %.2f' % pair_distance)

    # Left panel shows the first face, right panel the second one.
    for position, face_idx in ((121, idx1), (122, idx2)):
        plt.subplot(position)
        plt.imshow(load_image(metadata_faces[face_idx].image_path()))
    
def show_pair_name(img_name1, img_name2):
    """Plot the pair of faces identified by their image file names.

    Args:
        img_name1: File name of the first image.
        img_name2: File name of the second image.

    Raises:
        ValueError: If either file name is absent from ``metadata_faces``.
    """
    idx1 = idx2 = None
    # When a file name occurs more than once, the last occurrence is used.
    for i, m in enumerate(metadata_faces):
        if m.file == img_name1:
            idx1 = i
        if m.file == img_name2:
            idx2 = i

    # Previously a missing name left idx1/idx2 unassigned and failed with an
    # UnboundLocalError that did not say which file was not found.
    missing = [name for name, idx in ((img_name1, idx1), (img_name2, idx2))
               if idx is None]
    if missing:
        raise ValueError('Image file(s) not found in metadata_faces: '
                         + ', '.join(str(name) for name in missing))

    show_pair(idx1, idx2)

# What is the best threshold for the verification problem (Distance Threshold)

In [None]:
from sklearn.metrics import f1_score, accuracy_score

distances = [] # squared L2 distance between pairs
identical = [] # 1 if same identity, 0 otherwise

num = len(metadata_faces)

# Visit every unordered pair of distinct images exactly once. The inner loop
# used to start at 1, which compared images with themselves (distance 0,
# always "identical") and counted most pairs twice, skewing the scores below.
for i in range(num - 1):
    for j in range(i + 1, num):
        distances.append(distance(embedded[i], embedded[j]))
        identical.append(1 if metadata_faces[i].name == metadata_faces[j].name else 0)

distances = np.array(distances)
identical = np.array(identical)

# Candidate thresholds; a pair is predicted "same identity" when its distance
# is below the threshold.
thresholds = np.arange(0.3, 1.0, 0.01)

f1_scores = [f1_score(identical, distances < t) for t in thresholds]
acc_scores = [accuracy_score(identical, distances < t) for t in thresholds]

opt_idx = np.argmax(f1_scores)
# Threshold at maximal F1 score
opt_tau = thresholds[opt_idx]
# Accuracy at maximal F1 score
opt_acc = accuracy_score(identical, distances < opt_tau)

# Plot F1 score and accuracy as function of distance threshold
plt.plot(thresholds, f1_scores, label='F1 score');
plt.plot(thresholds, acc_scores, label='Accuracy');
plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
plt.title('Accuracy at threshold ' + str(opt_tau) + ' = ' + str(opt_acc))
plt.xlabel('Distance threshold')
plt.legend();

## Distance distributions of positive and negative pairs

In [None]:
# Split the pair distances by ground truth: same identity vs. different identity.
dist_pos = distances[identical == 1]
dist_neg = distances[identical == 0]

plt.figure(figsize=(12,4))

# One histogram per group, each with the selected threshold drawn on top.
panels = (
    (121, dist_pos, 'Distances (pos. pairs)'),
    (122, dist_neg, 'Distances (neg. pairs)'),
)
for position, values, panel_title in panels:
    plt.subplot(position)
    plt.hist(values)
    plt.axvline(x=opt_tau, linestyle='--', lw=1, c='lightgrey', label='Threshold')
    plt.title(panel_title)
    plt.legend();

# Face recognition - with KNN or an SVM

70% used for training  
30% for validation

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

targets = np.array([m.name for m in metadata_faces])

encoder = LabelEncoder()
encoder.fit(targets)

# Numerical encoding of identities
y = encoder.transform(targets)

# metadata_faces was shuffled before the embeddings were computed, so a
# positional split is a random split: the first 30% of the samples form the
# test set, the remaining 70% the training set.
pivot = len(metadata_faces) * 0.3

sample_positions = np.arange(metadata_faces.shape[0])
test_idx = sample_positions < pivot
train_idx = sample_positions >= pivot

X_train = embedded[train_idx]
X_test = embedded[test_idx]

y_train = y[train_idx]
y_test = y[test_idx]

knn = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
svc = LinearSVC()

knn.fit(X_train, y_train)
svc.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)
acc_knn = accuracy_score(y_test, y_pred_knn)
y_pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, y_pred_svc)

f1_knn = f1_score(y_test, y_pred_knn, average='weighted')
f1_svc = f1_score(y_test, y_pred_svc, average='weighted')

# Report the scores computed above instead of evaluating f1_score a second time.
print('KNN accuracy = ' + str(acc_knn) + ' , SVM accuracy = ' + str(acc_svc))
print('KNN f1 score weighted = ' + str(f1_knn) +
      ' , SVM f1 score weighted = ' + str(f1_svc))



In [None]:
import warnings
# Suppress LabelEncoder warning.
# NOTE(review): 'ignore' without a category or module filter silences every
# warning from here on, not only the LabelEncoder one — consider narrowing it.
warnings.filterwarnings('ignore')

def show_prediction(example_idx):
    """Show one test image together with the identity predicted by the KNN.

    Args:
        example_idx: Position of the example within the test subset.
    """
    plt.figure()
    # Index the array the embeddings were computed from. The previous code
    # read the unrelated ``metadata`` variable, which does not line up with
    # ``test_idx`` / ``embedded``.
    example_image = load_image(metadata_faces[test_idx][example_idx].image_path())
    example_prediction = knn.predict([embedded[test_idx][example_idx]])
    example_identity = encoder.inverse_transform(example_prediction)[0]

    plt.imshow(example_image)
    plt.title('Recognized as ' + str(example_identity));
    
def show_predictions(indexes):
    """Plot up to 16 test images in a 4x4 grid with predicted and stored names.

    Each title reads ``A:<identity predicted by knn> R:<name in the metadata>``.

    Args:
        indexes: Positions within the test subset; only the first 16 are shown.
    """
    plt.figure(figsize=(16,16))

    for slot, test_pos in enumerate(indexes[:16], start=1):
        sample = metadata_faces[test_idx][test_pos]
        picture = load_image(sample.image_path())
        predicted = knn.predict([embedded[test_idx][test_pos]])
        label = encoder.inverse_transform(predicted)[0]

        plt.subplot(4,4,slot)
        plt.imshow(picture)
        plt.title('A:' + str(label) + ' R:' + sample.name)

In [None]:
# Show the 16 test examples at positions 10..25 of the test subset.
show_predictions(range(10,26))

## Misclassified images

In [None]:
# Positions within the test subset where the KNN prediction differs from the
# true label.
error_pairs = [i for i, item in enumerate(y_pred_knn) if item != y_test[i]]

print(error_pairs)

show_predictions(error_pairs)


# Dataset visualization

In [None]:
from sklearn.manifold import TSNE

# Project the embeddings to two dimensions for plotting.
X_embedded = TSNE(n_components=2).fit_transform(embedded)

plt.figure(figsize=(10,10))

# np.unique yields the identities in sorted order, so each identity keeps the
# same colour and legend position between runs; iterating a set of strings
# does not guarantee a stable order.
for t in np.unique(targets):
    idx = targets == t
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=t)

plt.legend(bbox_to_anchor=(1, 1));