# General Process for testing FaceNet 

# Face detection

To install the MTCNN face detector:

- Linux: `sudo pip install mtcnn`

- Windows: `pip install mtcnn`

In [2]:
# Confirm that mtcnn is installed: the import below fails if the package is missing
import mtcnn
# Print the installed package version
print(mtcnn.__version__)

0.1.0


In [None]:
# face detection for the 5 Celebrity Faces Dataset
from os import listdir
from os.path import isdir
from PIL import Image
from matplotlib import pyplot
from numpy import savez_compressed
from numpy import asarray
from mtcnn.mtcnn import MTCNN
 
# extract a single face from a given photograph
def extract_face(filename, required_size=(160, 160), detector=None):
	"""Detect the first face in an image file and return it as a resized RGB array.

	Args:
		filename: path of the image file to open.
		required_size: (width, height) handed to ``Image.resize``.
		detector: optional object exposing ``detect_faces(pixels)``. When omitted,
			one MTCNN instance is created on first use and reused by later calls,
			instead of rebuilding the detector for every image.

	Returns:
		Array holding the cropped face, resized to ``required_size``.

	Raises:
		ValueError: if the detector reports no face in the image.
	"""
	# load the image and force RGB; the context manager closes the file handle
	with Image.open(filename) as image:
		pixels = asarray(image.convert('RGB'))
	# reuse one default detector across calls
	if detector is None:
		detector = getattr(extract_face, '_default_detector', None)
		if detector is None:
			detector = extract_face._default_detector = MTCNN()
	# detect faces in the image
	results = detector.detect_faces(pixels)
	if not results:
		# fail with a message that names the file instead of a bare IndexError
		raise ValueError('no face detected in %s' % filename)
	# bounding box of the first face: top-left corner plus size
	x, y, width, height = results[0]['box']
	# the detector can report a corner slightly outside the image; clamp the start
	# to 0 while keeping the true far edge (abs() would shift the crop window)
	x1, y1 = max(0, x), max(0, y)
	x2, y2 = x + width, y + height
	# extract the face
	face = pixels[y1:y2, x1:x2]
	# resize pixels to the model size
	resized = Image.fromarray(face).resize(required_size)
	return asarray(resized)
 
# load images and extract faces for all images in a directory
def load_faces(directory):
	"""Extract one face from every file in ``directory``.

	Args:
		directory: path of a folder containing image files; a trailing
			separator is accepted but not required.

	Returns:
		list with the result of ``extract_face`` for each file, ordered by
		file name.
	"""
	# local import keeps this block self-contained
	from os.path import join
	# listdir() returns names in arbitrary order; sort so the face order is reproducible
	return [extract_face(join(directory, filename)) for filename in sorted(listdir(directory))]
 
# load a dataset that contains one subdir for each class that in turn contains images
def load_dataset(directory):
	"""Load the faces of every class sub-directory found in ``directory``.

	Args:
		directory: path of a folder with one sub-folder per class; a trailing
			separator is accepted but not required.

	Returns:
		Tuple ``(X, y)`` of arrays: the extracted faces and, for each face, the
		name of the sub-folder it came from.
	"""
	# local import keeps this block self-contained
	from os.path import join
	X, y = list(), list()
	# enumerate folders, one per class; sorted so the class order is reproducible
	for subdir in sorted(listdir(directory)):
		# the trailing '' makes join() end the path with a separator, so the path
		# still works if a callee concatenates a file name onto it
		path = join(directory, subdir, '')
		# skip any files that might be in the dir
		if not isdir(path):
			continue
		# load all faces in the subdirectory
		faces = load_faces(path)
		# summarize progress
		print('>loaded %d examples for class: %s' % (len(faces), subdir))
		# store the faces together with one label per face
		X.extend(faces)
		y.extend([subdir] * len(faces))
	return asarray(X), asarray(y)
 
# load the training split (one sub-directory per class)
trainX, trainy = load_dataset('5-celebrity-faces-dataset/train/')
print(trainX.shape, trainy.shape)
# load the validation split, used as the test set
testX, testy = load_dataset('5-celebrity-faces-dataset/val/')
# save the four arrays to one compressed file; arrays passed positionally are
# stored under the keys arr_0 .. arr_3, in argument order
savez_compressed('5-celebrity-faces-dataset.npz', trainX, trainy, testX, testy)

The process above has to be repeated for every dataset.

# Create Face Embeddings

Download FaceNet implementation from: https://drive.google.com/open?id=1pwQ3H4aJ8a6yyJHZkTwtjcL4wYWQb7bn

In [None]:
# calculate a face embedding for each face in the dataset using facenet
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model
 
# get the face embedding for one face
def get_embedding(model, face_pixels):
	"""Standardize one face image and return the model's prediction for it.

	Args:
		model: object exposing ``predict(samples)``.
		face_pixels: array of pixel values for a single face.

	Returns:
		First element of ``model.predict`` called on a batch containing only
		the standardized face.
	"""
	# work in float32 on a copy; the caller's array is left untouched
	face_pixels = face_pixels.astype('float32')
	# standardize pixel values across channels (global)
	mean, std = face_pixels.mean(), face_pixels.std()
	# a constant image has zero spread; dividing by it would fill the input with NaN
	if std == 0:
		std = 1.0
	face_pixels = (face_pixels - mean) / std
	# transform face into one sample (add a leading batch axis)
	samples = expand_dims(face_pixels, axis=0)
	# make prediction to get embedding
	yhat = model.predict(samples)
	return yhat[0]
 
# load the face arrays saved by the detection step; the keys arr_0 .. arr_3
# follow the positional order used when the archive was written
data = load('5-celebrity-faces-dataset.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

# load the facenet model from the downloaded Keras file
model = load_model('facenet_keras.h5')
print('Loaded Model')

# convert each face in the train set to an embedding (one predict call per face)
newTrainX = list()
for face_pixels in trainX:
	embedding = get_embedding(model, face_pixels)
	newTrainX.append(embedding)
newTrainX = asarray(newTrainX)
print(newTrainX.shape)

# convert each face in the test set to an embedding, same procedure as above
newTestX = list()
for face_pixels in testX:
	embedding = get_embedding(model, face_pixels)
	newTestX.append(embedding)
newTestX = asarray(newTestX)
print(newTestX.shape)

# save embeddings and the unchanged labels to one compressed file
# (stored as arr_0 .. arr_3, in argument order)
savez_compressed('5-celebrity-faces-embeddings.npz', newTrainX, trainy, newTestX, testy)

# Pairs formation and verification

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import Normalizer, LabelEncoder

# 1) Load the face embeddings written by the embedding step.
# Normalizer works on 2-D (n_samples, n_features) input and the following cell
# treats every row as a 1-D embedding, so this must be the embeddings archive,
# not the raw face-pixel archive.
data = np.load('5-celebrity-faces-embeddings.npz')
trainX, trainy, testX, testy = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"]
print("Dataset: train=%d, test=%d" % (trainX.shape[0], testX.shape[0]))

# 2) Normalize input vectors: scale every embedding to unit L2 norm
in_encoder = Normalizer(norm = "l2")
normed_trainX = in_encoder.transform(trainX)
normed_testX = in_encoder.transform(testX)

# 3) Label encode targets: fit on the train labels, then map both splits to integers
out_encoder = LabelEncoder()
print(trainy)
out_encoder.fit(trainy)
trainy = out_encoder.transform(trainy)
print(trainy)
testy = out_encoder.transform(testy)

In [None]:
# Tag every normalized embedding with the position of its own photo

# train split: append the running index as the last element of each embedding
count = 0
trainX2 = list()
for face in normed_trainX:
    new_emb = face
    # np.append returns a new array, so normed_trainX itself is not modified
    new_emb = np.append(new_emb, count)
    trainX2.append(new_emb)
    count += 1

# test split: same procedure, with the index restarting at 0
count = 0
testX2 = list()
for face in normed_testX:
    new_emb = face
    new_emb = np.append(new_emb, count)
    testX2.append(new_emb)
    count += 1

# Resulting entries look like: [0.42, -0.11, ..., 0], [1.22, -2.11, ..., 24]
# NOTE: the last number of each array (new_emb) is the index of the image in its raw dataset (the image dataset)

In [None]:
def metric_obtention(trainX2):
    """Build all embedding pairs and compute VAL / FAR over a grid of distance thresholds.

    Args:
        trainX2: sequence of 1-D arrays; each one is an embedding followed by one
            trailing element that identifies the source image.

    Returns:
        list ``[pairs, Psame, Pdiff, TA_total, FA_total, VAL_list, FAR_list]`` where
        - pairs holds one ``[img_i, img_j, label_i, label_j]`` entry per unordered pair;
        - Psame / Pdiff hold ``[img_i, img_j, label_i, label_j, norm]`` entries whose
          squared distance is below / not below a threshold, accumulated over all
          thresholds;
        - TA_total / FA_total hold one list per threshold with the Psame / Pdiff
          entries whose squared distance is <= that threshold;
        - VAL_list / FAR_list hold len(TA)/len(Psame) and len(FA)/len(Pdiff) per
          threshold (0 when the denominator set is empty).
    """
    how_many = len(trainX2)

    # 1) Formation of pairs (everyone with everyone, each unordered pair once)
    pairs = list()
    for i in range(how_many):
        for j in range(i + 1, how_many):
            # the last element is the image tag, everything before it is the embedding
            pairs.append([trainX2[i][0:-1], trainX2[j][0:-1], trainX2[i][-1], trainX2[j][-1]])

    # 2) Pair comparison
    threshold = np.linspace(0.1, 0.9, num=9, endpoint=True, retstep=False, dtype=float, axis=0)

    # the squared distance depends only on the pair, so compute it once instead of
    # once per threshold
    distances = [np.linalg.norm(pair[0] - pair[1])**2 for pair in pairs]

    # NOTE(review): Psame / Pdiff are split by distance, not by identity, and every
    # pair is recorded once per threshold. FaceNet-style VAL/FAR would presumably
    # split by identity instead, which needs identity labels that are not passed in
    # here - confirm the intended definition.
    Psame = list()
    Pdiff = list()
    for limit in threshold:
        for pair, norm in zip(pairs, distances):
            comparison = [pair[0], pair[1], pair[2], pair[3], norm]
            if norm < limit:
                Psame.append(comparison)
            else:
                Pdiff.append(comparison)

    print("Pairs: " + str(len(pairs)))
    print("Psame: " + str(len(Psame)))
    print("Pdiff: " + str(len(Pdiff)))

    # 3) VAL and FAR rates
    VAL_list = []
    FAR_list = []
    TA_total = []
    FA_total = []
    for limit in threshold:
        TA = [entry for entry in Psame if entry[-1] <= limit]
        FA = [entry for entry in Pdiff if entry[-1] <= limit]
        # record exactly one list per threshold; false accepts go to FA_total
        TA_total.append(TA)
        FA_total.append(FA)

        val = len(TA) / len(Psame) if len(Psame) != 0 else 0
        print("VAL para threshold:" + str(limit) + "es" + str(val))
        VAL_list.append(val)

        far = len(FA) / len(Pdiff) if len(Pdiff) != 0 else 0
        print("FAR para threshold:" + str(limit) + "es" + str(far))
        FAR_list.append(far)

    metrics = [pairs, Psame, Pdiff, TA_total, FA_total, VAL_list, FAR_list]
    return metrics

def face_pairs_plot(metrics_data, raw_imgs, how_many):
    """Show Psame pairs side by side, one two-panel figure per pair.

    Args:
        metrics_data: list shaped like the result of ``metric_obtention``;
            element 1 is read as Psame, whose entries carry the two image
            indices at positions 2 and 3.
        raw_imgs: indexable collection of images accepted by ``imshow``.
        how_many: maximum number of pairs to draw; capped at the number of
            available Psame entries.

    Returns:
        None.
    """
    psame = metrics_data[1]
    # never index past the pairs that actually exist
    for i in range(min(how_many, len(psame))):
        # plt.subplots creates its own figure; an extra plt.figure() call would
        # leave an empty figure behind on every iteration
        f, axarr = plt.subplots(1, 2)
        # positions 2 and 3 hold the indices of the two images in raw_imgs
        axarr[0].imshow(raw_imgs[int(psame[i][2])])
        axarr[1].imshow(raw_imgs[int(psame[i][3])])
    return

# Plot Roc curve
def ROC_plot(VAL, FAR):
    """Draw VAL (y axis) against FAR (x axis) as a green line on a new figure.

    Args:
        VAL: sequence of y values.
        FAR: sequence of x values, same length as ``VAL``.

    Returns:
        None.
    """
    axes = plt.figure().add_subplot(1, 1, 1)
    # horizontal grid lines only
    axes.yaxis.grid(linestyle='-', color='gray')
    plt.xticks(fontsize=14, fontweight='normal')
    plt.yticks(fontsize=14, fontweight='normal')
    axes.set_xlabel('FAR', fontsize=14)
    axes.set_ylabel('VAL', fontsize=14)
    # fixed viewing window: x in [0, 0.015], y in [0, 1]
    axes.set_xlim(0, 0.015)
    axes.set_ylim(0, 1)
    axes.plot(FAR, VAL, 'g-', linewidth=1.5)

Results are stored in the repository: https://github.com/JoseLGP/FaceRecognition