### Fine-tune the pre-trained FaceNet model on the Caltech Face Dataset (450 images, 30 people) using the triplet loss function

In [1]:
import os
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf

from keras import backend as K
# Use channels-first image tensors; the Input shapes used in this notebook,
# e.g. (3, 96, 96) and (3, 224, 224), put the channel axis first.
K.set_image_data_format('channels_first')

from keras.models import Sequential
from keras.models import Model
from keras.layers.core import Dense, Dropout
from keras.layers import Input
from keras import optimizers

from sklearn import decomposition
from sklearn.preprocessing import StandardScaler



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from FaceNet_utils import one_hot, distance_based_prediction, evaluate_model, face_dist, load_FaceData, load_base_model

In [3]:
# Number of face images; sizes the pairwise distance matrix and its loops below.
nFace = 450

In [4]:
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """
    Triplet loss summed over a batch of (anchor, positive, negative) encodings.

    Arguments:
    y_true -- unused; present only because Keras passes (y_true, y_pred) to every loss.
    y_pred -- indexable container whose first three items are, in order, the
            anchor, positive and negative encodings (presumably each of shape
            (None, 128) -- confirm against the model that produces them).
    alpha -- margin added to the difference of the two squared distances.

    Returns:
    loss -- scalar: sum over examples of max(pos_dist - neg_dist + alpha, 0)
    """

    def _squared_distance(a, b):
        # Squared Euclidean distance, reduced over the last (encoding) axis.
        return K.sum(K.square(a - b), axis=-1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    # Positive wherever the negative is not yet `alpha` farther from the
    # anchor than the positive is.
    margin_violation = (
        _squared_distance(anchor, positive)
        - _squared_distance(anchor, negative)
        + alpha
    )

    # Hinge at zero so satisfied triplets contribute nothing, then total
    # the remaining violations over the examples.
    return K.sum(K.maximum(margin_violation, 0.0))

In [5]:
# Base network returned by load_base_model() from FaceNet_utils
# (presumably the pre-trained FaceNet model -- confirm in FaceNet_utils).
FNModel = load_base_model()

In [6]:
# Size of axis 1 of the base model's output; reused below as the input width
# of the classifier model.
nFeature = FNModel.output.get_shape().as_list()[1]

In [None]:
#FNModel.compile(optimizer = 'adam', loss = triplet_loss, metrics = ['accuracy'])

In [5]:
from keras.applications.resnet50 import ResNet50

In [None]:
# Symbolic 3 x 96 x 96 (channels-first) input used to try calling FNModel on a new tensor.
input_test = Input((3,96,96))

In [None]:
# Apply the base model to the test input to obtain its symbolic output tensor.
output_test = FNModel(input_test)

In [6]:
# ResNet50 with ImageNet weights and without its top classifier layers,
# built for 3 x 224 x 224 (channels-first) inputs.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape = (3,224,224))



In [7]:
# Output width of the Dense embedding layer stacked on ResNet50 below.
embedding_dim = 64

In [8]:
# Start the new head from the ResNet50 backbone's output tensor.
x = base_model.output

In [9]:
# Display the backbone's symbolic output shape.
x.shape

TensorShape([Dimension(None), Dimension(2048), Dimension(1), Dimension(1)])

In [10]:
from keras.layers import GlobalMaxPooling2D, Lambda

In [11]:
# Collapse the spatial axes so each image is represented by a single vector.
x = GlobalMaxPooling2D()(x)
# Dropout with rate 0.5 on the pooled features.
x = Dropout(0.5)(x)
# Linear projection (no activation) down to the embedding size.
dense_1 = Dense(embedding_dim)(x)
# L2-normalise each embedding along axis 1.
normalized = Lambda(lambda  x: K.l2_normalize(x,axis=1))(dense_1)

In [12]:
# Wrap the backbone plus embedding head into one model that maps the
# backbone's input to the normalised embedding.
base_model2 = Model(base_model.input, normalized, name="base_model")

In [13]:
# Fresh 3 x 224 x 224 input and the embedding tensor obtained by running
# base_model2 on it.
# NOTE: r1 is a tensor rather than a Model, so it has no .summary() method.
input_1 = Input((3, 224,224))
r1 = base_model2(input_1)


AttributeError: 'Tensor' object has no attribute 'summary'

In [None]:
# Load the three arrays returned by load_FaceData() (by their names: the face
# images, their labels, and a one-hot version of the labels), then report
# the shape of each in turn.
FaceData, labels, labels_OH = load_FaceData()
for loaded in (FaceData, labels, labels_OH):
    print(loaded.shape)

In [None]:
# Three consecutive 10-image slices of FaceData packed into one list
# (presumably stand-ins for anchor / positive / negative batches -- TODO confirm).
X=[FaceData[:10],FaceData[10:20],FaceData[20:30]]

In [None]:
# Placeholder all-zero targets: one (10, 1) array per slice in X.
Y = [np.zeros((10, 1)),np.zeros((10, 1)),np.zeros((10, 1))]

In [None]:
# Attempt to train the base model on the three slices.
# NOTE(review): the only FNModel.compile(...) call in this notebook is
# commented out -- confirm the model is compiled before this cell is run.
FNModel.fit(X,Y)

In [None]:
# Embed every face image with the base model; row i of the result is the
# embedding of image i.
# Fixed: the model is bound to `FNModel`; the previous `FNmodel` spelling
# raised NameError.
FaceEmbedding = FNModel.predict(FaceData)

In [None]:
# Two-component PCA, used below to project the embeddings to 2-D for plotting.
pca = decomposition.PCA(n_components = 2)

In [None]:
# Standardise the embeddings with StandardScaler before PCA.
x_std = StandardScaler().fit_transform(FaceEmbedding)
# Fit the PCA on the standardised embeddings and project them onto its two components.
x_feature_pca = pca.fit_transform(x_std)

In [None]:
# Scatter plot of the two PCA components, coloured by label.
plt.scatter(x_feature_pca[:,0],x_feature_pca[:,1],c=labels)
plt.title('feature space')

In [None]:
# Correlation coefficients between embeddings (np.corrcoef treats each row,
# i.e. each image's embedding, as one variable), displayed as a heat map.
FaceEmbedding_Corr = np.corrcoef(FaceEmbedding)
plt.imshow(FaceEmbedding_Corr,cmap='jet')
plt.title('Correlation matrix')
plt.colorbar()

In [None]:
# Pairwise embedding distances, plus the same distances split by whether the
# two images carry the same label.
Distances_within_class = []
Distances_among_class = []

# Entries start at 100 as a placeholder; the loop below overwrites all of them.
FaceEmbedding_Dist = np.ones((nFace,nFace))*100
for i in range(nFace):
    # Only j >= i is computed; the value is mirrored to keep the matrix symmetric.
    for j in range(i,nFace):
        FaceEmbedding_Dist[i,j]=face_dist(FaceEmbedding[i,:],FaceEmbedding[j,:])
        FaceEmbedding_Dist[j,i]=FaceEmbedding_Dist[i,j]
        if i == j:
            # An image compared with itself is not a genuine same-class pair;
            # counting it would pad the same-class statistics (and the error
            # rates derived from them) with trivial self-distances.
            continue
        if labels[i]==labels[j]:
            Distances_within_class.append(FaceEmbedding_Dist[i,j])
        else:
            Distances_among_class.append(FaceEmbedding_Dist[i,j])

# Convert to arrays so the later cells can use vectorised comparisons and statistics.
Distances_within_class = np.array(Distances_within_class)
Distances_among_class = np.array(Distances_among_class)

In [None]:
# Heat map of the full pairwise distance matrix.
plt.imshow(FaceEmbedding_Dist,cmap='jet')
plt.title('Distance matrix')
plt.colorbar()

In [None]:
# Overlay the normalised histograms of cross-class and same-class distances.
plt.figure()
# `density = True` replaces the `normed` keyword, which newer matplotlib
# releases no longer accept.
_ = plt.hist(Distances_among_class,bins = 50, label = 'cross class', density = True)
_ = plt.hist(Distances_within_class,bins = 50, label = 'same class', density = True)
plt.title('Distribution of cross-class distance')
plt.legend()

# Mean and standard deviation of each group. '+/-' replaces the previous
# '+\-', which relied on an invalid escape sequence.
print('Cross-class distance = %1.3f +/- %1.3f' % (Distances_among_class.mean(), Distances_among_class.std()))
print('Same-class distance = %1.3f +/- %1.3f' % (Distances_within_class.mean(), Distances_within_class.std()))

### Note:
Even before any fine-tuning, the direct embedding of faces using FaceNet is already able to recognize faces at a decent level.

In [None]:
# Sweep the decision threshold from 0 up to the largest cross-class distance
# (step 0.01). For each value record the percentage of cross-class pairs that
# fall below it (false positives) and of same-class pairs above it (false negatives).
thresholds = np.arange(0,max(Distances_among_class),0.01)
FP = [100.0*np.sum(Distances_among_class<t)/len(Distances_among_class) for t in thresholds]
FN = [100.0*np.sum(Distances_within_class>t)/len(Distances_within_class) for t in thresholds]

# False-negative percentage against false-positive percentage over the sweep.
plt.plot(FP,FN)

# Error rates and overall pair accuracy at one fixed threshold.
threshold = 0.68
n_cross = len(Distances_among_class)
n_same = len(Distances_within_class)
pct_false_pos = 100.0*np.sum(Distances_among_class<threshold)/n_cross
pct_false_neg = 100.0*np.sum(Distances_within_class>threshold)/n_same
n_right = np.sum(Distances_among_class>threshold)+np.sum(Distances_within_class<threshold)

print('pct of false positive %2.3f %%' % (pct_false_pos))
print('pct of false negative %2.3f %%' % (pct_false_neg))

print('accuracy = %2.3f' % (100.0*(n_right)/(n_cross+n_same)))

## Tweaking
Now build a fully connected network with one hidden dense layer to further process the face embeddings and separate them.


In [None]:
# Number of distinct label values, i.e. the number of output classes.
nPeople = len(list(set(labels)))
# Classifier input: one vector of width nFeature.
X_input = Input((nFeature,))
# Hidden layer: 128 ReLU units.
X = Dense(128,activation = 'relu')(X_input)
# Dropout with rate 0.5 and a fixed seed.
X = Dropout(0.5, seed = 1)(X)
# Output layer: softmax over the nPeople classes.
X = Dense(nPeople,activation = 'softmax')(X)
myModel = Model(inputs = X_input, outputs = X, name='FaceRecoModel')
# Adam optimizer with categorical cross-entropy loss, tracking accuracy.
myModel.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
myModel.summary()

In [None]:
# Split the image indices into training and test sets, one label at a time.
train_idx = []
test_idx = []
for label in set(labels):
    ind = np.where(labels==label)[0]
    if len(ind)<=1:
        # A single image cannot be split: train on it and hold out nothing.
        ind_train = list(ind)
        ind_test = []
    else:
        # The first 70% of this label's images (but at least 3) go to
        # training; whatever remains is held out for testing.
        i_split = max(3, int(len(ind)*0.7))
        ind_train = list(ind[0:i_split])
        ind_test = list(ind[i_split:])
    train_idx += ind_train
    test_idx += ind_test

# Fixed: apply the format string with `%`. Passing the count as a second
# print() argument left the literal "%d" in the output.
print("Training data set has %d face images" % len(train_idx))
print("Test data set has %d face images" % len(test_idx))


In [None]:
# Baseline: label each held-out face with distance_based_prediction against
# the training embeddings, and count how often that matches the true label.
nCorrect = sum(
    1
    for i in test_idx
    if distance_based_prediction(FaceEmbedding[train_idx], labels[train_idx], FaceEmbedding[i]) == labels[i]
)
print('Face recognition accuracy using purely distance based method: %3.3f %%' % (100.*(0.+nCorrect)/len(test_idx)))

In [None]:
# Run evaluate_model on the train/test embeddings before myModel has been
# fitted, as a reference point for the evaluation after training.
evaluate_model(myModel, FaceEmbedding[train_idx], labels_OH[train_idx], FaceEmbedding[test_idx], labels_OH[test_idx], maxItem = 1000)

In [None]:
# Train the classifier on the training embeddings and their one-hot labels:
# 300 epochs with mini-batches of 32.
myModel.fit(FaceEmbedding[train_idx], labels_OH[train_idx], epochs = 300, batch_size = 32)

In [None]:
# Repeat the same evaluation now that myModel has been trained.
evaluate_model(myModel, FaceEmbedding[train_idx], labels_OH[train_idx], FaceEmbedding[test_idx], labels_OH[test_idx], maxItem = 1000)

In [None]:
# Show every misclassified test face, followed by one example image of the
# class it was mistaken for.
for j in test_idx:
    p = np.argmax(myModel.predict(FaceEmbedding[j:j+1,:]))
    if p == labels[j]:
        continue

    # The misclassified image, moved from channels-first to
    # (height, width, channels) for imshow.
    plt.figure()
    plt.imshow(FaceData[j].transpose((1,2,0)))
    plt.title('pred %d, true %d' % (p,labels[j]))
    plt.show()

    # First image in the data set whose label equals the predicted class.
    plt.figure()
    first_of_predicted = np.where(labels==p)[0][0]
    plt.imshow(FaceData[first_of_predicted].transpose((1,2,0)))
    plt.show()

In [None]:
# Display the shape of the embedding matrix.
FaceEmbedding.shape