**Face recognition**

The task is to recognize faces.

**Dataset**

Aligned Face Dataset from Pinterest

This dataset contains 10,770 images of 100 people. All images were taken from Pinterest and aligned using the dlib library.

In [0]:
import os

# Project folder on Google Drive. After the chdir, the relative paths used in
# later cells ('PINS', 'vgg_face_weights.h5') are resolved against it.
# NOTE(review): this path lives under '/content/drive', the mount point used by
# the drive.mount(...) cell below, so that cell presumably has to run before
# this one for the chdir to succeed -- confirm the intended cell order.
working_dir = '/content/drive/My Drive/FaceDetectionRecognition'
os.chdir(working_dir)

In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import zipfile

In [0]:
# Path of the dataset archive inside working_dir (despite the '_dir' suffix
# this names a .zip file, not a directory).
zip_file_dir = working_dir+'/Aligned Face Dataset from Pinterest.zip'

In [0]:
#extracted = zipfile.ZipFile(zip_file_dir, 'r')
#extracted.extractall(path=working_dir)



In [0]:
ls

In [0]:
import numpy as np
import os

In [0]:
class IdentityMetadata:
    """Location of a single image inside the dataset tree.

    The image is addressed as ``<base>/<name>/<file>``.
    """

    def __init__(self, base, name, file):
        self.base = base  # dataset base directory
        self.name = name  # identity name
        self.file = file  # image file name

    def image_path(self):
        """Return the path obtained by joining base, name and file."""
        parts = (self.base, self.name, self.file)
        return os.path.join(*parts)

    def __repr__(self):
        """Represent the record by its image path."""
        return self.image_path()

In [0]:
def load_metadata(path):
    """Collect an IdentityMetadata record for every JPEG image under ``path``.

    ``path`` is expected to contain one sub-directory per identity, each
    holding the image files of that identity.

    Args:
        path: dataset base directory.

    Returns:
        numpy array of IdentityMetadata objects, ordered by identity
        directory name and then by file name.
    """
    metadata = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # indices reproducible between runs and machines.
    for identity in sorted(os.listdir(path)):
        identity_dir = os.path.join(path, identity)
        # Skip stray files next to the identity folders (e.g. '.DS_Store');
        # listing them as directories would raise NotADirectoryError.
        if not os.path.isdir(identity_dir):
            continue
        for file_name in sorted(os.listdir(identity_dir)):
            # Compare the extension case-insensitively so '.JPG' / '.JPEG'
            # files are not silently dropped.
            ext = os.path.splitext(file_name)[1].lower()
            if ext in ('.jpg', '.jpeg'):
                metadata.append(IdentityMetadata(path, identity, file_name))
    return np.array(metadata)



In [0]:
# Index the dataset; 'PINS' is a relative path, resolved against the current
# working directory.
metadata = load_metadata('PINS')

In [0]:
# Peek at three entries; each is displayed through IdentityMetadata.__repr__,
# i.e. as its image path.
metadata[[0,200,1000]]

In [0]:
# Show the three components of one record: base directory, identity name and
# image file name.
print(metadata[200].base)
print(metadata[200].name)
print(metadata[200].file)

In [0]:
import cv2
def load_image(path):
    """Read an image file and return it with channels in RGB order.

    Args:
        path: path of the image file.

    Returns:
        The 3-channel image array with the channel axis reversed from
        OpenCV's BGR order to RGB.

    Raises:
        OSError: if OpenCV could not read an image from ``path``.
    """
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque TypeError on
    # the slice below.
    if img is None:
        raise OSError(f'Could not read image: {path}')
    # OpenCV loads images with color channels in BGR order, so reverse the
    # last axis.
    return img[..., ::-1]

In [0]:
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
# Load the image of metadata entry 200 (RGB order) and display it as a quick
# sanity check.
img1 = load_image(metadata[200].image_path())

plt.imshow(img1)

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten, Activation

def vgg_face():
    """Build the VGG-Face network as a Keras Sequential model.

    Layout: 224x224x3 input; five blocks of zero-padded 3x3 ReLU convolutions
    (64, 128, 256, 512, 512 filters with 2, 2, 3, 3, 3 convolutions), each
    block closed by 2x2 max pooling; then a convolutional head (7x7 and 1x1
    convolutions with 4096 filters, each followed by dropout 0.5, and a 1x1
    convolution with 2622 filters), flattening and a softmax activation.

    Returns:
        The freshly constructed Sequential model.
    """
    # (filters, number of padded 3x3 convolutions) for each block
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()
    is_first_layer = True
    for filters, n_convs in conv_blocks:
        for _ in range(n_convs):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    # Convolutional head: two 4096-filter layers, each followed by dropout.
    for filters, kernel in ((4096, (7, 7)), (4096, (1, 1))):
        model.add(Convolution2D(filters, kernel, activation='relu'))
        model.add(Dropout(0.5))

    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model

In [0]:
# Build the network architecture; weights are loaded in a later cell.
model = vgg_face()

In [0]:
# Print the layer-by-layer summary of the network.
model.summary()

In [0]:
# Weights file, given as a relative path (resolved against the current
# working directory).
weight_file = 'vgg_face_weights.h5'

# Load the stored weights into the network built by vgg_face().
model.load_weights(weight_file)

In [0]:
from tensorflow.keras.models import Model
# Descriptor network: same input as `model`, but the output is taken from the
# second-to-last layer (the Flatten layer in vgg_face()), i.e. the values fed
# into the final softmax activation.
vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)

In [0]:
# Load the first dataset image (RGB order) to try the descriptor on one sample.
img_path = metadata[0].image_path()
img = load_image(img_path)

In [0]:
# Divide the pixel values by 255 and store them as float32
# (presumably 8-bit input, which this maps into [0, 1] -- confirm).
img = (img / 255.).astype(np.float32)

# Resize to the 224x224 input size declared in vgg_face().
img = cv2.resize(img, dsize = (224,224))
print(img.shape)

In [0]:
# Add a leading batch axis, run the descriptor network and take the single
# result row as the embedding of this image.
embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img, axis=0))[0]
print(embedding_vector.shape)

In [0]:
# Number of image records returned by load_metadata.
metadata.shape

In [0]:
# One row per image record. 2622 is the filter count of the last convolution
# in vgg_face(), which is the width of the descriptor output. Rows start as
# zeros and are filled in by the embedding loop.
embeddings = np.zeros((metadata.shape[0], 2622))

In [0]:
# Compute the embedding of every image with the same preprocessing as the
# single-image example: scale by 1/255, cast to float32, resize to 224x224.
for i, m in enumerate(metadata):
    try:
        img_path = m.image_path()
        img = load_image(img_path)
        img = (img / 255.).astype(np.float32)
        img = cv2.resize(img, dsize=(224, 224))

        embeddings[i] = vgg_face_descriptor.predict(np.expand_dims(img, axis=0))[0]
    except Exception as exc:
        # Best effort: report the failing sample and its cause, then carry on.
        # Its row in `embeddings` stays all zeros. Catching Exception rather
        # than using a bare except lets Ctrl-C / kernel interrupt stop the loop.
        print(i, m, exc)

print(embeddings[300])

In [0]:
# Inspect the embedding row stored for sample 99.
embeddings[99]

In [0]:
def distance(emb1, emb2):
    """Return the sum of squared element-wise differences of two embeddings."""
    delta = emb1 - emb2
    squared = np.square(delta)
    return np.sum(squared)

In [0]:
import matplotlib.pyplot as plt

def show_pair(idx1, idx2):
    """Plot two dataset images side by side, titled with their distance.

    Args:
        idx1: index into ``metadata`` / ``embeddings`` of the left image.
        idx2: index into ``metadata`` / ``embeddings`` of the right image.
    """
    plt.figure(figsize=(8,3))
    pair_distance = distance(embeddings[idx1], embeddings[idx2])
    plt.suptitle(f'Distance = {pair_distance:.2f}')
    # Subplot codes 121 / 122: one row, two columns, first then second panel.
    for position, idx in ((121, idx1), (122, idx2)):
        plt.subplot(position)
        plt.imshow(load_image(metadata[idx].image_path()))



In [0]:
# Plot a few image pairs together with their embedding distance.
show_pair(0, 3)
show_pair(0, 200)
show_pair(70, 72)
show_pair(70, 115)

In [0]:
# Boolean masks over the samples: every sample whose index is divisible by 9
# is held out for testing, all remaining samples are used for training.
test_idx = np.arange(metadata.shape[0]) % 9 == 0
train_idx = np.logical_not(test_idx)

In [0]:
# Split the embeddings with the boolean masks.
X_train = embeddings[train_idx]
X_test = embeddings[test_idx]

# The label of each sample is the identity name stored in its metadata record.
targets = np.array([m.name for m in metadata])

# Labels split with the same masks; a later cell reassigns y_train / y_test to
# the LabelEncoder-encoded version of these labels.
y_train = targets[train_idx]
y_test = targets[test_idx]

In [0]:
# Sanity-check the sizes of the masks and of the resulting splits.
print(metadata.shape)
print(train_idx.shape)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [0]:
from sklearn.preprocessing import LabelEncoder
# Encoder used to map identity names to integer class labels and back.
le = LabelEncoder()

In [0]:
# Encode all identity names, then split the encoded labels with the same
# train/test masks (this replaces the earlier y_train / y_test).
y = le.fit_transform(targets)
y_train = y[train_idx]
y_test = y[test_idx]

In [0]:
# Inspect one encoded training label.
y_train[1]

In [0]:
# Sizes of the encoded label splits.
print(y_train.shape)
print(y_test.shape)

In [0]:
from sklearn.preprocessing import StandardScaler

In [0]:
# Scaler used to standardize the embedding features before PCA.
ss = StandardScaler()

In [0]:
# Fit the scaler on the training data only, then apply that same fitted
# transformation to the test data. Re-fitting on X_test would standardize the
# test set with its own statistics, so train and test features would no longer
# be on the same scale.
X_train_std = ss.fit_transform(X_train)
X_test_std = ss.transform(X_test)

In [0]:
from sklearn.decomposition import PCA

In [0]:
# Covariance between features: np.cov treats rows as variables, hence the
# transpose of the (samples, features) matrix.
cov_matrix = np.cov(X_train_std.T)
# Interpolate the matrix into the message; passing it as a second print()
# argument left the '%s' placeholder unformatted in the output.
print(f'Covariance Matrix \n{cov_matrix}')

In [0]:
# Shape of the standardized training matrix.
X_train_std.shape

In [0]:
# Fit a PCA without an n_components limit on the standardized training data;
# its explained-variance ratios are plotted in the next cell.
pca = PCA().fit(X_train_std)

In [0]:
# Cumulative explained-variance ratio against the number of components.
plt.plot(np.cumsum(pca.explained_variance_ratio_))

In [0]:
# Reduce to 150 principal components. The projection is fitted on the
# standardized training data only and then applied to both splits.
pca1 = PCA(n_components=150)
pca1.fit(X_train_std)

X_train_s = pca1.transform(X_train_std)
X_test_s = pca1.transform(X_test_std)

In [0]:
# Shapes of the PCA-projected train and test matrices.
print(X_train_s.shape)
print(X_test_s.shape)

In [0]:
from sklearn.svm import SVC

In [0]:
# Support vector classifier with a linear kernel and C=0.01.
svm_model = SVC(C=0.01, kernel='linear')

In [0]:
# Train on the PCA-projected training features and the encoded labels.
svm_model.fit(X_train_s, y_train)

In [0]:
from sklearn.metrics import accuracy_score

In [0]:
# Accuracy of the SVM predictions on the held-out test split.
accu = accuracy_score(y_test, svm_model.predict(X_test_s))

print(f'SVM accuracy = {accu}')


In [0]:
from sklearn.model_selection import cross_val_score

In [0]:
# scores = cross_val_score(svm_model, X_test_s, y_test, cv=10)

# print(scores)

In [0]:
import warnings

In [0]:
# Install an 'ignore' filter so that warnings are no longer shown.
warnings.filterwarnings('ignore')

In [0]:
example_idx = 10
# Take the sample from X_test_s, the standardized and PCA-projected test
# features whose rows line up with embeddings[test_idx]. The SVM was trained
# on that representation; projecting the raw embedding with PCA alone skipped
# the StandardScaler step and fed the classifier mismatched inputs.
test = X_test_s[[example_idx]]
example_image = load_image(metadata[test_idx][example_idx].image_path())
example_prediction = svm_model.predict(test)
# Map the predicted class index back to the identity name.
example_identity = le.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');

In [0]:
import warnings
# Install an 'ignore' filter so that warnings are no longer shown
# (the same call already appears in an earlier cell).
warnings.filterwarnings('ignore')

In [0]:

example_idx = 49
# Take the sample from X_test_s, the standardized and PCA-projected test
# features whose rows line up with embeddings[test_idx]. The SVM was trained
# on that representation; projecting the raw embedding with PCA alone skipped
# the StandardScaler step and fed the classifier mismatched inputs.
temp = X_test_s[[example_idx]]
example_image = load_image(metadata[test_idx][example_idx].image_path())
example_prediction = svm_model.predict(temp)
# Map the predicted class index back to the identity name.
example_identity = le.inverse_transform(example_prediction)[0]


In [0]:
# Show the example image with the identity predicted for it.
plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');

In [0]:
import warnings
warnings.filterwarnings('ignore')

example_idx = 130
# Take the sample from X_test_s, the standardized and PCA-projected test
# features whose rows line up with embeddings[test_idx]. The SVM was trained
# on that representation; projecting the raw embedding with PCA alone skipped
# the StandardScaler step and fed the classifier mismatched inputs.
temp = X_test_s[[example_idx]]
example_image = load_image(metadata[test_idx][example_idx].image_path())
example_prediction = svm_model.predict(temp)
# Map the predicted class index back to the identity name.
example_identity = le.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');