<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Face-recognition-using-PCA" data-toc-modified-id="Face-recognition-using-PCA-1">Face recognition using PCA</a></span><ul class="toc-item"><li><span><a href="#Load-data" data-toc-modified-id="Load-data-1.1">Load data</a></span></li><li><span><a href="#Face-recognition-model" data-toc-modified-id="Face-recognition-model-1.2">Face recognition model</a></span></li><li><span><a href="#Hold-out-n-fold-cross-validation" data-toc-modified-id="Hold-out-n-fold-cross-validation-1.3">Hold out n fold cross validation</a></span></li><li><span><a href="#Stratified-train-test-split" data-toc-modified-id="Stratified-train-test-split-1.4">Stratified train-test split</a></span></li></ul></li></ul></div>

# Face recognition using PCA

Our final model is based on the analysis done in the previous notebook. We pick the best parts of the different models we tried and combine them into a new model. This model uses PCA and SVC, and it also looks for ways to detect faces that are not part of the training distribution, e.g. random noise.


In [68]:
# Import library / packages

# For numerical operation
import numpy as np

# Pandas for tabular manipulations
import pandas as pd

# For plots
from matplotlib import pyplot as plt

# Stratified n-fold cv
from sklearn.model_selection import StratifiedKFold

# Train-test split
from sklearn.model_selection import train_test_split

# For PCA
from sklearn.decomposition import PCA

# Support Vector Machine
from sklearn.svm import SVC

# K-Nearest Neighbour
from sklearn.neighbors import KNeighborsClassifier

## Load data

In [69]:
# Load the faces
# Forward slashes are accepted on Windows, Linux and macOS alike; the previous
# back-slash separator only resolved on Windows.
faces_image = np.load('Faces/olivetti_faces.npy')
faces_target = np.load('Faces/olivetti_faces_target.npy')

# Find the dimension
faces_image.shape, faces_target.shape

((400, 64, 64), (400,))

## Face recognition model

In [86]:
# Face recognition model
class Face_recognize :
    """PCA based face classifier that returns NaN for images it does not trust.

    Images are optionally augmented with their left-right mirror, scaled per
    image, projected with PCA and classified with the chosen model.  Two
    thresholds learnt in ``fit`` are used in ``predict`` to reject an image:

    * its distance (in PCA space) to the nearest class-mean weight vector
      exceeds ``k_w`` times the largest such distance seen in training, or
    * its reconstruction error from the PCA basis exceeds ``k_e`` times the
      largest reconstruction error seen in training.

    Parameters
    ----------
    n_components : int
        Number of principal components passed to ``PCA``.
    model : {'SVCRBF', 'KNN1EU'}
        'SVCRBF' uses ``SVC(kernel='rbf', gamma='auto')``; 'KNN1EU' uses
        ``KNeighborsClassifier(n_neighbors=1)``.
    mirror_face : {'Y', 'N'}
        'Y' adds a horizontally mirrored copy of every training image.
    scaler : {'MinMax', 'Std', None}
        Per-image divisor: (max - min), standard deviation, or no scaling.
    k_w : int or float
        Multiplier on the maximum training class-mean distance.
    k_e : int or float
        Multiplier on the maximum training reconstruction error.
    print_info : bool
        When true, print the configuration on construction.
    """

    def __init__(self,n_components=100,model = 'SVCRBF', mirror_face = 'Y', scaler='MinMax', \
                 k_w=1, k_e=1.8, print_info=True) :

        assert(isinstance(n_components,int))
        self.n = n_components
        self.pca = PCA(n_components=n_components)

        assert(model in ['SVCRBF', 'KNN1EU'])
        self.model = model
        if model == 'SVCRBF' :
            self.mod = SVC(kernel='rbf', gamma='auto')
        elif model == 'KNN1EU' :
            self.mod = KNeighborsClassifier(n_neighbors=1)

        assert(mirror_face in ['Y', 'N'])
        self.mirror = mirror_face

        assert(scaler in ['MinMax', 'Std', None])
        self.scaler = scaler

        assert(isinstance(k_w, (int, float)))
        self.k_w = k_w

        assert(isinstance(k_e, (int, float)))
        self.k_e = k_e

        # Named `info` rather than `str` so the builtin is not shadowed.
        info = 'Face_recognize(n_components={}, model = {}, mirror_face = {}, scaler={}, k_w={}, k_e={}, print_info={})'.\
        format(self.n,self.model,self.mirror,self.scaler,self.k_w,self.k_e,print_info)
        if print_info :
            print(info)


    def _scale(self,X) :
        """Flatten each image to a row and apply the configured per-image scaling.

        The data is cast to float64 so that ``fit`` and ``predict`` work in the
        same precision; otherwise a training image can fall marginally outside
        the thresholds that were derived from it.
        """
        X = np.asarray(X, dtype=np.float64)
        X = X.reshape(X.shape[0],-1)
        if self.scaler == 'MinMax' :
            spread = np.max(X,axis=1,keepdims=True)-np.min(X,axis=1,keepdims=True)
        elif self.scaler == 'Std' :
            spread = np.std(X,axis=1,keepdims=True)
        else :
            return(X)
        # A constant image has zero spread; leave it unscaled instead of
        # producing inf/NaN values.
        spread[spread == 0] = 1.0
        return(X/spread)


    def fit(self,X,Y) :
        """Fit PCA, the rejection thresholds and the classifier.

        Parameters
        ----------
        X : array of shape (n_images, height, width)
        Y : array of shape (n_images,) with the class label of each image
        """
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y)

        # Add mirror faces (only when requested).  Reversing the last axis is
        # the same left-right flip as multiplying by an anti-diagonal identity
        # matrix, but also works for non-square images and avoids re-allocating
        # the whole array once per image.
        if self.mirror == 'Y' :
            X = np.concatenate((X, X[:,:,::-1]), axis=0)
            Y = np.concatenate((Y, Y))

        # Normalize
        XS = self._scale(X)

        # Find PCA
        Weights_train = self.pca.fit_transform(XS)
        self.Mean_face = self.pca.mean_
        self.Eigen_vec = self.pca.components_

        # Find mean weights by class (rows are ordered by sorted label)
        labelled = pd.concat([pd.DataFrame(Y, columns = ['Label']), pd.DataFrame(Weights_train)], axis = 1)
        self.W_mean = labelled.groupby(['Label']).mean().values
        out = self._eu_classdist(Weights_train, self.W_mean)

        # Maximum allowed distance from the nearest class-mean weight vector
        self.max_dist = self.k_w*np.max(out[:,1])

        # Maximum allowed error in regenerated face
        self.max_err = self.k_e*np.max(self._face_diff(XS, self.Mean_face, Weights_train, self.Eigen_vec),axis=0)

        # Train the final model
        self.mod.fit(Weights_train,Y)


    # Find euclidean distance from each class
    def _eu_classdist(self,X,Mean) :
        """Return an (n_samples, 2) array: [index of nearest class mean, distance to it]."""
        distance = np.empty([X.shape[0], Mean.shape[0]])
        for i, mean in enumerate(Mean) :
            distance[:,i] = np.linalg.norm((X-mean.reshape(1,-1)), axis=1)
        output = np.vstack((np.argmin(distance, axis = 1),np.min(distance, axis = 1))).T
        return(output)

    # Find error of regenerated face
    def _face_diff(self,Face,Mean_face,Weights,Eigen_vec) :
        """Return, per image, the Euclidean norm of (image - PCA reconstruction)."""
        Regen_face = Mean_face.reshape(1,-1) + np.dot(Weights,Eigen_vec)
        return(np.linalg.norm((Face.reshape(Regen_face.shape[0],Regen_face.shape[1])-Regen_face),axis=1))


    def predict(self,X) :
        """Predict a label per image, or NaN when the image fails a threshold check.

        Parameters
        ----------
        X : array of shape (n_images, height, width)

        Returns
        -------
        numpy array of length n_images holding the classifier's label, replaced
        by ``np.nan`` where the class-mean distance exceeds ``max_dist`` or the
        reconstruction error exceeds ``max_err``.
        """
        # Normalize
        XS = self._scale(X)

        # PCA
        Weights_test = self.pca.transform(XS)

        # Weight check
        out = self._eu_classdist(Weights_test, self.W_mean)

        # Error check
        err = self._face_diff(XS, self.Mean_face, Weights_test, self.Eigen_vec)

        # Model prediction
        Prediction = self.mod.predict(Weights_test)

        # Prediction after check
        rejected = (out[:,1] > self.max_dist) | (err > self.max_err)
        return(np.array([np.nan if bad else p for bad, p in zip(rejected, Prediction)]))
        
    

## Hold out n fold cross validation

In [87]:
# Cross validation
def cross_val(model,X,Y,cv=5) :
    """Stratified k-fold evaluation of ``model``.

    Parameters
    ----------
    model : object exposing ``fit(X, Y)`` and ``predict(X)``
    X : array indexable along its first axis by an integer index array
    Y : array of labels, one per row of ``X``
    cv : int
        Number of stratified folds.

    Returns
    -------
    (Train_score, Test_score) : tuple of numpy arrays, one entry per fold,
        each the fraction of predictions equal to the true label.  A NaN
        prediction never equals a label, so it counts as a miss.
    """
    # Kfold CV.  `cv` is now honoured (it used to be ignored in favour of a
    # hard-coded 5).  No random_state is passed: without shuffle=True it has no
    # effect, and recent scikit-learn raises ValueError for that combination.
    k_fold = StratifiedKFold(n_splits = cv)

    Train_score = []
    Test_score = []
    for train_idx, test_idx in k_fold.split(X,Y) :
        model.fit(X[train_idx],Y[train_idx])
        Train_pred = model.predict(X[train_idx])
        Test_pred = model.predict(X[test_idx])
        Train_score.append(np.mean(Y[train_idx]==Train_pred))
        Test_score.append(np.mean(Y[test_idx]==Test_pred))

    return(np.array(Train_score), np.array(Test_score))
    

In [88]:
# Number of components = 80 - Cross validation results
model = Face_recognize(n_components=80, scaler = 'MinMax',print_info=True)
Train_score, Test_score = cross_val(model, faces_image, faces_target, cv=5)

# cross_val returns the fraction of correct predictions per fold, so the
# figures below are accuracies (mean +/- standard deviation), not errors.
print('Train accuracy {} % +/- {}'.format(np.round(100*np.mean(Train_score),2),np.round(100*np.std(Train_score),2)))
print('Test accuracy {} % +/- {}'.format(np.round(100*np.mean(Test_score),2),np.round(100*np.std(Test_score),2)))

Face_recognize(n_components=80, model = SVCRBF, mirror_face = Y, scaler=MinMax, k_w=1, k_e=1.8, print_info=True)
Train error 99.94 % +/- 0.13
Test error 95.5 % +/- 3.41


## Stratified train-test split

In [89]:
# Split the train and test images
# random_state pins the shuffle so the split (and every score computed from it)
# is the same on each Restart & Run All.
faceimg_train, faceimg_test, facetrg_train, facetrg_test = train_test_split(faces_image, faces_target,\
                                                    stratify = faces_target, test_size = 0.30,\
                                                    random_state = 123)

In [90]:
# Random noise
# Use a seeded generator so the noise images are identical on every run.
noise_rng = np.random.RandomState(123)
Random_face = noise_rng.random_sample([120,64,64])

# First half: low-amplitude noise (values in [0, 0.1)); second half: values in [0, 1)
Z1 = Random_face[0:60,:,:]/10
Z2 = Random_face[60:120,:,:]

# Noise images have no valid identity, so their expected label is NaN
Z1Y = np.full(Z1.shape[0], np.nan)
Z2Y = np.full(Z2.shape[0], np.nan)

In [91]:
# Stack the real test faces and both noise sets into a single test set
X_test = np.vstack([faceimg_test, Z1, Z2])
Y_test = np.hstack([facetrg_test, Z1Y, Z2Y])

In [92]:
# Model - Train
# Build the recogniser with 80 principal components and per-image min-max scaling,
# then fit it on the training split.
model = Face_recognize(
    n_components=80,
    scaler='MinMax',
    print_info=True,
)
model.fit(faceimg_train, facetrg_train)

Face_recognize(n_components=80, model = SVCRBF, mirror_face = Y, scaler=MinMax, k_w=1, k_e=1.8, print_info=True)


In [93]:
# Train prediction and accuracy
Train_pred = model.predict(faceimg_train)

# A prediction is correct when it equals the label, or when both are NaN
train_hits = (Train_pred == facetrg_train) | (np.isnan(Train_pred) & np.isnan(facetrg_train))
np.round(100*np.mean(train_hits),2)

100.0

In [94]:
# Test prediction and accuracy
Test_pred = model.predict(X_test)

# A prediction is correct when it equals the label, or when both are NaN
# (NaN is the expected answer for the random-noise images)
test_hits = (Test_pred == Y_test) | (np.isnan(Test_pred) & np.isnan(Y_test))
np.round(100*np.mean(test_hits),2)

97.5

As you can see, the model classifies known faces and rejects unseen (random-noise) images with an overall accuracy of 97.5%.