<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Face-recognition---training-and-prediction" data-toc-modified-id="Face-recognition---training-and-prediction-1">Face recognition - training and prediction</a></span><ul class="toc-item"><li><span><a href="#Load-data" data-toc-modified-id="Load-data-1.1">Load data</a></span></li></ul></li><li><span><a href="#Face-recognition-model" data-toc-modified-id="Face-recognition-model-2">Face recognition model</a></span><ul class="toc-item"><li><span><a href="#Stratified-train-test-split" data-toc-modified-id="Stratified-train-test-split-2.1">Stratified train-test split</a></span></li></ul></li></ul></div>

# Face recognition using PCA

Lets pick from where we left. We've extracted about 200 Principal components from the set of images with 4096 pixels/features however we don't need all the 200 components to recognize a face. From the previous notebook we could recognize some random faces with as low as 60 components accurately however we need a number that would maximize the prediction accuracy. Lets create few models and train them to predict the faces. We will then evaluate the train and test accuracy of each model.


In [1]:
# Import library / packages

# For numerical operation
import numpy as np

# Pandas for tabular manipulations
import pandas as pd

# For plots
from matplotlib import pyplot as plt

# Train-test split
from sklearn.model_selection import train_test_split

# For PCA
from sklearn.decomposition import PCA

# Support Vector Machine
from sklearn.svm import SVC

# K-Nearest Neighbour
from sklearn.neighbors import KNeighborsClassifier

## Load data

In [2]:
# Load the faces
faces_image = np.load('Faces\\olivetti_faces.npy')
faces_target = np.load('Faces\\olivetti_faces_target.npy')

# Find the dimension
faces_image.shape, faces_target.shape

((400, 64, 64), (400,))

# Face recognition model

In [43]:
class Face_recognize :
    
    def __init__(self,n_components=100,model = 'SVCRBF', mirror_face = 'Y', scaler='MinMax') :
        
        assert(isinstance(n_components,int))
        self.n = n_components
        self.pca = PCA(n_components=n_components)
        
        assert(model in ['SVCRBF', 'KNN1EU'])
        self.model = model
        if model == 'SVCRBF' :
            self.mod = SVC(kernel='rbf', gamma='auto')
        elif model == 'KNN1EU' :
            self.mod = KNeighborsClassifier(n_neighbors=1)
        
        assert(mirror_face in ['Y', 'N'])
        self.mirror = mirror_face
        
        assert(scaler in ['MinMax', 'Std', None])
        self.scaler = scaler
        
        str = 'Face_recognize(n_components={}, model = {}, mirror_face = {}, scaler={})'.\
        format(self.n,self.model,self.mirror,self.scaler)
        print(str)
        
        
    def fit(self,X,Y) :
                           
        # Add Mirror faces
        Transform_mat = np.flip(np.diagflat(np.ones(faceimg_train.shape[1])),1)
        
        def Mirror_face(X) :
            return(np.dot(X,Transform_mat))

        for i, face in enumerate(X) :
            New_face = Mirror_face(face).reshape(1,face.shape[0], face.shape[1])
            X = np.append(X,New_face, axis=0)
            Y = np.append(Y,Y[i])
            
        # Normalize
        X = X.reshape(X.shape[0],-1)
        if self.scaler == 'MinMax' :
            XS = X/(np.max(X,axis=1,keepdims=True)-np.min(X,axis=1,keepdims=True))
        elif self.scaler == 'Std' :
            XS = X/np.std(X,axis=1,keepdims=True)
        elif self.scaler == None :
            XS = X
            
                    
        # Find PCA        
        Weights_train = self.pca.fit_transform(XS)
        self.Mean_face = self.pca.mean_
        self.Eigen_vec = self.pca.components_
                
        
        # Find mean weights by class
        self.W_mean = pd.concat([pd.DataFrame(Y, columns = ['Label']), pd.DataFrame(Weights_train)], axis = 1)\
                                .groupby(['Label']).mean().values        
        out = self._eu_classdist(Weights_train, self.W_mean)
        
        # Maximum allowed distance from vector
        self.max_dist = np.max(out[:,1])
        
        # Maximum allowed error in regenerated face
        self.max_err = 1.8*np.max(self._face_diff(XS, self.Mean_face, Weights_train, self.Eigen_vec),axis=0)
        
        # Train the final model
        self.mod.fit(Weights_train,Y)
            
            
    # Find euclidean distance from each class
    def _eu_classdist(self,X,Mean) :
        distance = np.empty([X.shape[0], Mean.shape[0]])   
        for i, mean in enumerate(Mean) :            
            distance[:,i] = np.linalg.norm((X-mean.reshape(1,-1)), axis=1)  
        output = np.vstack((np.argmin(distance, axis = 1),np.min(distance, axis = 1))).T
        return(output)
    
    # Find error of regenerated face
    def _face_diff(self,Face,Mean_face,Weights,Eigen_vec) :
        Regen_face = Mean_face.reshape(1,-1) + np.dot(Weights,Eigen_vec)
        return(np.linalg.norm((Face.reshape(Regen_face.shape[0],Regen_face.shape[1])-Regen_face),axis=1))
    
            
    def predict(self,X) :
        
        # Normalize
        X = X.reshape(X.shape[0],-1)
        if self.scaler == 'MinMax' :
            XS = X/(np.max(X,axis=1,keepdims=True)-np.min(X,axis=1,keepdims=True))
        elif self.scaler == 'Std' :
            XS = X/np.std(X,axis=1,keepdims=True)
        elif self.scaler == None :
            XS = X

        
        # PCA
        Weights_test = self.pca.transform(XS)
        
        
        # Weight check
        out = self._eu_classdist(Weights_test, self.W_mean)
                    
        
        # Error check
        err = self._face_diff(XS, self.Mean_face, Weights_test, self.Eigen_vec)
                        
        # Model prediction
        Prediction = self.mod.predict(Weights_test)
        
        # Prediction after check
        return(np.array([np.nan if out[i,1] > self.max_dist or err[i] > self.max_err else p \
                for i, p in enumerate(Prediction)]))
        
    

## Stratified train-test split
The split should be stratified for accurate test results. Stratified split ensures that data from each class/category of dataset are distributed between train and test based on the size defined.

In [44]:
# Split the train and test images
faceimg_train, faceimg_test, facetrg_train, facetrg_test = train_test_split(faces_image, faces_target,\
                                                    stratify = faces_target, test_size = 0.30)

In [45]:
# Initiate the model
model = Face_recognize(n_components=80, scaler = 'MinMax')

Face_recognize(n_components=80, model = SVCRBF, mirror_face = Y, scaler=MinMax)


In [46]:
# Train the model
model.fit(faceimg_train,facetrg_train)

In [47]:
# Training data prediction
Train_pred = model.predict(faceimg_train)

# Test data prediction
Test_pred = model.predict(faceimg_test)


In [48]:
# Accuracy
np.sum(facetrg_train == Train_pred)*100/facetrg_train.shape[0]
#np.sum(Y_train_pred.reshape(-1) == Y_train.reshape(-1))*100/Y_train.shape[0]

100.0

In [49]:
np.sum(facetrg_test == Test_pred)*100/facetrg_test.shape[0]

95.83333333333333

In [50]:
facetrg_test

array([ 4., 38.,  2.,  8., 37., 15., 30., 23., 35., 34., nan,  3., 12.,
       32., 17., 38.,  6.,  6., 22., 16., 26., 10., 35., 29., 30., 32.,
       36., 21., 31., 33.,  2., 26.,  3., 35., 20., 32.,  4., 27., 34.,
       23., 19., 36.,  4., 20., 12.,  7., 15., 20., 21.,  5.,  9.,  0.,
       11., 15., 34., 38.,  0., 28., 33., 14., 21., 17., 14., 24., 27.,
       17.,  8., 13., 26., 27., 19.,  1.,  1., 39., 22., 18., 38., 25.,
       22., 36., 11., 25.,  2., 11.,  0.,  1.,  8., 31., 30.,  9., 37.,
       24., 33.,  9., 16., 18., 13., 28., 28., 14., 14., 39., 24.,  5.,
       37.,  6., 31., 18.,  7., 29., 23.,  4., 25.,  3., 19.,  5., 39.,
       13., 16., 35.])

In [41]:
SVC()

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)