# Face Recognition using PCA

In [4]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split

In [3]:
# class for PCA based (eigen faces) face recognition
class PcaFace:
    """
        Eigenfaces (PCA) face recogniser with nearest-neighbour matching.

        Expected call order:
            readImages -> splitTrainTestImages -> getEigenFaces
            -> projectTrainFacesOnEigenFaces -> predictImagesClass / showEigenFaces
    """

    def __init__(self, sourceFolder):
        """
            Initialize the class for PCA face recognition

            sourceFolder: directory that holds one sub-folder per class; the
                          sub-folder name is used as the label of its images
        """
        self.sourceFolder = sourceFolder

        # variables for storing all the images data
        self.allImages = []            # 2-D grayscale arrays as returned by cv2.imread
        self.allLabels = []            # label (folder name) of every entry in allImages
        self.allImagesVstack = None    # one flattened image per row

        # train vars
        self.trainImages = []
        self.trainImagesNorm = None    # trainImages with trainMean subtracted
        self.trainLabels = []
        self.trainProjected = None     # list of (1, k) projections, one per training image

        # test vars
        self.testImages = []
        self.testLabels = []

        # PCA variables
        self.trainMean = None          # per-pixel mean of the training images
        self.eigenFaces = None         # (pixels, k) matrix, one unit-length eigen face per column

    def readImages(self):
        """
            This method reads the images present in the source folder separated by class folders

            Fills allImages / allLabels and builds allImagesVstack (one flattened
            image per row). Entries that are not directories and files that
            OpenCV cannot decode are skipped.

            Raises ValueError when no readable image is found.
        """
        # start from a clean slate so that a second call does not duplicate the data
        self.allImages = []
        self.allLabels = []

        # os.listdir order is arbitrary; sorting keeps the seeded train/test split
        # reproducible across machines
        for folder in sorted(os.listdir(self.sourceFolder)):
            folder_path = os.path.join(self.sourceFolder, folder)
            if not os.path.isdir(folder_path):
                continue

            for img_name in sorted(os.listdir(folder_path)):
                if img_name == "Thumbs.db":
                    continue

                img = cv2.imread(os.path.join(folder_path, img_name), cv2.IMREAD_GRAYSCALE)
                # cv2.imread signals failure by returning None instead of raising
                if img is None:
                    continue

                self.allImages.append(img)
                self.allLabels.append(folder)

        if not self.allImages:
            raise ValueError("No readable images found in " + str(self.sourceFolder))

        # flattening and vertically stacking all images in a single allocation
        self.allImagesVstack = np.vstack([np.asarray(img).reshape(1, -1) for img in self.allImages])

    def splitTrainTestImages(self, testSize=.1):
        """
            This method will split read images into training and testing images

            testSize: share of the images held out for testing (forwarded to
                      train_test_split). The split is stratified on the labels
                      and uses a fixed random_state.
        """
        self.trainImages, self.testImages, self.trainLabels, self.testLabels = train_test_split(
            self.allImagesVstack, self.allLabels, stratify=self.allLabels,
            test_size=testSize, random_state=42)

    def getEigenFaces(self, noComponents):
        """
            This method applies PCA to the training set and extracts top "noComponents" from the eigen vectors (eigen faces)

            Sets trainMean, trainImagesNorm and eigenFaces. Directions whose
            eigenvalue is numerically zero carry no variance and are dropped,
            so eigenFaces can end up with fewer than "noComponents" columns.

            Raises ValueError when the training matrix is empty or not 2-D.
        """
        trainImages = np.asarray(self.trainImages, dtype=np.float64)
        if trainImages.ndim != 2 or trainImages.shape[0] == 0:
            raise ValueError("getEigenFaces needs a non-empty 2-D training matrix")

        # finding the mean and zeroing it out
        self.trainMean = trainImages.mean(axis=0)
        trainImagesNorm = trainImages - self.trainMean
        self.trainImagesNorm = trainImagesNorm

        # the (samples x samples) matrix A.A^T is decomposed instead of the much
        # larger (pixels x pixels) A^T.A; its eigenvectors are mapped back below
        intermMatrix = np.dot(trainImagesNorm, trainImagesNorm.T)
        eigenvalues, eigenvectors = np.linalg.eigh(intermMatrix)

        # sorting based on descending order of eigen values
        order = np.argsort(-eigenvalues)
        eigenvalues = eigenvalues[order]

        # mean-centring always leaves at least one null direction; normalising it
        # would divide by a (near) zero norm and produce NaN / noise components
        tol = max(eigenvalues[0], 0.0) * eigenvalues.size * np.finfo(np.float64).eps
        order = order[eigenvalues > tol][:noComponents]

        # map only the kept eigenvectors to pixel space, then scale to unit length
        eigenFaces = np.dot(trainImagesNorm.T, eigenvectors[:, order])
        eigenFaces /= np.linalg.norm(eigenFaces, axis=0)

        self.eigenFaces = eigenFaces

    def __projectFaceOnEigenFaces(self, img, mean_adjust):
        """
            This function projects an image on the eigen faces space

            img:         row vector(s) of flattened pixels
            mean_adjust: subtract trainMean first; pass False when img is
                         already mean-centred
        """
        if mean_adjust:
            img = img - self.trainMean
        return np.dot(img, self.eigenFaces)

    def projectTrainFacesOnEigenFaces(self):
        """
            This function projects training faces on the eigen face space

            Stores the result in trainProjected as a list of (1, k) arrays.
        """
        # trainImagesNorm is already mean-centred; a single matrix product
        # projects every training image at once
        projected = self.__projectFaceOnEigenFaces(self.trainImagesNorm, False)
        self.trainProjected = [row.reshape(1, -1) for row in projected]

    def __edist(self, p, q):
        """
            This function returns euclidean distance between two points
        """
        # cast to float so that unsigned integer inputs cannot wrap around
        p = np.asarray(p, dtype=np.float64).flatten()
        q = np.asarray(q, dtype=np.float64).flatten()
        return np.linalg.norm(p - q)

    def __cdist(self, p, q):
        """
            This function returns the negated cosine similarity between two points

            Smaller values mean more similar vectors, so it ranks like a distance.
        """
        p = np.asarray(p).flatten()
        q = np.asarray(q).flatten()
        return -np.dot(p, q) / np.sqrt(np.dot(p, p) * np.dot(q, q))

    def __predictImageClass(self, img, mean_adjust, out=1):
        """
            This function predicts class of a single test image

            Returns a tuple with the labels of the "out" nearest training
            projections, closest first (ties are broken by label).
        """
        Q = self.__projectFaceOnEigenFaces(np.asarray(img).reshape(1, -1), mean_adjust)

        # rank every training projection by its euclidean distance to Q
        ranked = sorted(
            (self.__edist(P, Q), label)
            for P, label in zip(self.trainProjected, self.trainLabels)
        )

        return tuple(label for _, label in ranked[:out])

    def __percent(self, hits, total):
        """
            This function returns hits as a percentage of total (nan for an empty set)
        """
        if total == 0:
            return float("nan")
        return hits * 100 / total

    def predictImagesClass(self):
        """
            This function prints the train accuracy and the top 1 / 3 / 10
            accuracy over all the test images
        """
        trainHits = sum(
            1
            for img, true in zip(self.trainImagesNorm, self.trainLabels)
            if true in self.__predictImageClass(img, mean_adjust=False)
        )
        print("Train Accuracy: " + str(self.__percent(trainHits, len(self.trainImages))) + "%")

        # the top-k labels are a prefix of the top-10 ranking, so every test
        # image is ranked only once
        topK = (1, 3, 10)
        hits = dict.fromkeys(topK, 0)
        for img, true in zip(self.testImages, self.testLabels):
            ranked = self.__predictImageClass(img, mean_adjust=True, out=max(topK))
            for k in topK:
                if true in ranked[:k]:
                    hits[k] += 1

        for k in topK:
            accuracy = self.__percent(hits[k], len(self.testImages))
            print("Top " + str(k) + " Test Accuracy: " + str(accuracy) + "%")

    def __normalize(self, img, low, high):
        """
            This function brings img to range [low, high]

            A constant image has no range to stretch and is mapped to "low".
        """
        img = np.asarray(img, dtype=np.float64)
        min_img, max_img = np.min(img), np.max(img)
        span = max_img - min_img
        if span == 0:
            return np.full(img.shape, float(low))

        # scale to [low ... high]
        return (img - min_img) / span * (high - low) + low

    def showEigenFaces(self, count, outputFolder="./output/eigen_faces_pca/"):
        """
            This function saves the top 'count' eigen faces

            count:        number of eigen faces to save (capped at the number available)
            outputFolder: directory that receives "<index>.jpg"; created when missing
        """
        # cv2.imwrite only returns False when the directory is missing, so create it
        os.makedirs(outputFolder, exist_ok=True)

        shape = self.allImages[0].shape
        for i in range(min(count, self.eigenFaces.shape[1])):
            e = self.eigenFaces[:, i].reshape(shape)
            temp = self.__normalize(e, 0, 255)

            # save the image as 8-bit grayscale
            cv2.imwrite(os.path.join(outputFolder, str(i) + ".jpg"), np.rint(temp).astype(np.uint8))

In [134]:
# End-to-end run: load the images, hold out 15% for testing, keep the top 350
# eigen faces, report the accuracies and save the first 30 eigen faces.
DATASET_DIR = "./face_datasets/aligned-images-AVR/aligned-images-AVR/"
TEST_SHARE = 0.15
COMPONENT_COUNT = 350
SAVED_EIGEN_FACES = 30

obj = PcaFace(sourceFolder=DATASET_DIR)
obj.readImages()
obj.splitTrainTestImages(testSize=TEST_SHARE)
obj.getEigenFaces(noComponents=COMPONENT_COUNT)
obj.projectTrainFacesOnEigenFaces()
obj.predictImagesClass()
obj.showEigenFaces(SAVED_EIGEN_FACES)

Train Accuracy: 100.0%
Top 1 Test Accuracy: 58.771929824561404%
Top 3 Test Accuracy: 66.66666666666667%
Top 10 Test Accuracy: 78.94736842105263%
