# Face Recognition using LDA

In [71]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Training and Calculating Top 1 Accuracy Scores

In [85]:
# define the LDA object; no arguments are passed, so scikit-learn's defaults apply
# (the repr echoed after fitting shows solver='svd' and shrinkage=None)
lda = LinearDiscriminantAnalysis()

In [86]:
# reading in the images (grayscale) and vertically stacking them, one flattened image per row
sourceFolder = "./face_datasets/AVR_data_extracted_539/"
# os.listdir returns entries in arbitrary order; sorting keeps the image/label order
# (and therefore any seeded train/test split of it) the same on every run and machine
folders = sorted(os.listdir(sourceFolder))

allImages = []
allLabels = []

# for every folder (the folder name is used as the class label)
for folder in folders:
    # os.path.join avoids the doubled "/" that plain string concatenation produces here
    folder_path = os.path.join(sourceFolder, folder)

    # ignore stray files sitting next to the per-class folders
    if not os.path.isdir(folder_path):
        continue

    # read in the individual images of that folder
    for img_name in sorted(os.listdir(folder_path)):
        # Thumbs.db is a Windows thumbnail cache, not an image
        if img_name == "Thumbs.db":
            continue

        img_path = os.path.join(folder_path, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals an unreadable file by returning None rather than raising;
        # a None entry would make the stacking step below fail
        if image is None:
            print("skipping unreadable image: " + img_path)
            continue

        allImages.append(image)
        allLabels.append(folder)

if not allImages:
    raise RuntimeError("no readable images found under " + sourceFolder)

# flattening and vertically stacking all images: done once, outside the folder loop,
# so the stack is not rebuilt for every folder, and in a single np.vstack call
# instead of growing the array row by row
vStack = np.vstack([np.asarray(image).reshape(1, -1) for image in allImages])

In [87]:
# summary of the raw dataset: number of images, shape of one image,
# and shape of the stacked (images x pixels) matrix
print("# images: ", len(allImages), sep="")
print("image size: ", allImages[0].shape, sep="")
print("vStack size: ", vStack.shape, sep="")

# images: 546
image size: (200, 200)
vStack size: (546, 40000)


In [125]:
# split the stacked images and their labels into train and test parts:
# 10% goes to the test set, stratified on the labels, with a fixed seed
trainImages, testImages, trainLabels, testLabels = train_test_split(
    vStack,
    allLabels,
    test_size=0.1,
    random_state=1024,
    stratify=allLabels,
)

In [126]:
# check the size of the train and test images and of their label lists
print("train vStack size: " + str(trainImages.shape))
# report the label lists themselves, so a mismatch with the image count would be visible
print("train labels size: " + str(len(trainLabels)))
print("test vStack size: " + str(testImages.shape))
print("test labels size: " + str(len(testLabels)))

train vStack size: (491, 40000)
train labels size: 491
test vStack size: (55, 40000)
test labels size: 55


In [127]:
# fit the LDA model on the training images (one flattened image per row) and their labels;
# the call is left as the cell's last expression, so the fitted estimator's repr is displayed
lda.fit(trainImages, trainLabels)



LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [128]:
# score the fitted model on the held-out test split and report it as a percentage
outVal = lda.score(testImages, testLabels)
print('Top 1 Accuracy: ', outVal * 100, "%", sep="")

Top 1 Accuracy: 36.36363636363637%


# Calculating Top 3 and Top 10 Accuracy Scores

In [129]:
# the order in which the fitted LDA lists its classes; the per-class scores
# below are paired with these labels when the predictions are ranked
lda_classes = lda.classes_

# the LDA decision function gives the confidence scores for every test image
confidence = lda.decision_function(testImages)

In [130]:
def top_k_accuracy(ranked_labels, true_labels, k):
    """Return the percentage of samples whose true label is among the top k.

    Parameters
    ----------
    ranked_labels : sequence of sequences
        For each sample, the class labels ordered from most to least confident.
    true_labels : sequence
        The correct label of each sample, in the same order as ``ranked_labels``.
    k : int
        How many of the leading labels count as a hit.

    Returns
    -------
    float
        Hit rate in percent (0-100).

    Raises
    ------
    ValueError
        If ``true_labels`` is empty or its length differs from ``ranked_labels``.
    """
    if len(true_labels) == 0:
        raise ValueError("cannot compute top-k accuracy without test samples")
    if len(ranked_labels) != len(true_labels):
        raise ValueError("ranked_labels and true_labels must have the same length")

    hits = sum(
        1 for ranked, truth in zip(ranked_labels, true_labels) if truth in ranked[:k]
    )
    return (hits / len(true_labels)) * 100


# for every test image, order the class labels by decreasing confidence score.
# A stable ascending argsort that is then reversed puts the highest score first and,
# on exactly equal scores, the later class first - the same order that sorting
# (score, label) pairs in reverse gives when the classes are listed in ascending order.
ranking = np.argsort(confidence, axis=1, kind="stable")[:, ::-1]

# a[i] is the tuple of all class labels for test image i, most confident first
a = [tuple(lda_classes[row]) for row in ranking]

# Top 3 and Top 10
for k in (3, 10):
    print("Top " + str(k) + " Accuracy: " + str(top_k_accuracy(a, testLabels, k)))

Top 3 Accuracy: 41.81818181818181
Top 10 Accuracy: 58.18181818181818
