In [17]:
from os import listdir
from sklearn import preprocessing
import matplotlib.image as image
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score  

In [2]:
def loadFaceImages(path):
    """Load every face image found under ``path`` as one labeled row each.

    ``path`` is expected to contain one sub-directory per person, each holding
    that person's image files. Every image is read with ``image.imread``,
    flattened, and prefixed with the label ``"face"``.

    Parameters
    ----------
    path : str
        Directory whose entries are per-person image directories.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per image: column 0 is the label ``"face"``,
        the remaining columns are the flattened pixel values. Because labels
        and pixels share one array, every element is stored as a string.
        When no image is found an empty array of shape ``(0, 10305)`` is
        returned, the shape the previous implementation produced.

    Raises
    ------
    ValueError
        If the images do not all flatten to the same length.
    """
    rows = []

    # NOTE: listdir gives no ordering guarantee, so row order follows
    # whatever order the file system reports.
    for person in listdir(path):
        personDir = path + "/" + person

        for img in listdir(personDir):
            pixels = image.imread(personDir + "/" + img).flatten()
            # Mixing a string label with numeric pixels yields a string row.
            rows.append(np.concatenate((["face"], pixels)))

    if not rows:
        # 10305 = 1 label column + 10304 pixel columns, the row width this
        # loader used to hard-code; kept so the empty result is unchanged.
        return np.empty((0, 10305))

    # Stack once at the end instead of re-copying the dataset per image.
    return np.vstack(rows)


In [3]:
def loadNonFaceImages(path):
    """Load every image directly inside ``path`` as one labeled row each.

    Every file listed in ``path`` is read with ``image.imread``, flattened,
    and prefixed with the label ``"non-face"``.

    Parameters
    ----------
    path : str
        Directory that directly contains the image files.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per image: column 0 is the label
        ``"non-face"``, the remaining columns are the flattened pixel values.
        Because labels and pixels share one array, every element is stored
        as a string. When no image is found an empty array of shape
        ``(0, 10305)`` is returned, the shape the previous implementation
        produced.

    Raises
    ------
    ValueError
        If the images do not all flatten to the same length.
    """
    rows = []

    # NOTE: listdir gives no ordering guarantee, so row order follows
    # whatever order the file system reports.
    for img in listdir(path):
        pixels = image.imread(path + "/" + img).flatten()
        # Mixing a string label with numeric pixels yields a string row.
        rows.append(np.concatenate((["non-face"], pixels)))

    if not rows:
        # 10305 = 1 label column + 10304 pixel columns, the row width this
        # loader used to hard-code; kept so the empty result is unchanged.
        return np.empty((0, 10305))

    # Stack once at the end instead of re-copying the dataset per image.
    return np.vstack(rows)

In [4]:
pathFaces = "att_faces/orl_faces"
pathNonFaces = "rand_imgs/rand_imgs_pgm"

# Load the labeled face samples (column 0 = label, remaining columns = pixels).
labeledFaceData = loadFaceImages(pathFaces)

# Split by alternating rows: odd-indexed rows train, even-indexed rows test.
trainingLabeledFaceData = labeledFaceData[1::2]
testLabeledFaceData = labeledFaceData[0::2]

# Separate the label column from the pixel columns and make the pixels numeric
# (the loader returns everything as strings).
trainingFaceLabels, trainingFaceData = trainingLabeledFaceData[:,0], trainingLabeledFaceData[:,1:]
trainingFaceData = np.array(trainingFaceData).astype(np.float64)

testFaceLabels, testFaceData = testLabeledFaceData[:,0], testLabeledFaceData[:,1:]
testFaceData = np.array(testFaceData).astype(np.float64)

# Load the labeled non-face samples.
labeledNonFaceData = loadNonFaceImages(pathNonFaces)

# Same alternating split as for the faces. These assignments were previously
# commented out, which left testLabeledNonFaceData undefined and made the
# statements below fail with a NameError on a fresh kernel.
trainingLabeledNonFaceData = labeledNonFaceData[1::2]
testLabeledNonFaceData = labeledNonFaceData[0::2]

# Separate labels from pixels, mirroring the face handling above.
trainingNonFaceLabels, trainingNonFaceData = trainingLabeledNonFaceData[:,0], trainingLabeledNonFaceData[:,1:]
trainingNonFaceData = np.array(trainingNonFaceData).astype(np.float64)

testNonFaceLabels, testNonFaceData = testLabeledNonFaceData[:,0], testLabeledNonFaceData[:,1:]
testNonFaceData = np.array(testNonFaceData).astype(np.float64)


In [5]:
# Show both labeled datasets for a quick visual check: each row starts with
# its label, followed by the pixel values.
print(labeledFaceData)
# Visual separator between the two dumps.
print("======================================")
print(labeledNonFaceData)

[['face' '48' '49' ... '47' '46' '46']
 ['face' '34' '34' ... '37' '40' '33']
 ['face' '60' '60' ... '32' '34' '34']
 ...
 ['face' '112' '109' ... '93' '88' '92']
 ['face' '111' '114' ... '88' '86' '92']
 ['face' '110' '112' ... '92' '87' '90']]
[['non-face' '215' '215' ... '95' '74' '84']
 ['non-face' '214' '214' ... '136' '110' '81']
 ['non-face' '249' '247' ... '142' '145' '144']
 ...
 ['non-face' '115' '127' ... '20' '21' '28']
 ['non-face' '90' '92' ... '125' '125' '125']
 ['non-face' '117' '121' ... '180' '174' '169']]


In [6]:
pathFaces = "att_faces/orl_faces"
pathNonFaces = "rand_imgs/rand_imgs_pgm"

# Load both labeled datasets (column 0 = label, remaining columns = pixels).
labeledFaceData = loadFaceImages(pathFaces)
labeledNonFaceData = loadNonFaceImages(pathNonFaces)

# Stack the face rows on top of the non-face rows. np.vstack replaces
# np.row_stack, which is only a deprecated alias of it.
labeledData = np.vstack((labeledFaceData, labeledNonFaceData))

# Split by alternating rows: odd-indexed rows train, even-indexed rows test.
trainingDataset = labeledData[1::2]
testingDataset = labeledData[0::2]

# Training set: labels stay strings, pixels become floats.
y_train = trainingDataset[:, 0]
x_train = np.array(trainingDataset[:, 1:]).astype(np.float64)
# Training samples grouped by label.
x_trainDict = {label: x_train[y_train == label] for label in np.unique(y_train)}

# Test set, handled the same way.
y_test = testingDataset[:, 0]
x_test = np.array(testingDataset[:, 1:]).astype(np.float64)
x_testDict = {test_label: x_test[y_test == test_label] for test_label in np.unique(y_test)}

# Report the resulting shapes as a sanity check.
print(labeledData.shape)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(500L, 10305L)
(250L, 10304L)
(250L, 10304L)
(250L,)
(250L,)


In [9]:
# Mean over all training samples, one value per pixel column.
totalMean = x_train.mean(axis=0)

# Mean of each class, wrapped in np.matrix so every entry is a single-row
# 2-D object (the scatter-matrix code transposes it).
classMean = {}
for label, data in x_trainDict.items():
    classMean[label] = np.matrix(data.mean(axis=0))
    

In [13]:
# Between-class (Sb) and within-class (Sw) scatter matrices.
# The feature count comes from the data rather than a hard-coded 10304.
nFeatures = totalMean.shape[0]
Sb = np.zeros((nFeatures, nFeatures))
Sw = np.zeros((nFeatures, nFeatures))

for label, class_mean in classMean.items():
    classSamples = x_trainDict[label]
    # Number of samples in this class. The earlier code used .size, which is
    # samples * features and therefore inflated every Sb term by the feature
    # count.
    nSamples = classSamples.shape[0]

    # Offset of the class mean from the overall mean (single row).
    diff = class_mean - totalMean

    # Outer product of that offset, weighted by the class sample count.
    Sb += nSamples * np.dot(diff.T, diff)

    # Spread of the class samples around their own mean.
    diffWithin = classSamples - class_mean
    Sw += np.dot(diffWithin.T, diffWithin)

In [14]:
# Pseudo-inverse of the within-class scatter matrix.
SwPinv = np.linalg.pinv(Sw)
# Matrix whose eigenvectors are extracted next: pinv(Sw) times Sb.
S = SwPinv.dot(Sb)

In [15]:
# Eigen-decomposition of S; eigenvectors are the columns of eigenVectors_b.
eigenValues_b, eigenVectors_b = np.linalg.eig(S)

# Indices that put the eigenvalues in descending order.
idx = np.argsort(eigenValues_b)[::-1]

# Reorder the eigenvector columns and the eigenvalues with the same indices
# so each vector stays paired with its value.
eigenVectors_b = np.take(eigenVectors_b, idx, axis=1)
eigenValues_b = np.take(eigenValues_b, idx)

# Keep only the real parts for the projection step.
realEigenVals_b, realEigenVects_b = eigenValues_b.real, eigenVectors_b.real

# Prediction

In [35]:
# Classification using K-NN on the projected samples.

# Number of leading eigenvectors kept for the projection.
# NOTE(review): 41 was a bare magic number here; its rationale is not shown
# in this notebook - confirm the intended dimensionality.
nComponents = 41
reduced_realEigenVects_b = realEigenVects_b[:, 0:nComponents]

# Project both splits onto the retained eigenvectors.
projected_trainingSet = np.dot(x_train, reduced_realEigenVects_b)
projected_testingSet = np.dot(x_test, reduced_realEigenVects_b)

# 1-nearest-neighbour classifier fitted on the projected training data.
classifer_b = KNeighborsClassifier(n_neighbors=1)
classifer_b.fit(projected_trainingSet, y_train)

prediction = classifer_b.predict(projected_testingSet)
# accuracy_score takes (y_true, y_pred); the arguments were previously passed
# in the opposite order. The score is unchanged, since accuracy is symmetric.
accuracy = accuracy_score(y_test, prediction)

print(accuracy)

0.932
