### Convert all raw images to Grayscale and resize them to similar shape

In [7]:
import cv2
import os
import numpy as np
import sys

TRAIN_DIR = 'data_set/train_data'
TEST_DIR = 'data_set/test_data'

train_dir = 'Generated/train_set'
test_dir = 'Generated/test_set'


def create_dataset(fileList, gen_dir, dir_name):

    for i, file in enumerate(fileList):
        oldfile = file.split('.')[0]
        file = os.path.join(dir_name, file)
        img = cv2.imread(file)
        resized_img = cv2.resize(img, (312,416))
        resized_img = cv2.cvtColor( resized_img, cv2.COLOR_RGB2GRAY )
        cv2.imwrite(gen_dir + '/'+ oldfile + '_resized_bw.jpg',resized_img)
        

def create():

    fileList = sorted(os.listdir(TRAIN_DIR))
    create_dataset(fileList, train_dir, TRAIN_DIR)
    fileList = sorted(os.listdir(TEST_DIR))
    create_dataset(fileList, test_dir, TEST_DIR)


def read_data(fileList, dir_name):
    
    x = []
    y = []
    for i, file in enumerate(fileList):
        oldfile = file
        file = os.path.join(dir_name, file)
        img = cv2.imread(file)
        data_reshaped = img.flatten()
        y.append(int(oldfile[0]))
        x.append(data_reshaped)
    y = np.asarray(y)
    y = y.reshape(-1, 1)
    x = np.asarray(x)
    x = x.transpose()
    return x, y


def load_data():

    fileList = sorted(os.listdir(train_dir))
    trainx, trainy = read_data(fileList, train_dir)

    fileList = sorted(os.listdir(test_dir))
    testx, testy = read_data(fileList, test_dir)

    return trainx, trainy, testx, testy


#create()

### Functions for computing accuracy and predicting classes of unseen images

In [8]:
def accuracy(prediction, testy):

    cnt = 0
    for x, y in zip(prediction, testy):
        print(x, "\t", y)
        if x == y:
            cnt += 1
    print("Accuracy = ", (cnt/testy.shape[0])*100)

def test(mean, testx, testy, eigen_face, signature_face):

    a, b = testx.shape
    prediction = []

    for test_image in testx.T:

        test_image = test_image.reshape(a,1) - mean
        final_eigenface = eigen_face.dot(test_image)
        min_dist = np.linalg.norm(signature_face.T[0].reshape(final_eigenface.shape) - final_eigenface)
        index = 0
        count = 0

        for col in signature_face.T:
            v = col.reshape(final_eigenface.shape)
            dist = np.linalg.norm(v - final_eigenface)
            if dist < min_dist:
                min_dist = dist
                index = count
            count += 1
        prediction.append(index//4)

    accuracy(prediction, testy)

### Compute the face signatures(weights of eigen faces)

In [15]:
trainx, trainy, testx, testy = load_data()
rows, cols = trainx.shape
mean = trainx.mean(axis = 1)
mean = mean.reshape(rows, 1)
new_trainx = trainx-mean
covariance_matrix = np.cov(new_trainx.T)
eigenvalue, eigenvector = np.linalg.eig(covariance_matrix)

idx = eigenvalue.argsort()[::-1]   
eigenvalue = eigenvalue[idx]
eigenvector = eigenvector[:,idx]

sigma = eigenvector[0:k, :]
sigma = sigma.T
eigen_faces = np.dot(sigma.T , new_trainx.T)

signature_face = eigen_faces.dot(new_trainx)

### Compute within and between class scatter, fisher faces and predict classes for testing data

In [16]:
n_classes = 10
k = 10
m = 40

def scatter_within_class(dic_of_classes):
    SW = 0
    for i in dic_of_classes.keys():
        mean_temp = (np.mean(dic_of_classes[i], axis = 1)).reshape(dic_of_classes[i].shape[0],1)
        
        SW += np.matmul((dic_of_classes[i] - mean_temp),((dic_of_classes[i] - mean_temp).T))
    return SW


def scatter_between_classes(dic_of_classes, mean_signature):
    SB = 0
    for i in dic_of_classes.keys():
        mean_temp = (np.mean(dic_of_classes[i], axis = 1)).reshape(dic_of_classes[i].shape[0],1)
        sigma_i = np.matmul(mean_temp - mean_signature, mean_temp.T)
        SB += sigma_i
    return SB


def construct_feature(eigenvector, m):
    feature_matrix = eigenvector[:m,:]
    return feature_matrix


def fisher_faces(feature_matrix, signature_face):
    return np.matmul(np.transpose(feature_matrix), signature_face)


def min_distance(ff, Projected_Fisher_Test_Img):
    ffT = ff.T
    min_dist = np.linalg.norm(ffT[0].reshape(Projected_Fisher_Test_Img.shape) - Projected_Fisher_Test_Img)
    index = 0
    cnt = 0
    for row in ffT:
        dist = np.linalg.norm(row.reshape(Projected_Fisher_Test_Img.shape) - Projected_Fisher_Test_Img)
        if dist < min_dist:
            min_dist = dist
            index = cnt
        cnt += 1


    print(min_dist)
    return (index//4 )



def testing(testx, testy, mean_train, ff, feature_matrix, eigen_faces):
    a, b = testx.shape
    prediction = []
    for test_image in testx.T:
        test_image = test_image.reshape(a,1) - mean_train
        PEF = np.matmul(eigen_faces, test_image)
        Projected_Fisher_Test_Img = np.matmul(np.transpose(feature_matrix), PEF)
        predicted_class = min_distance(ff, Projected_Fisher_Test_Img)       
        prediction.append(predicted_class)
    accuracy(prediction, testy)



dic_of_classes = {}

mean_signature = (signature_face.mean(axis = 1)).reshape(signature_face.shape[0],1)
mean_train = (trainx.mean(axis = 1)).reshape(trainx.shape[0], 1)

for i in range(n_classes):
    dic_of_classes[i+1] = signature_face[:, i:i+4]

means = {}

for x,y in dic_of_classes.items():
    mean_val = y.mean(axis = 1)
    mean_val = mean_val.reshape(mean_val.shape[0], 1)
    means[x] = mean_val

SW = scatter_within_class(dic_of_classes)
SB = scatter_between_classes(dic_of_classes, mean_signature)
J = np.matmul(np.linalg.inv(SW), SB)

eigenvalue, eigenvector = np.linalg.eig(J)
idx = eigenvalue.argsort()[::-1]   
eigenvalue = eigenvalue[idx]
eigenvector = eigenvector[:,idx]

feature_matrix = eigenvector[:k, :].T
ff = fisher_faces(feature_matrix, signature_face)
testing(testx, testy, mean_train, ff, feature_matrix, eigen_faces)



212844546.89787227
214046260.14828792
271654971.4501266
1.4439909496075475e-06
116799839.23995472
265335347.35382783
116765846.91409847
177989165.91174406
255603963.50969887
197452493.67207173
5 	 [0]
5 	 [1]
2 	 [2]
3 	 [3]
4 	 [4]
5 	 [5]
6 	 [6]
7 	 [7]
8 	 [8]
1 	 [9]
Accuracy =  70.0
