### Convert raw images to Grayscale and of similar shapes

In [1]:
import cv2
import os
import numpy as np

TRAIN_DIR = 'data_set/train_data'
TEST_DIR = 'data_set/test_data'

train_dir = 'Generated/train_set'
test_dir = 'Generated/test_set'

def create_dataset(fileList, gen_dir, dir_name):

    for i, file in enumerate(fileList):
        oldfile = file.split('.')[0]
        file = os.path.join(dir_name, file)
        img = cv2.imread(file)
        resized_img = cv2.resize(img, (312,416))
        resized_img = cv2.cvtColor( resized_img, cv2.COLOR_RGB2GRAY )
        cv2.imwrite(gen_dir + '/'+ oldfile + '_resized_bw.jpg',resized_img)
        
def create():

    fileList = sorted(os.listdir(TRAIN_DIR))
    create_dataset(fileList, train_dir, TRAIN_DIR)
    fileList = sorted(os.listdir(TEST_DIR))
    create_dataset(fileList, test_dir, TEST_DIR)

def read_data(fileList, dir_name):
    
    x = []
    y = []
    for i, file in enumerate(fileList):
        oldfile = file
        file = os.path.join(dir_name, file)
        img = cv2.imread(file)
        data_reshaped = img.flatten()
        y.append(int(oldfile[0]))
        x.append(data_reshaped)
    y = np.asarray(y)
    y = y.reshape(-1, 1)
    x = np.asarray(x)
    x = x.transpose()
    return x, y

def load_data():

    fileList = sorted(os.listdir(train_dir))
    trainx, trainy = read_data(fileList, train_dir)

    fileList = sorted(os.listdir(test_dir))
    testx, testy = read_data(fileList, test_dir)

    return trainx, trainy, testx, testy

#create()

### Functions for computing accuracy and predicting classes of unseen images

In [2]:
imposter = False

def accuracy(prediction, testy):
    cnt = 0
    for x, y in zip(prediction, testy):
        print(x, "\t", y)
        if x == y:
            cnt += 1
    print("Accuracy = ", (cnt/testy.shape[0])*100)

def test(mean, test_x, test_y, eigen_face, signature_face):
    a, b = test_x.shape
    prediction = []

    for test_image in test_x.T:

        test_image = test_image.reshape(a,1) - mean
        final_eigenface = eigen_face.dot(test_image)
        min_dist = np.linalg.norm(signature_face.T[0].reshape(final_eigenface.shape) - final_eigenface)
        index = 0
        count = 0

        for col in signature_face.T:
            v = col.reshape(final_eigenface.shape)
            dist = np.linalg.norm(v - final_eigenface)
            if dist < min_dist:
                min_dist = dist
                index = count
            count += 1

        print(min_dist)
        prediction.append(index//4)

    accuracy(prediction, test_y)

### Compute Covariance Matrix, eigen-faces and weighted sum representation of the training samples

In [3]:
train_x, train_y, test_x, test_y = load_data()
rows, cols = train_x.shape
mean = train_x.mean(axis = 1)
mean = mean.reshape(rows, 1)
normalized_train_x = train_x-mean

k = 10
cov_matrix = np.cov(normalized_train_x.T)
eigenval, eigenvec = np.linalg.eig(cov_matrix)
ind = eigenval.argsort()[::-1]   
eigenval = eigenval[ind]
eigenvec = eigenvec[:,ind]

sigma = eigenvec[0:k, :]
sigma = sigma.T
eigen_faces = np.dot(sigma.T , normalized_train_x.T)
signature_faces = eigen_faces.dot(normalized_train_x)
test(mean, test_x, test_y, eigen_faces, signature_faces)

580828914.2992924
568514421.401925
827463494.554465
1.944244674479972e-06
200309410.414488
556845842.2375823
143350457.38307872
211587722.7636912
538177708.9129602
495122589.4533047
5 	 [0]
5 	 [1]
2 	 [2]
3 	 [3]
4 	 [4]
5 	 [5]
6 	 [6]
7 	 [7]
8 	 [8]
0 	 [9]
Accuracy =  70.0
