In [1]:
import cv2
import numpy as np 
import matplotlib.pyplot as plt
import os
from glob import glob

In [54]:
def load_train_test_data(dir):
    all_folders=glob(dir)
    train_data=[]
    test_data=[]
    for folder in all_folders:
        images = [cv2.imread(file,0) for file in glob("%s*.pgm"%(folder))]
        images_train = images[::2]
        images_test = images[1::2]
        train_data.append(images_train)
        test_data.append(images_test)
    return train_data,test_data

def downsampling(train_data,test_data):
    for i,folder in enumerate(train_data):
        for j,img in enumerate(folder):
            train_data[i][j] = cv2.resize(img, (0,0), fx=0.1, fy=0.1)
    
    for i,folder in enumerate(test_data):
        for j,img in enumerate(folder):
            test_data[i][j] = cv2.resize(img, (0,0), fx=0.1, fy=0.1)
    
    return train_data_copy,test_data_copy

def vectorized(train_data,test_data):
    for i,folder in enumerate(train_data):
        for j,img in enumerate(folder):
            train_data[i][j] = train_data[i][j].reshape(-1,1) 
    
    for i,folder in enumerate(test_data):
        for j,img in enumerate(folder):
            test_data[i][j] = test_data[i][j].reshape(-1,1)
    
    return train_data,test_data


def normalize(train_data,test_data):
    for i,folder in enumerate(train_data):
        for j,img in enumerate(folder):
            train_data[i][j] = train_data[i][j]/train_data[i][j].max() 
    
    for i,folder in enumerate(test_data):
        for j,img in enumerate(folder):
            test_data[i][j] = test_data[i][j]/test_data[i][j].max()
    
    return train_data,test_data

def stacking(train_data):
    classes = []
    for i,folder in enumerate(train_data):
        X = np.zeros((len(train_data[0][0]),1))
        for j,img in enumerate(folder):
            X = np.append(X,train_data[i][j],axis=1)
        classes.append(X[:,1:])
    return classes

def b_hat(model,y):
    Bi = []
    Yi = []
    for i,X in enumerate(model):
        Xt = np.transpose(X)
        inv = np.linalg.inv(np.dot(Xt,X)) 
        B = np.dot(inv,np.dot(Xt,y))
        y = np.dot(X,B)
        Yi.append(y)
        Bi.append(B)
    return Yi


def distance(model,y):
    Yi = b_hat(model,y)
    all_dist=[]
    for yi in Yi:
        #dist = np.linalg.norm(y-yi)
        #dist = np.hypot(*(y-yi).T)
        dist = np.sqrt(np.sum((y - yi) ** 2))  
        all_dist.append(dist)
    return all_dist

In [55]:
train_data,test_data=load_train_test_data("media/FaceDataset/*/")
train_data_copy = train_data.copy()
test_data_copy = test_data.copy()

train_data_ds,test_data_ds = downsampling(train_data_copy,test_data_copy)

train_data_ds_v,test_data_ds_v = vectorized(train_data_ds,test_data_ds)

train_data_ds_v_n,test_data_ds_v_n = normalize(train_data_ds_v,test_data_ds_v)

Classes = stacking(train_data_ds_v_n)

#Bi,Yi = b_hat(Classes,test_data_ds_v_n[34][2])

#all_dist = distance(Classes,test_data_ds_v_n[34][2])

#print(all_dist)

#print(all_dist.index(min(all_dist)))

In [56]:
accurate = 0
for i in range(40):
    for j in range(5):
        all_dist = distance(Classes,test_data_ds_v_n[i][j])
        if all_dist.index(min(all_dist)) == i:
            accurate += 1
accuracy = (accurate/2)

In [57]:
print(accuracy)

8.5
