In [315]:
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import collections
from os import listdir
import sys
import numpy as np
import numba as nb
from scipy.spatial.distance import cdist, pdist, squareform

In [332]:
WIDTH, HEIGHT = 50, 50

def read_face(path):
    faces, labels = [], []
    for file in listdir(path):
        if file.split(".")[-1] != "pgm":
            continue
        with Image.open(path + file) as face:
            face = face.resize([HEIGHT,WIDTH])
            faces.append([np.array(face).reshape(1,-1)])
            labels.append(file.split(".")[0])
        
    faces = np.concatenate(faces,axis=0)
    #print(faces.shape)
    faces = faces.reshape((faces.shape[0],faces.shape[2]))
    #print(faces.shape)
    return faces, labels
    
faces, lables = read_face("./Yale_Face_Database/Training/")

OSError: [WinError 433] A device which does not exist was specified: './Yale_Face_Database/Training/'

In [None]:
class KNN():
    def __init__(self, train_space, test_space, train_labels, test_labels, k):
        self.train_space = train_space
        self.test_space = test_space
        self.train_labels = train_labels
        self.test_labels = test_labels
        self.k = k
        self.embedding_space = np.vstack((self.train_space, self.test_space))
        self.distance_matrix = None
        self.acc = None

    def cal_distance_matrix(self):
        self.distance_matrix = squareform(pdist(self.embedding_space,metric="euclidean"))[135:,:135]
        return self.distance_matrix
        
    def run(self):
        self.cal_distance_matrix()
        predict_label = []
        for i in range(self.test_space.shape[0]):
            closest_point_index = np.argsort(self.distance_matrix[i])[:self.k]
            vote_list = [ self.train_labels[j] for j in closest_point_index ]
            counter=collections.Counter(vote_list)
            predict_label.append(counter.most_common(1)[0][0])

        acc = 0
        for i in range(self.test_space.shape[0]):
            if predict_label[i] == self.test_labels[i]:
                acc += 1
        
        self.acc = acc / self.test_space.shape[0]
        print(f"KNN accuracy is {self.acc}")

In [None]:
def linear_kernel(X1,X2):
    kernel = X1.T @ X2
    return kernel

def poly_kernel(X1,X2,gamma=0.001,coe=10,degree=3):
    dist_matrix = X1.T @ X2
    kernel = ((gamma * dist_matrix) + coe)** degree
    return kernel
    
def RBF_kernel(X1,X2,gamma=0.001):
    dist_matrix = X1.T @ X2
    kernel = np.exp(-gamma * cdist(X1.T, X2.T))
    return kernel

In [None]:
class PCA():
    def __init__(self, data, k, is_kernel=False,kernel_mode=None):
        self.data = data
        self.k = k
        self.random = 10
        self.is_kernel = is_kernel
        self.kernel_mode = kernel_mode
        self.conv = None
        self.eigenvalue = None
        self.eigenvector = None
        self.w = None
        self.projection = None
    
    def get_covariance(self, data):
        if self.is_kernel:
            k = self.kernel_mode(data,data)
            n = k.shape[0]
            one_N = np.ones((n,n)) / n
            conv = k - one_N @ k - k @ one_N + one_N @ k @ one_N
            self.conv = conv
        else:
            mean = np.mean(data, axis=0, keepdims=False)
            n = data.shape[0]
            A = self.data - mean
            conv = (A.T @ A) / n
            self.conv = conv
        return conv
    
    def eigen_decpmposition(self, conv, k):
        eigenvalues, eigenvectors = np.linalg.eigh(conv)
        index = np.flip(np.argsort(eigenvalues))[:k]
        
        self.eigenvalue, self.eigenvector = eigenvalues[index], eigenvectors[:,index]
        self.w = self.eigenvector
        return self.eigenvalue, self.eigenvector
        
    def random_reconstruction(self):
        random_face = np.vstack([ [self.data[random]] for random in np.random.randint(0,self.data.shape[0],10)])
        reconstruction = (random_face @ self.w) @ self.w.T
        self.show_image(reconstruction,merge=True)
        pass
    
    def show_image(self,data,merge=False,file_path=None):
        if data.shape[0] > data.shape[1]:
            data = data.T
        if merge == True:
            row_number = (data.shape[0] // 5)
            remain = data.shape[0] % 5
            row = []
            for i in range(row_number):
                row_face = np.hstack([[data[j].reshape(WIDTH,HEIGHT)][0] for j in range(i*5,i*5+5)])
                row.append(row_face)

            picture = np.vstack([ [row[j]][0] for j in range(len(row))])
            plt.imshow(picture, cmap='gray')
            plt.show()
        else:
            for face in data:
                plt.imshow(face.reshape(WIDTH,HEIGHT), cmap='gray')
                plt.show()
        if not file_path == None:
            plt.save(file_path)

    def run(self):
        self.get_covariance(self.data)
        self.eigen_decpmposition(self.conv, self.k)
        self.show_image(self.eigenvector,merge=True)
        self.projection = self.data @ self.w
        return self.projection, self.w

# PCA part 1
train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
PCA_train = PCA(train_faces,25)
PCA_train.run()
print("----------------------------------------------------------")
PCA_train.random_reconstruction()

# PCA part 2
train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
print(train_faces.shape)
PCA_train = PCA(train_faces,25)
PCA_train.run()

test_faces, test_lables = read_face("./Yale_Face_Database/Testing/")

train_space = PCA_train.projection
test_space = test_faces @ PCA_train.w
simple_PCA_KNN = KNN(train_space, test_space, train_lables, test_lables, k = 5)
simple_PCA_KNN.run() 


# PCA part 3
kernel_list = [linear_kernel, poly_kernel, RBF_kernel]
for i, kernel in enumerate(kernel_list):
    train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
    #kernel = linear_kernel(train_faces,train_faces)
    PCA_train = PCA(train_faces, 25, is_kernel=True,kernel_mode=kernel)
    PCA_train.run()

    test_faces, test_lables = read_face("./Yale_Face_Database/Testing/")
    train_space = PCA_train.projection
    test_space = test_faces @ PCA_train.w
    kernel_PCA_KNN = KNN(train_space, test_space, train_lables, test_lables, k = 5)
    kernel_PCA_KNN.run()


OSError: [WinError 433] A device which does not exist was specified: './Yale_Face_Database/Training/'

In [None]:
class LDA():
    def __init__(self, data, labels, k, is_kernel=False,kernel_mode=None):
        self.data = data
        self.labels = labels
        self.k = k
        self.random = 10
        self.is_kernel = is_kernel
        self.kernel_mode = kernel_mode
        self.class_matrix = None
        self.total_mean = None
        self.class_mean = None
        self.class_num =None
        self.SW = None
        self.SB = None
        self.matrix = None
        self.projection = None
        if self.is_kernel == True:
            self.kernel = kernel_mode(self.data, self.data)

    def compute_mean(self):
        total_mean = np.mean(self.data,axis=0)
        class_matrix = np.zeros((self.data.shape[0],len(np.unique(self.labels))))
        for index, c in enumerate(self.labels):
            class_matrix[index, int(c[-2:])-1] = 1
        
        #print(np.sum(class_matrix,axis=0))
        class_mean = (self.data.T @ class_matrix) / np.sum(class_matrix,axis=0)
        #print(class_mean.shape)
        self.class_matrix = class_matrix
        self.total_mean = total_mean
        self.class_mean = class_mean
        
    def compute_within(self):
        if self.is_kernel:
            mj = self.data.T @ self.class_matrix / np.sum(self.class_matrix,axis=0)
            self.kernel[np.where()]
            print(mj.shape)
            # W = self.data.T - mj @ self.class_matrix.T
            # SW = np.zeros((self.data.shape[1], self.data.shape[1]))
            # for group in np.unique(self.labels):
            #     w = W[:,np.array(self.labels) == group]
            #     SW += w @ w.T / w.shape[1]
            # self.SW = SW

            pass
        else:
            mj = self.data.T @ self.class_matrix / np.sum(self.class_matrix,axis=0)
            W = self.data.T - mj @ self.class_matrix.T
            SW = np.zeros((self.data.shape[1], self.data.shape[1]))
            for group in np.unique(self.labels):
                w = W[:,np.array(self.labels) == group]
                SW += w @ w.T / w.shape[1]
            self.SW = SW
            
    def compute_between(self):
        if self.is_kernel:
            pass
        else:
            mj = self.data.T @ self.class_matrix /np.sum(self.class_matrix,axis=0)
            B = mj - self.total_mean[:, None]
            SB = (np.sum(self.class_matrix,axis=0) * B) @ B.T
            self.SB = SB
            
    def eigen_decpmposition(self, conv, k):
        eigenvalues, eigenvectors = np.linalg.eigh(conv)
        index = np.flip(np.argsort(eigenvalues))[:k]
        
        self.eigenvalue, self.eigenvector = eigenvalues[index], eigenvectors[:,index]
        self.w = self.eigenvector
        return self.eigenvalue, self.eigenvector
        
    def random_reconstruction(self):
        random_face = np.vstack([ [self.data[random]] for random in np.random.randint(0,self.data.shape[0],10)])
        reconstruction = (random_face @ self.w) @ self.w.T
        self.show_image(reconstruction,merge=True)
        pass

    def show_image(self,data,merge=False,file_path=None):
        if data.shape[0] > data.shape[1]:
            data = data.T
        if merge == True:
            row_number = (data.shape[0] // 5)
            remain = data.shape[0] % 5
            row = []
            for i in range(row_number):
                row_face = np.hstack([[data[j].reshape(WIDTH,HEIGHT)][0] for j in range(i*5,i*5+5)])
                row.append(row_face)

            picture = np.vstack([ [row[j]][0] for j in range(len(row))])
            plt.imshow(picture, cmap='gray')
            plt.show()
        else:
            for face in data:
                plt.imshow(face.reshape(WIDTH,HEIGHT), cmap='gray')
                plt.show()
        if not file_path == None:
            plt.save(file_path)


    def run(self):
        self.compute_mean()
        self.compute_between()
        self.compute_within()
        SW_inv = np.linalg.pinv(self.SW)
        self.matrix = SW_inv @ self.SB
        self.eigen_decpmposition(self.matrix, self.k)
        self.show_image(self.eigenvector,merge=True)
        self.projection = self.data @ self.w
    
#LDA part 1
# train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
# LDA_train = LDA(train_faces, train_lables, 25)
# LDA_train.run()

# print("----------------------------------------------------------")
# LDA_train.random_reconstruction()

#LDA part 2
# train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
# #print(train_faces.shape)
# LDA_train = LDA(train_faces, train_lables, 25)
# LDA_train.run()

# test_faces, test_lables = read_face("./Yale_Face_Database/Testing/")

# train_space = LDA_train.projection
# test_space = test_faces @ LDA_train.w
# simple_LDA_KNN = KNN(train_space, test_space, train_lables, test_lables, k = 5)
# simple_LDA_KNN.run() 


#LDA part 3
kernel_list = [linear_kernel, poly_kernel, RBF_kernel]
for i, kernel in enumerate(kernel_list):
    train_faces, train_lables = read_face("./Yale_Face_Database/Training/")
    #kernel = linear_kernel(train_faces,train_faces)
    LDA_train = LDA(train_faces, train_lables, 25, is_kernel=True,kernel_mode=kernel)
    LDA_train.run()

    test_faces, test_lables = read_face("./Yale_Face_Database/Testing/")
    train_space = LDA_train.projection
    test_space = test_faces @ LDA_train.w
    kernel_LDA_KNN = KNN(train_space, test_space, train_lables, test_lables, k = 5)
    kernel_LDA_KNN.run()

(2500, 15)


TypeError: loop of ufunc does not support argument 0 of type NoneType which has no callable conjugate method