## In this notebook we use PCA and KNN to analyze face images and build a model that recognizes faces.
##### Click a section below to jump to it:
<ul>
    <li><a href="#dataset">Dataset Exploration</a></li>
    <li><a href="#splitting">Dataset splitting</a></li>
    <li><a href="#pca">PCA algorithm</a></li>
    <li><a href="#KNN">KNN classifier</a></li>
    <li><a href="#bonus">Bonus with Naive Bayes</a></li>
    <li><a href="#pca2">PCA '2'</a></li>
</ul>

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from numpy.linalg import eigh
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib.image as mpimg
from sklearn.naive_bayes import GaussianNB

<a id = 'dataset' ></a>
### Importing the images, exploring them and generating the data matrix and the label vector!

In [2]:
def loadImages(folder, y):
    """Load every readable image in ``folder`` as a grayscale array.

    Parameters
    ----------
    folder : str
        Directory whose entries are read with ``cv2.imread``.
    y : object
        Label attached to every image loaded from ``folder``.

    Returns
    -------
    tuple of (list, list)
        The grayscale images, and a parallel list holding ``y`` once per
        loaded image. Entries that OpenCV cannot decode are skipped.
    """
    images = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # index-based train/test splits further down reproducible.
    for pic in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, pic))
        # cv2.imread returns None for files it cannot decode. The check has to
        # happen BEFORE cvtColor, which raises on a None input.
        if img is None:
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))  # transform to grayscale
        labels.append(y)
    return images, labels

In [3]:
def load_images_from_folder(folder):
    """Build the data matrix and label vector from a directory of class folders.

    Every sub-directory of ``folder`` is treated as one class. Its label is the
    directory name with the first character removed (e.g. ``"s12"`` -> ``"12"``).

    Parameters
    ----------
    folder : str
        Root directory containing one sub-directory per class.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``images`` with one flattened image per row, and ``labels`` holding the
        label of each row.

    Raises
    ------
    ValueError
        If no image could be loaded from ``folder``.
    """
    images = []
    labels = []

    # Sorted so that row order (and therefore any index-based split) is
    # reproducible; os.listdir() gives no ordering guarantee.
    for directory in sorted(os.listdir(folder)):
        path = os.path.join(folder, directory)
        # Skip stray files (Thumbs.db, .DS_Store, README, ...) that would make
        # os.listdir() inside loadImages fail.
        if not os.path.isdir(path):
            continue
        y = directory[1:]
        i, l = loadImages(path, y)
        images.extend(i)
        labels.extend(l)

    if not images:
        raise ValueError("No images could be loaded from " + repr(folder))

    images = np.array(images)
    # Flatten each image into a single row vector.
    images = images.reshape(images.shape[0], -1)

    labels = np.array(labels)
    return images, labels


In [4]:
# Root of the face dataset: one sub-directory per subject, whose name minus its
# first character becomes the label (see load_images_from_folder).
# Set the FACE_IMAGES_DIR environment variable to run the notebook on another
# machine; the original author's path is kept as the default.
folder = os.environ.get(
    'FACE_IMAGES_DIR',
    'C:\\Users\\AL-alamia\\Desktop\\Pattern_Project\\images',
)
X, y = load_images_from_folder(folder)

<a id = 'splitting' ></a>
### Split the Dataset into Training and Test sets

In [5]:
# 50/50 split by alternating rows: even-indexed rows go to the test set and
# odd-indexed rows to the training set. The slices are derived from the data
# itself instead of a hard-coded 400, so the cell works for any dataset size.
# list(...) keeps the containers as plain Python lists of row vectors.
test_Datamatrix = list(X[0::2])
test_Labelmatrix = list(y[0::2])
train_Datamatrix = list(X[1::2])
train_Labelmatrix = list(y[1::2])

<a id = 'KNN' ></a>
### Classifier Tuning

In [6]:
def KNNClassifier(train_data,train_label,test_data,test_label):
    """Evaluate distance-weighted KNN for k in {1, 3, 5, 7} and plot accuracy vs k.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices with one COLUMN per sample; they are transposed
        before being passed to scikit-learn.
    train_label, test_label : sequence
        Labels of the training and test samples respectively.

    For every k the function prints the accuracy, the prediction for each test
    sample, and the number of misclassified samples. It then shows a plot of
    accuracy against k. Nothing is returned.
    """
    neighbours = [1,3,5,7]
    accuracy_matrix = []
    for neighbour in neighbours:
        print("When Neighbour = " + "[" + str(neighbour) + "]")
        clf = KNeighborsClassifier(n_neighbors = neighbour, weights = 'distance')
        clf.fit(train_data.T, train_label)
        output = clf.predict(test_data.T)
        # accuracy_score's documented argument order is (y_true, y_pred).
        accuracy = accuracy_score(test_label, output)
        accuracy_matrix.append(accuracy)
        print("Accuracy score is: " + str(accuracy))
        count = 0
        # Compare against the test_label argument (not a notebook global) and
        # use a separate index name so the outer loop variable is not clobbered.
        for sample_idx, (predicted, actual) in enumerate(zip(output, test_label)):
            print("[" + str(sample_idx) + "]" + "Classified as: "+ str(predicted) +" Actual is: "+ str(actual))
            if predicted != actual:
                print("Misclassified")
                count += 1
        # Reported once per k, inside the loop, so every setting gets a summary.
        print("Number of Misclassified is " + str(count))
        print("=========================================")
    # x axis = number of neighbours, y axis = accuracy.
    plt.plot(neighbours, accuracy_matrix)
    plt.xlabel("Number of neighbours (k)")
    plt.ylabel("Accuracy")
    plt.show()

<a id = 'pca' ></a>
### Classification using PCA

In [7]:
def PCA():
    """Project the first train/test split onto its principal components and run KNN.

    Reads the notebook globals ``train_Datamatrix``, ``train_Labelmatrix``,
    ``test_Datamatrix`` and ``test_Labelmatrix``. For each retained-variance
    threshold in ``[0.8, 0.85, 0.9, 0.95]`` it keeps the leading eigenvectors of
    the training covariance matrix selected by ``FTV``, projects both sets, and
    passes the projections to ``KNNClassifier``.

    NOTE: the covariance matrix is (n_features x n_features); for large images
    its eigendecomposition is the expensive step of this notebook.
    """
    mean = np.mean(train_Datamatrix,axis = 0)
    Z_Matrix = train_Datamatrix - mean
    # The test set must be centred with the TRAINING mean: the projection basis
    # is defined relative to it, and using the test set's own mean would shift
    # the test points relative to the training points.
    Z_Matrix_Test = test_Datamatrix - mean
    Cov_Matrix = np.cov(Z_Matrix,rowvar = 0 , bias = 1)
    E_Values , E_Vectors = np.linalg.eigh(Cov_Matrix)
    # eigh returns eigenvalues in ascending order; reorder to descending.
    idx = E_Values.argsort()[::-1]
    E_Values_Sorted = E_Values[idx]
    E_Vectors_Sorted = E_Vectors[:,idx]
    r_values = [0.8,0.85,0.9,0.95]
    for r in r_values:
        # FTV returns the NUMBER of leading eigenvalues needed to reach the
        # threshold, so exactly that many columns are kept (no "+ 1").
        r_value = FTV(0,r,E_Values_Sorted)
        New_Matrix = E_Vectors_Sorted[: , :r_value]
        # Projections are (n_components x n_samples); KNNClassifier transposes.
        U_Train = np.dot(New_Matrix.T , Z_Matrix.T)
        U_Test = np.dot(New_Matrix.T , Z_Matrix_Test.T)
        print("For r: " + str(r))
        KNNClassifier(U_Train,train_Labelmatrix,U_Test,test_Labelmatrix)

In [8]:
def FTV (r,alpha,E_Values_Sorted):
    """Return how many leading eigenvalues are needed to explain ``alpha`` of the variance.

    Parameters
    ----------
    r : int
        Smallest component count to consider (the search starts here).
    alpha : float
        Target fraction of total variance, e.g. ``0.9``.
    E_Values_Sorted : array-like
        Eigenvalues sorted in descending order.

    Returns
    -------
    int
        The smallest ``i >= r`` such that ``sum(E_Values_Sorted[:i]) / total``
        is at least ``alpha``. If no such ``i`` exists, the total number of
        eigenvalues is returned (i.e. keep every component).
    """
    values = np.asarray(E_Values_Sorted, dtype=float)
    total = float(values.sum())
    # cumulative[i] is the sum of the first i eigenvalues. Computing it once
    # avoids re-summing the whole array on every iteration, and bounds the
    # search by the actual number of eigenvalues instead of a fixed 10304.
    cumulative = np.concatenate(([0.0], np.cumsum(values)))
    for i in range(r, len(cumulative)):
        if float(cumulative[i]) / total >= alpha:
            return i
    return len(values)

###  PCA for KNN classifier

In [None]:
# Run the PCA projection + KNN evaluation on the first split; results for each
# variance threshold are printed and an accuracy plot is shown per threshold.
PCA()

<a id = 'bonus' ></a>
# Bonus: Naive Bayes classifier

In [None]:
def NBClassifier(train_data,train_label,test_data,test_label):
    """Fit a Gaussian Naive Bayes model and print its accuracy and per-sample results.

    Parameters
    ----------
    train_data, test_data : array-like
        Feature matrices with one COLUMN per sample; they are transposed
        before being passed to scikit-learn.
    train_label, test_label : sequence
        Labels of the training and test samples respectively.

    Prints the accuracy, the prediction for each test sample, and the number
    of misclassified samples. Nothing is returned.
    """
    accuracy_matrix = []
    nb = GaussianNB()
    nb.fit(train_data.T, train_label)
    output = nb.predict(test_data.T)
    # accuracy_score's documented argument order is (y_true, y_pred).
    accuracy_matrix.append(accuracy_score(test_label, output))
    print("Accuracy score is: " + str(accuracy_matrix))
    count = 0
    # Compare against the test_label argument rather than a notebook global,
    # so the function is correct for whatever split it is called with.
    for i, (predicted, actual) in enumerate(zip(output, test_label)):
        print("[" + str(i) + "]" + "Classified as: "+ str(predicted) +" Actual is: "+ str(actual))
        if predicted != actual:
            print("Misclassified")
            count += 1
    print("Number of Misclassified is " + str(count))
    print("=========================================")

In [None]:
def PCA2():    # Same pca algorithm but for naive bayes classifier
    """Project the second train/test split onto its principal components and run Naive Bayes.

    Reads the notebook globals ``train_Datamatrix2``, ``train_Labelmatrix2``,
    ``test_Datamatrix2`` and ``test_Labelmatrix2`` (defined in a later cell, so
    that cell must be run before calling this function). For each
    retained-variance threshold in ``[0.8, 0.85, 0.9, 0.95]`` it keeps the
    leading eigenvectors of the training covariance matrix selected by ``FTV``,
    projects both sets, and passes the projections to ``NBClassifier``.
    """
    mean = np.mean(train_Datamatrix2,axis = 0)
    Z_Matrix = train_Datamatrix2 - mean
    # The test set must be centred with the TRAINING mean: the projection basis
    # is defined relative to it, and using the test set's own mean would shift
    # the test points relative to the training points.
    Z_Matrix_Test = test_Datamatrix2 - mean
    Cov_Matrix = np.cov(Z_Matrix,rowvar = 0 , bias = 1)
    E_Values , E_Vectors = np.linalg.eigh(Cov_Matrix)
    # eigh returns eigenvalues in ascending order; reorder to descending.
    idx = E_Values.argsort()[::-1]
    E_Values_Sorted = E_Values[idx]
    E_Vectors_Sorted = E_Vectors[:,idx]
    r_values = [0.8,0.85,0.9,0.95]
    for r in r_values:
        # FTV returns the NUMBER of leading eigenvalues needed to reach the
        # threshold, so exactly that many columns are kept (no "+ 1").
        r_value = FTV(0,r,E_Values_Sorted)
        New_Matrix = E_Vectors_Sorted[: , :r_value]
        # Projections are (n_components x n_samples); NBClassifier transposes.
        U_Train = np.dot(New_Matrix.T , Z_Matrix.T)
        U_Test = np.dot(New_Matrix.T , Z_Matrix_Test.T)
        print("For r: " + str(r))
        NBClassifier(U_Train,train_Labelmatrix2,U_Test,test_Labelmatrix2)

In [None]:
# Second train/test split, used by the Naive Bayes run (PCA2).
# Rows whose index is divisible by 2 or by 3 go to the TEST set and the rest to
# the training set, so roughly two thirds of the data ends up in the test set.
# NOTE(review): confirm this ratio is intended and not an inverted condition.
# The loop is bounded by the data length instead of a hard-coded 400.
test_Labelmatrix2 = []
train_Labelmatrix2 = []
test_Datamatrix2 = []
train_Datamatrix2 = []
for i in range(len(X)):
    if (i % 2 == 0 or i % 3 == 0):
        test_Labelmatrix2.append(y[i])
        test_Datamatrix2.append(X[i])
    else:
        train_Labelmatrix2.append(y[i])
        train_Datamatrix2.append(X[i])

<a id = 'pca2' ></a>
### Running the PCA algorithm for the Naive Bayes classifier

In [None]:
# Run the PCA projection + Naive Bayes evaluation on the second split; results
# for each variance threshold are printed.
PCA2()

### All the results are in the report!