## Importing libraries and dependencies

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)|
from skimage.io import imread
import os
import cv2
from matplotlib import pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from numpy import linalg as LA
from numpy.linalg import inv

## Loading dataset

In [None]:
dataset_root = '/kaggle/input/att-database-of-faces/'

# Subject folders in sorted name order; the README entry is not a subject.
entries = os.listdir(dataset_root)
entries.remove('README')
entries = sorted(entries)

labels = []
D = []
for entry in entries:
    subject_dir = os.path.join(dataset_root, entry)
    for filename in os.listdir(subject_dir):
        z = entry + '/' + filename
        img = imread(os.path.join(dataset_root, z))
        if img is None:
            continue
        D.append(img)
        # Label = folder name without its first character, kept as a string.
        j = entry[1:]
        int(j)  # result unused; only raises if the suffix is not an integer
        labels.append(j)
labels = np.array(labels)

# One flattened row of 10304 pixels per image.
D = np.array(D).reshape((400, 10304))

In [None]:
# D holds one flattened row of 10304 pixels per image, appended subject by
# subject, so D[i][j] would be a single pixel. Row i*10 + j is image j of
# subject i (the 40 x 10 grid assumes 10 images per subject) and has to be
# folded back to 112 x 92 before it can be drawn.
_, axxr = plt.subplots(40, 10, figsize=(12, 60))
for i in range(40):
    for j in range(10):
        axxr[i][j].imshow(D[i * 10 + j].reshape(112, 92), cmap='gray')
plt.setp(plt.gcf().get_axes(), xticks=[], yticks=[])
# ref : https://stackoverflow.com/questions/25124143/matplotlib-subplots-get-rid-of-tick-labels-altogether
plt.show()

## train_test split

In [None]:
# Alternating split: odd-numbered rows train, even-numbered rows test.
train_rows = range(1, 400, 2)
test_rows = range(0, 400, 2)

x_train = np.array([D[k] for k in train_rows])
y_train = np.array([labels[k] for k in train_rows])
x_test = np.array([D[k] for k in test_rows])
y_test = np.array([labels[k] for k in test_rows])

# DataFrame views of the same pixel matrices, used by the PCA/LDA cells.
df_train = pd.DataFrame(x_train)
df_test = pd.DataFrame(x_test)

In [None]:
# Notebook display: the training pixel table built from x_train.
df_train

# **PCA**

In [None]:
def mean(data):
    """Return the mean of every column (feature) of *data*.

    The axis is given explicitly: without it ``np.mean`` collapses an ndarray
    to one grand mean, and for a DataFrame the result depends on the pandas
    version. PCA centring needs one mean per feature.

    Args:
        data: 2-D array-like or DataFrame with one sample per row.

    Returns:
        The per-column means (a Series for DataFrame input, else an ndarray).
    """
    return np.mean(data, axis=0)

def center(data):
    """Return *data* shifted by whatever ``mean(data)`` yields."""
    offset = mean(data)
    return data - offset

def cov_matrix(data):
    """Return the covariance matrix of *data*.

    Computed as ``Z^T Z / n`` where ``Z = center(data)`` and ``n`` is
    ``len(data)``.
    """
    centred = center(data)
    scatter = np.matmul(np.transpose(centred), centred)
    return scatter / len(data)

def eigen(data):
    """Return eigenvalues and eigenvectors of ``cov_matrix(data)``.

    Both are ordered by descending eigenvalue; eigenvectors are the columns
    of the second return value.
    """
    values, vectors = np.linalg.eigh(cov_matrix(data))
    # argsort is ascending, so reverse it to put the largest eigenvalue first.
    order = values.argsort()[::-1]
    return values[order], vectors[:, order]

def get_dimensionality(alpha, eigen_values):
    """Return how many leading eigenvalues are needed to reach *alpha*.

    Args:
        alpha: Target fraction of the eigenvalue total.
        eigen_values: Eigenvalues in the order they should be accumulated
            (callers pass them sorted in descending order).

    Returns:
        The smallest count whose running sum divided by the total is at least
        *alpha*; ``len(eigen_values)`` if the target is never reached, and 0
        for an empty input.
    """
    total_sum = np.sum(eigen_values)
    running_total = 0  # deliberately not named `sum`, which is a builtin
    count = 0
    for count, value in enumerate(eigen_values, start=1):
        running_total += value
        if running_total / total_sum >= alpha:
            break
    return count


In [None]:
# Fractions of the eigenvalue total to retain.
alphas = [0.8, 0.85, 0.9, 0.95]

means = mean(df_train)
Z = center(df_train)
eigen_values, eigen_vectors = eigen(df_train)

# Number of leading components needed for each alpha, in the same order.
new_dims = [get_dimensionality(alpha, eigen_values) for alpha in alphas]
    

In [None]:
# Notebook display: shape of the eigenvector matrix returned by eigen().
eigen_vectors.shape

In [None]:
# Notebook display: component count chosen for each entry of `alphas`.
new_dims

In [None]:
# For every alpha: project train and test onto the leading principal
# directions, fit a nearest-neighbour classifier and report its performance.
for x in range(len(new_dims)):
    projection = eigen_vectors[:, 0:new_dims[x]]

    p_train_data = np.matmul(projection.T, df_train.T).T
    p_test_data = np.matmul(projection.T, df_test.T).T

    # n_neighbors had no value (a syntax error); 1 matches the other
    # classifier cells in this notebook.
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(p_train_data, y_train)

    y_pred = neigh.predict(p_test_data)

    print('accuracy for alpha = ', alphas[x])
    print(classification_report(y_test, y_pred))

In [None]:
# Baseline: 1-nearest-neighbour on the raw pixel vectors, with no projection.
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(x_train, y_train)

y_pred = neigh.predict(x_test)

# No alpha is involved here, so the heading no longer reads a leftover loop
# index from the PCA cell.
print('accuracy without dimensionality reduction')
print(classification_report(y_test, y_pred))

In [None]:
# Print the most recent predictions followed by the true test labels.
print( y_pred)
print(y_test)

# LDA

In [None]:
# Attach the subject label to every training row; later cells select rows by
# this column.
df_train['class'] = y_train

# Pixel columns only, so the string labels never enter a numeric mean.
feature_columns = df_train.drop(columns='class')

# Mean of every pixel column within each class (one row per class label).
class_mean_df = feature_columns.groupby(df_train['class']).mean()
class_mean = np.array(class_mean_df)

# Mean of every pixel column over the whole training set.
sample_mean = feature_columns.mean()

In [None]:
# Overall mean of the pixel columns as a (10304, 1) column vector. The label
# column is dropped before averaging instead of averaging it and slicing the
# extra entry off afterwards.
sample_mean = pd.DataFrame(np.array(df_train.drop(columns='class').mean()).T)
overall_mean = np.array(sample_mean)

sb = np.zeros((10304, 10304))

for row in range(1, 41):  # Calculating Sb matrix
    class_mean_vec = np.array(class_mean_df.loc[str(row)]).reshape(10304, 1)
    row_vector = class_mean_vec - overall_mean
    # 5 is the per-class weight; presumably the number of training images per
    # subject left by the alternating split.
    sb = sb + (5 * np.matmul(row_vector, row_vector.T))
    

In [None]:
# Notebook display: the accumulated Sb matrix.
sb

In [None]:
# Subtract each class's mean from its own training rows. The per-class frames
# are collected and concatenated once: DataFrame.append was removed in
# pandas 2.0 and re-copied the growing frame on every call.
class_frames = []
for i in range(1, 41):
    label = str(i)
    members = df_train.loc[df_train['class'] == label].drop(columns='class').values
    x = pd.DataFrame(members - class_mean_df.loc[label].values)
    x['class'] = label
    class_frames.append(x)
z_df = pd.concat(class_frames)

In [None]:
# Notebook display of the S matrix.
# NOTE(review): `s_df` is first assigned in the following cell, so running the
# cells top to bottom raises NameError here — confirm the intended order.
s_df

In [None]:
# Calculating S matrix: sum over classes of (centred rows)^T (centred rows).
s_df = np.zeros((10304, 10304))
for subject in range(1, 41):
    in_class = z_df["class"] == str(subject)
    mat = np.array(z_df[in_class].drop(columns=["class"]))
    s_df = s_df + np.matmul(mat.T, mat)

In [None]:
# s_df is a sum of outer products of far fewer rows than it has columns, so
# it is rank-deficient; a plain inverse either raises LinAlgError or returns
# numerically meaningless values. The pseudo-inverse is well defined, and
# hermitian=True uses the symmetry of s_df.
s_inv = np.linalg.pinv(s_df, hermitian=True)

In [None]:
# The product s_inv @ sb is generally NOT symmetric, and eigh reads only one
# triangle of its input, so the general solver is required. Real parts are
# kept so the later sort and projection stay real-valued.
eig_values_lda, eig_vectors_lda = np.linalg.eig(np.matmul(s_inv, sb))
eig_values_lda = eig_values_lda.real
eig_vectors_lda = eig_vectors_lda.real

In [None]:
# Reorder the eigenvector columns by descending eigenvalue.
idx = np.argsort(eig_values_lda)[::-1]
eig_vectors_lda = np.take(eig_vectors_lda, idx, axis=1)

In [None]:
# Keep the first 39 eigenvector columns as the LDA projection basis.
U_lda=eig_vectors_lda[:,0:39]

In [None]:
# Notebook display: shape of the full sorted eigenvector matrix.
eig_vectors_lda.shape

In [None]:
def eigen_faces(eigen_vectors):
    """Draw up to 39 columns of *eigen_vectors* as 112x92 greyscale images.

    Args:
        eigen_vectors: 2-D array whose columns each hold 10304 values.
            Matrices with fewer than 39 columns are accepted; the unused
            panels of the 10x4 grid are left empty.
    """
    fig, axes = plt.subplots(10, 4, figsize=(40, 40),
                             subplot_kw={'xticks': [], 'yticks': []})
    # Never index past the columns actually supplied.
    n_faces = min(39, eigen_vectors.shape[1])
    for i, ax in zip(range(n_faces), axes.flat):
        ax.imshow(eigen_vectors[:, i].reshape(112, 92), cmap='gray')
        ax.set_title('Face: {}'.format(i + 1))
    plt.show()
#eigen_faces(U_lda)

In [None]:
# Notebook display: shape of the selected LDA basis.
U_lda.shape

In [None]:
# Project the pixel rows onto the LDA basis (one projected sample per row).
train_pixels = np.array(df_train.drop(columns=["class"]))
test_pixels = np.array(df_test)

lda_train_p = np.matmul(U_lda.T, train_pixels.T).T
lda_test_p = np.matmul(U_lda.T, test_pixels.T).T


In [None]:
# 1-nearest-neighbour on the LDA-projected samples.
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(lda_train_p, y_train)
y_pred = neigh.predict(lda_test_p)

report = classification_report(y_test, y_pred)
print(report)

# non-face

In [None]:
def get_images(directory):
    """Load the images under *directory* as 112x92 greyscale arrays.

    *directory* must contain one sub-folder per class. Folders and files are
    visited in sorted order so the result is reproducible. Entries that are
    not a known class folder, and files OpenCV cannot decode, are skipped
    rather than receiving a stale label or crashing the resize.

    Args:
        directory: Path of the folder holding the class sub-folders.

    Returns:
        ``(Images, Labels)``: a list of 2-D greyscale arrays with 112 rows and
        92 columns, and the parallel list of integer class codes
        (0 airplane, 1 car, 2 cat, 3 dog, 4 flower, 5 fruit, 6 motorbike,
        7 person).
    """
    class_codes = {
        'airplane': 0,
        'car': 1,
        'cat': 2,
        'dog': 3,
        'flower': 4,
        'fruit': 5,
        'motorbike': 6,
        'person': 7,
    }
    Images = []
    Labels = []

    for class_name in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, class_name)
        if class_name not in class_codes or not os.path.isdir(class_dir):
            continue
        label = class_codes[class_name]

        for image_file in sorted(os.listdir(class_dir)):
            image = cv2.imread(os.path.join(class_dir, image_file))
            if image is None:  # unreadable or non-image file
                continue
            # cv2.resize takes (width, height): 92 wide by 112 high matches
            # the 112x92 (rows x columns) layout of the face images.
            image = cv2.resize(image, (92, 112))
            greyscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            Images.append(greyscale)
            Labels.append(label)

    return (Images, Labels)

def get_classlabel(class_code):
    """Return the class name for an integer *class_code*.

    Raises:
        KeyError: If *class_code* is not one of the known codes.
    """
    # Every code has its own key; 'motorbike' previously reused key 5, which
    # overwrote 'fruit' and left code 6 unmapped.
    labels = {
        0: 'airplane',
        1: 'car',
        2: 'cat',
        3: 'dog',
        4: 'flower',
        5: 'fruit',
        6: 'motorbike',
        7: 'person',
    }
    return labels[class_code]
# Load the natural-images dataset: X receives the image arrays, Y the labels.
X,Y=get_images('/kaggle/input/natural-images/natural_images/')



In [None]:
# First 400 loaded images, flattened to one row of 10304 pixels each.
images = X[:400]
non_face = np.array(images).reshape((400, 10304))
non_face_df = pd.DataFrame(non_face)

In [None]:
# Notebook display: the non-face pixel table.
non_face_df

In [None]:
# Non-face images: first 200 rows train, last 200 test, labelled class 0.
# Explicit copies avoid writing a column into a slice of non_face_df.
non_faces_train = non_face_df.head(200).copy()
non_faces_test = non_face_df.tail(200).copy()

non_faces_train['class'] = 0
non_faces_test['class'] = 0

# Face images, labelled class 1. Copies keep df_train / df_test (and any
# per-subject labels stored in them) untouched.
df_train2 = df_train.copy()
df_train2['class'] = 1

df_test2 = df_test.copy()
df_test2['class'] = 1

# Train = non-face train + face train; test = non-face TEST + face test.
# The test set was previously built from non_faces_train, which leaked the
# training rows into it and dropped the non-face test rows.
non_faces_train = pd.concat([non_faces_train, df_train2], ignore_index=True)
non_faces_test = pd.concat([non_faces_test, df_test2], ignore_index=True)

In [None]:
# Notebook display: the combined face / non-face training table.
non_faces_train

In [None]:
def LDA(df_train, df_test, dimensionality):
    """Fit linear discriminant analysis on *df_train* and project both sets.

    The feature count and the class labels are taken from the data, so any
    number of classes and features is accepted. Each class contributes to the
    between-class scatter with its own sample count.

    Args:
        df_train: DataFrame of numeric feature columns plus a ``'class'``
            column holding the labels.
        df_test: DataFrame with the same layout as *df_train*.
        dimensionality: Number of discriminant directions to keep.

    Returns:
        ``(lda_train_p, lda_test_p, U_lda)``: the projected training rows,
        the projected test rows (one sample per row, *dimensionality*
        columns) and the ``(n_features, dimensionality)`` projection matrix.
    """
    train_labels = df_train['class'].to_numpy()
    train_x = df_train.drop(columns=['class']).to_numpy(dtype=float)
    test_x = df_test.drop(columns=['class']).to_numpy(dtype=float)

    n_features = train_x.shape[1]
    overall_mean = train_x.mean(axis=0)

    sb = np.zeros((n_features, n_features))   # between-class scatter
    s_w = np.zeros((n_features, n_features))  # within-class scatter
    for cls in np.unique(train_labels):
        members = train_x[train_labels == cls]
        cls_mean = members.mean(axis=0)

        gap = (cls_mean - overall_mean).reshape(-1, 1)
        sb += len(members) * np.matmul(gap, gap.T)

        centred = members - cls_mean
        s_w += np.matmul(centred.T, centred)

    # s_w is singular whenever there are fewer samples than features, so use
    # the pseudo-inverse; hermitian=True uses the symmetry of s_w.
    s_inv = np.linalg.pinv(s_w, hermitian=True)

    # s_inv @ sb is not symmetric in general, so the general eigen-solver is
    # needed (eigh would read only one triangle). Real parts are kept.
    eig_values, eig_vectors = np.linalg.eig(np.matmul(s_inv, sb))
    order = np.argsort(eig_values.real)[::-1]
    U_lda = eig_vectors[:, order[:dimensionality]].real

    lda_train_p = np.matmul(train_x, U_lda)
    lda_test_p = np.matmul(test_x, U_lda)

    return (lda_train_p, lda_test_p, U_lda)
    
    
    

In [None]:
# Face vs non-face LDA, keeping a single discriminant direction.
p_faces_train,p_faces_test,U_lda_nonfaces = LDA(non_faces_train,non_faces_test,1)

In [None]:
# 1-nearest-neighbour on the one-dimensional face / non-face projection.
train_classes = non_faces_train['class']
test_classes = non_faces_test['class']

neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(p_faces_train, train_classes)
y_pred = neigh.predict(p_faces_test)

print(classification_report(test_classes, y_pred))

In [None]:
# U_lda_nonfaces was requested with dimensionality 1 above, so it holds a
# single column and at most one panel can show a discriminant vector.
eigen_faces(U_lda_nonfaces)

In [None]:
# Recompute the per-class means on the face / non-face training table and
# display the row for class 0.
# NOTE(review): this rebinds the global class_mean_df used by the LDA cells
# above — confirm that is intended before re-running them.
class_mean_df = non_faces_train.groupby(by='class').agg(['mean'],axis='columns')
class_mean_df.loc[0]