# Eigen Faces

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import zipfile
import urllib.request
from PIL import Image
from sklearn import svm
from sklearn.metrics import accuracy_score,precision_score  
from sklearn.decomposition import PCA

## Download and Extract Dataset

In [None]:
# Location of the LFWcrop greyscale archive and the local file it is saved to.
url = "http://conradsanderson.id.au/lfwcrop/lfwcrop_grey.zip"
filename = "lfwcrop_grey.zip"

# Download only when the archive is not already on disk.
if not os.path.exists(filename):
    urllib.request.urlretrieve(url, filename)

# Unpack the archive into the lfw_crop directory.
with zipfile.ZipFile(filename, mode="r") as zip_ref:
    zip_ref.extractall(path="lfw_crop")

## Loading an image


First, we load an image from the dataset and then display it using the matplotlib library.


In [None]:
# Read one sample face from the dataset into a flat NumPy array.
img_path = "lfw_crop/lfwcrop_grey/faces/Muhammad_Ali_0003.pgm"
img = np.asarray(Image.open(img_path)).flatten()

# Restore the 64x64 layout so the pixels can be drawn as a picture.
img = img.reshape(64, 64)

# Show the face in greyscale.
plt.imshow(img, cmap="gray")
plt.show()

## Import dataset

In [None]:
# Directory holding the extracted face images, and the names of its entries.
faces_dir = "lfw_crop/lfwcrop_grey/faces"
faces_files = os.listdir(faces_dir)

# Number of entries found (shown as the cell output).
len(faces_files)

Load each PGM image as a flattened NumPy array and append it to the image list.


In [None]:
# Read every file in the faces directory as a flat pixel vector.
img_arrays = []
for file in faces_files:
    img_path = os.path.join(faces_dir, file)
    img = np.asarray(Image.open(img_path)).flatten()
    img_arrays.append(img)

# Convert the list of vectors into a single array, one entry per image
# (a 2-D array, assuming every image has the same number of pixels).
img_arrays = np.array(img_arrays)

# Shape of the resulting array (shown as the cell output).
img_arrays.shape

In [None]:
# One row per image: the pixel values, with the image name added in front.
df_faces = pd.DataFrame(img_arrays)

# Remove the file extension with splitext instead of a fixed 4-character
# slice, so the name stays correct whatever the extension length is.
faces_files = [os.path.splitext(name)[0] for name in faces_files]

# Insert the names as the first column; the fourth argument is
# allow_duplicates.
df_faces.insert(0, "Names", faces_files, True)
df_faces.head()

## Displaying an Image from Dataframe

In [None]:
# Select the row of the sample face by name and keep only its pixel columns
# (everything after the leading "Names" column).
img = np.asarray(
    df_faces.loc[df_faces["Names"] == "Muhammad_Ali_0003"].iloc[:, 1:]
)

# Reshape the pixel values to 64x64 and draw them in greyscale.
img = img.reshape(64, 64)
plt.imshow(img, cmap="gray")
plt.show()

## Creating a dataset as mentioned in the list

Here, pairs of images of the same person (listed in the "same" files) are labelled 1, and pairs of images of different people are labelled 0.

We have declared a function `make_a_dataframe` that turns one list file into a labelled dataframe; these dataframes are then combined into the training and testing datasets.

In [None]:
# Directory holding the pair-list files, and the names of its entries.
lists_dir = "lfw_crop/lfwcrop_grey/lists"
list_files = os.listdir(lists_dir)

# Number of list files found (shown as the cell output).
len(list_files)

In [None]:
def make_a_dataframe(filename):
    """Build a labelled dataframe of image pairs from one list file.

    Each non-blank line of the list file names two faces. For every such
    line, the pixel columns of the rows of the module-level ``df_faces``
    whose ``Names`` value matches each name are flattened and joined into
    one feature vector, preceded by the label.

    Args:
        filename: Name of a file inside the module-level ``lists_dir``.
            If it contains the substring ``"same"``, every pair is
            labelled 1; otherwise every pair is labelled 0.

    Returns:
        A ``pandas.DataFrame`` with one row per pair. Column 0 holds the
        label and the remaining columns hold the concatenated pixels.

    Raises:
        ValueError: If a non-blank line does not hold exactly two names.
    """
    path = os.path.join(lists_dir, filename)

    # The label depends only on the file name, so decide it once instead of
    # duplicating the whole parsing loop for each case.
    label = 1 if "same" in filename else 0

    # Loop-invariant: extract the name column and the pixel matrix once
    # rather than re-slicing the dataframe for every line.
    names = df_faces["Names"].to_numpy()
    pixels = df_faces.iloc[:, 1:].to_numpy()

    data = []
    with open(path) as f:
        for line in f:
            # split() without arguments tolerates tabs, repeated spaces and
            # the trailing newline.
            fields = line.split()
            if not fields:
                # Blank line (for example at the end of the file).
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{path}: expected two names per line, got {line!r}"
                )
            name1, name2 = fields
            # axis=None flattens both selections into one 1-D vector.
            value = np.concatenate(
                (pixels[names == name1], pixels[names == name2]),
                axis=None,
            )
            data.append([label] + value.tolist())

    return pd.DataFrame(data)

In [None]:
# Build one dataframe per list file and sort it into the train or test pool.
# Any list file whose name does not contain "train" is treated as test data.
# NOTE(review): if the list files are several cross-validation folds, pooling
# them all may place the same pair in both train and test. Confirm.
train_frames = []
test_frames = []
for file in list_files:
    if "train" in file:
        train_frames.append(make_a_dataframe(file))
    else:
        test_frames.append(make_a_dataframe(file))

# Concatenate once per pool. Growing a dataframe with pd.concat inside the
# loop re-copies all accumulated rows each time and starts from an empty
# DataFrame, which recent pandas versions warn about.
train = pd.concat(train_frames, axis=0) if train_frames else pd.DataFrame()
test = pd.concat(test_frames, axis=0) if test_frames else pd.DataFrame()


In [None]:
# Shapes of the test and train tables, in that order (shown as the cell output).
(test.shape, train.shape)

## Model Creation

We first check the accuracy on the raw images, before applying PCA, using the supervised machine learning model SVM (Support Vector Machine).

In [None]:
# Column 0 holds the label. pop() returns that column and removes it from
# the table in place, leaving only the pixel features.
y = train.pop(0)
y_test = test.pop(0)

# Preview the remaining feature columns.
train.head()

### Fitting and Prediction of the Model

In [None]:
# Baseline: a support vector classifier with default settings, trained on
# the raw pixel features.
model = svm.SVC()
model.fit(train, y)

In [None]:
# Predict the labels of the test pairs with the baseline model.
y_pred = model.predict(test)

### Accuracy and Precision

In [None]:
# Score the baseline predictions against the true test labels.
accuracy_before = accuracy_score(y_test, y_pred)
precision_before = precision_score(y_test, y_pred)

# Both metrics (shown as the cell output).
(accuracy_before, precision_before)

## Applying PCA 97%

Now we apply PCA, keeping enough principal components to explain 97% of the variance in the data.

In [None]:
# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share (97%) of the variance.
pca = PCA(n_components=0.97)

# Fit the projection on the training features only, then apply the same
# projection to the test features.
# NOTE(review): PCA is unsupervised, so y presumably has no effect here. Confirm.
train_pca1 = pca.fit_transform(train, y)
test_pca1 = pca.transform(test)

# Shapes after the reduction (shown as the cell output).
(train_pca1.shape, test_pca1.shape)

### Fitting and Prediction of the Model

In [None]:
# Train a fresh default support vector classifier on the 97% PCA features.
model = svm.SVC()
model.fit(train_pca1, y)

In [None]:
# Predict the labels of the test pairs from their 97% PCA features.
y_pred = model.predict(test_pca1)

### Accuracy and Precision

In [None]:
# Score the 97% PCA predictions against the true test labels.
accuracy_97 = accuracy_score(y_test, y_pred)
precision_97 = precision_score(y_test, y_pred)

# Both metrics (shown as the cell output).
(accuracy_97, precision_97)

## Applying PCA 95%

Now we apply PCA, keeping enough principal components to explain 95% of the variance in the data.

In [None]:
# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share (95%) of the variance.
pca = PCA(n_components=0.95)

# Fit the projection on the training features only, then apply the same
# projection to the test features.
# NOTE(review): PCA is unsupervised, so y presumably has no effect here. Confirm.
train_pca2 = pca.fit_transform(train, y)
test_pca2 = pca.transform(test)

# Shapes after the reduction (shown as the cell output).
(train_pca2.shape, test_pca2.shape)

### Fitting and Prediction of the Model

In [None]:
# Train a fresh default support vector classifier on the 95% PCA features.
model = svm.SVC()
model.fit(train_pca2, y)

In [None]:
# Predict the labels of the test pairs from their 95% PCA features.
y_pred = model.predict(test_pca2)

### Accuracy and Precision

In [None]:
# Score the 95% PCA predictions against the true test labels.
accuracy_95 = accuracy_score(y_test, y_pred)
precision_95 = precision_score(y_test, y_pred)

# Both metrics (shown as the cell output).
(accuracy_95, precision_95)

## Data Visualization

In [None]:
# Gather the metrics of the three experiments into one table for plotting.
data = {
    "Method": ["Before PCA", "PCA 97%", "PCA 95%"],
    "Accuracy": [accuracy_before, accuracy_97, accuracy_95],
    "Precision": [precision_before, precision_97, precision_95],
}
vis = pd.DataFrame(data)

# Create the axes explicitly and hand them to pandas. Without ax=, pandas
# draws the bars on a figure of its own, so a separately created figure
# stays blank and its figsize never reaches the chart.
fig, ax = plt.subplots(figsize=(10, 12))
vis.plot.bar(x="Method", ax=ax)

# Tilt the method names so they do not overlap.
plt.xticks(rotation=45, ha="right")
plt.title("Method Vs Accuracy & Precision", fontsize=14)
plt.xlabel("Method", fontsize=14)
plt.ylabel("Accuracy & Precision", fontsize=14)
plt.legend()

# Save before show().
plt.savefig("Comparison.png")
plt.show()