In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sympy import *
import mtcnn
import cv2
from mtcnn.mtcnn import MTCNN
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer

### Part 1: Dataset Selection and Preprocessing (15 points)

#### Testing pre-processing and face detection on one file

Set the default image dimensions

In [None]:
# Every image is resized to this square size (in pixels) before processing
nrows = ncolumns = 64

#### Setup the dataset into training and testing data

Load the file data into different lists

In [None]:
subject_numbers = []
file_names = []
categories  = []
# Walk the directory in sorted order: os.listdir returns entries in arbitrary
# order, which would make the seeded train/test split differ between machines
for file in sorted(os.listdir('yalefaces')):
    name_parts = file.split(".")
    # Skip anything that is not a "subjectNN.<category>" entry (e.g. a readme or
    # a hidden system file); such entries would either raise an IndexError
    # below or show up later as a bogus extra subject
    if not file.startswith('subject') or len(name_parts) < 2:
        continue
    # Add the subject number, filename and category to separate lists
    file_names.append(file)
    categories.append(name_parts[1])
    subject_numbers.append(name_parts[0].replace('subject0','').replace('subject',''))

Create a pandas DataFrame from the lists

In [None]:
# Column order used for both the dictionary and the resulting DataFrame
column_order = ['Subject', 'Category', 'File']
faces_dict = dict(zip(column_order, (subject_numbers, categories, file_names)))
faces_df = pd.DataFrame(faces_dict, columns=column_order)
# Preview the first few rows
faces_df.head()

Set the sizes of the test and training datasets

In [None]:
# Number of images per subject that should end up in the training set
training_set_count = 8
# Number of distinct subjects (classes) in the dataset
class_count = faces_df["Subject"].nunique(dropna=False)
# Total number of images (non-null category entries)
dataset_size = faces_df["Category"].count()
# Whatever fraction is not used for training becomes the test fraction
test_size = 1 - (training_set_count * class_count / dataset_size)

Define the X and Y columns that will be used for testing and training

In [None]:
# The label is the subject identifier; the remaining columns describe the file
X = faces_df.drop(columns='Subject')
y = faces_df['Subject']

# Stratify on the subject so every subject is represented in both splits
X_train_info, X_test_info, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    stratify=y,
    random_state=45,
)

Load the X_test and X_train data

In [None]:
def load_images_dataset(image_directory, dataset):
    """Load and resize every image listed in ``dataset["File"]``.

    Parameters
    ----------
    image_directory : str
        Directory that contains the image files.
    dataset : mapping with a "File" entry (e.g. a pandas DataFrame)
        Its "File" values are the file names to load, in order.

    Returns
    -------
    numpy.ndarray
        The resized images stacked along a new first axis, in the same
        order as ``dataset["File"]``.

    Notes
    -----
    Reads the target size from the notebook-level ``nrows`` / ``ncolumns``.
    """
    face_images = []
    for filename in dataset["File"]:
        # Open inside a context manager so the file handle is released as soon
        # as the resized copy has been produced
        with Image.open(os.path.join(image_directory, filename)) as image:
            # PIL's resize expects (width, height), i.e. (columns, rows)
            resized = image.resize((ncolumns, nrows))
        face_images.append(np.asarray(resized))

    return np.asarray(face_images)

In [None]:
# Read the pixel data for the files assigned to each split
X_train = load_images_dataset(image_directory='yalefaces', dataset=X_train_info)
X_test = load_images_dataset(image_directory='yalefaces', dataset=X_test_info)

Reshape and normalize the data

In [None]:
# Encoder that rescales every sample (row) to unit L2 length
in_encoder = Normalizer(norm='l2')

# Flatten every training image into one row vector, then normalize each row
train_count, train_dim_1, train_dim_2 = X_train.shape
X_train_reshaped = X_train.reshape(train_count, train_dim_1 * train_dim_2)
print('Reshaped X_train', X_train_reshaped.shape)
X_train_reshaped = in_encoder.transform(X_train_reshaped)

# Same flattening and normalization for the test images
test_count, test_dim_1, test_dim_2 = X_test.shape
X_test_reshaped = X_test.reshape(test_count, test_dim_1 * test_dim_2)
print('Reshaped X_test', X_test_reshaped.shape)
X_test_reshaped = in_encoder.transform(X_test_reshaped)

# Fit the label encoder on the training labels
out_encoder = LabelEncoder()
out_encoder.fit(y_train)

In [None]:
# Show the dimensions of the stacked training images
np.shape(X_train)

### Part 2: Eigenface Generation (20 points)

Calculate the average face image

In [None]:
# Get the length of the images array
length_dataset = X_train_reshaped.shape[0]

mean_face = np.sum(X_train_reshaped, axis=0, dtype='float64') / length_dataset

Build the mean matrix using the numpy tile function. np.tile constructs an array by repeating a given array the specified number of times; here the mean face is repeated once per training image

In [None]:
# Stack one copy of the mean face per training sample
mean_matrix = np.repeat(mean_face[np.newaxis, :], length_dataset, axis=0)

Mean-center the image data. Mean-centering calculates the difference between the mean value and the actual value, ensuring that we are only looking at the variance in the dataset

In [None]:
# Subtract the mean face from every training sample
X_centered = np.subtract(X_train_reshaped, mean_matrix)

Calculate the covariance in the data. Covariance is a measure of the relationship between two variables.

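In this case, we calculate the covariance from the dot product of the mean-centered image data and its transpose. Because the images are stored one per row, this produces a small (number of images × number of images) matrix instead of the much larger pixel-by-pixel covariance matrix; its eigenvectors are mapped back into pixel space in a later step.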

In [None]:
# Sample-by-sample inner products of the centered data, averaged over samples
gram_matrix = X_centered @ X_centered.T
covariance_matrix = gram_matrix / length_dataset

Find the eigenvalues and eigenvectors using the covariance matrix. Because they are square and symmetric, covariance matrices are diagonalizable, which means an eigendecomposition can be performed on the matrix.

In [None]:
# covariance_matrix is symmetric (X @ X.T), so use the symmetric solver: it
# returns real eigenvalues and orthonormal eigenvectors, whereas the general
# solver can return spurious complex values in no defined order
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
# eigh returns eigenvalues in ascending order; flip both outputs so the most
# significant component comes first (eigenvectors are stored as columns)
eigenvalues, eigenvectors = eigenvalues[::-1], eigenvectors[:, ::-1]

Convert the eigenvalues and eigenvectors from complex numbers to real numbers

In [None]:
# Keep only the real component of each array
eigenvalues, eigenvectors = eigenvalues.real, eigenvectors.real

Find the order of the eigenvalues (largest first)

In [None]:
# Indices that sort the eigenvalues from largest to smallest
order = np.argsort(np.real(eigenvalues))[::-1]

Sort the eigenvalues by their order

In [None]:
# Reorder the eigenvalues according to the computed index order
eigenvalues = np.take(eigenvalues, order)

Get the principal components

In [None]:
# Reorder the eigenvector columns to match the sorted eigenvalues
eigenvectors_C = np.take(eigenvectors, order, axis=1)

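Map the eigenvectors back into pixel space by multiplying them with the transposed mean-centered image data; each resulting column is one (unnormalized) eigenface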

In [None]:
# Map the eigenvectors back into pixel space. Index with `order` here so the
# columns are in descending-eigenvalue order; multiplying by the raw
# `eigenvectors` would discard the sorting. Indexing the untouched
# `eigenvectors` also keeps this cell safe to re-run.
eigenvectors_C = X_centered.T @ eigenvectors[:, order]

Normalize all the eigenfaces

In [None]:
# One eigenface per row, each scaled to unit L2 length
eigenfaces = preprocessing.normalize(np.transpose(eigenvectors_C), norm='l2', axis=1)

Plot the first eigenface

In [None]:
# Draw the first eigenface as a grayscale image of the configured size
fig, ax = plt.subplots()
first_eigenface = np.resize(eigenfaces[0], (nrows, ncolumns))
ax.imshow(first_eigenface, cmap='gray')
ax.set_title('First Eigenface')
plt.show()

### Part 3: Face Recognition (35 points)

Create a function to project an image to the eigenspace

In [None]:
def project_to_eigenspace(image, eigen_faces, mean_face):
    """Return the coordinates of ``image`` in the basis given by ``eigen_faces``.

    The mean face is subtracted from the image first; the result is then
    multiplied by ``eigen_faces`` (dot product).
    """
    centered_image = image - mean_face
    return eigen_faces.dot(centered_image)

Create a function to find the cosine similarity between two vectors

In [None]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (||a|| * ||b||)``. If either vector has zero
    length the cosine is undefined; 0.0 is returned in that case instead of
    dividing by zero and producing NaN.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # A zero vector has no direction, so treat it as "no similarity"
        return 0.0
    return np.dot(a, b) / norm_product

Create a function to find the maximum similarity between an image projection and the set of eigen faces

In [None]:
def compute_max_similarity(image_projection, eigen_faces, num_components):
    """Return the highest cosine similarity between ``image_projection`` and
    the eigenspace projections of the first ``num_components`` eigenfaces.

    Each of the leading eigenfaces is itself projected onto ``eigen_faces``
    and compared with ``image_projection``; the largest score is returned.
    """
    similarities = []
    for eigen_face in eigen_faces[:num_components]:
        # NOTE: mean_face is not a parameter here; it is read from the
        # notebook-level variable of the same name
        reference_projection = project_to_eigenspace(eigen_face, eigen_faces, mean_face)
        similarities.append(cosine_similarity(image_projection, reference_projection))

    return np.max(similarities)

Define a function to determine if an image contains a face

In [None]:
def is_face(image, eigen_faces, mean_face, num_components, threshold = 0.5):
    """Decide whether ``image`` looks like a face.

    The image is projected onto the first ``num_components`` eigenfaces and
    the best cosine similarity reported by ``compute_max_similarity`` is
    compared with ``threshold``.

    Parameters
    ----------
    image : array-like
        Flattened image vector with the same length as ``mean_face``.
    eigen_faces : numpy.ndarray
        Eigenfaces, one per row.
    mean_face : array-like
        Mean face subtracted from ``image`` before projecting.
    num_components : int
        Number of leading eigenfaces to use.
    threshold : float, optional
        Similarity above which the image is reported as a face.

    Returns
    -------
    bool
        True when the best similarity is strictly greater than ``threshold``.
    """
    # Project onto the same truncated basis the similarities are computed in.
    # Projecting onto the full basis yields a vector whose length does not
    # match the reference projections unless the caller pre-slices.
    leading_faces = eigen_faces[:num_components]
    image_projection = project_to_eigenspace(image, leading_faces, mean_face)
    max_similarity = compute_max_similarity(image_projection, leading_faces, num_components)
    return max_similarity > threshold

Check if an image is a face

In [None]:
num_components = 5

# Load and resize the candidate image; the context manager closes the file,
# and PIL's resize expects (width, height), i.e. (columns, rows)
with Image.open('non_faces/B1_00107.pgm') as candidate_image:
    given_image = candidate_image.resize((ncolumns, nrows))

# Convert the given image into a flat numpy array
given_image = np.asarray(given_image.getdata())

# Apply the same L2 normalization that was applied to the training vectors;
# mean_face was computed from normalized data, so a raw pixel vector would be
# on a completely different scale
given_image = in_encoder.transform(given_image.reshape(1, -1))[0]

# Get the face evaluation result (the eigenfaces are stored in `eigenfaces`)
result = is_face(given_image, eigenfaces[:num_components], mean_face, num_components)
if result:
    print("The image is a face")
else:
    print("The image is not a face")