In [1]:
import os
import numpy as np 
from PIL import Image
import matplotlib.pyplot as plt
from sklearn import preprocessing

#### Part 1: Dataset Selection and Preprocessing (15 points)

Get the number of rows and columns from the first image in the directory

In [2]:
image = Image.open('yalefaces/subject01.centerlight')
nrows, ncolumns = image.height, image.width

Define a function to get load and preprocess the data

In [9]:
def process_images(image_directory):
    image_files = []
    for i, file in enumerate(os.listdir(image_directory)):
        image = Image.open(os.path.join(image_directory, file))
        image = image.resize((300, 400)).convert('L')
        image_files.append(np.asarray(image.getdata()))
    return image_files

#### Part 2: Eigenface Generation (20 points)

Load the images from the 'yalefaces' directory and flatten them into a dataset

In [10]:
X = np.asarray(process_images('yalefaces'))

Confirm the shape of the flattened dataset. Since the dataset contains 165 images and we have resized the images into 300 by 400 size, we should expect the shape to by 165 by 300 * 400

In [11]:
X.shape

(165, 120000)

Calculate the average face image

In [15]:
# Get the length of the images array
length_dataset = X.shape[0]

mean_face = np.sum(X, axis=0, dtype='float64') / length_dataset

Find the mean matrix using the numpy tile function. np.tile constructs an array by repeating a given dataset by the number of times specified

In [16]:
mean_matrix = np.tile(mean_face, (length_dataset, 1)) 

Mean-center the image data. Mean-centering calculates the difference between the mean value and the actual value, ensuring that we are only looking at the variance in the dataset

In [17]:
X_centered = X - mean_matrix

Caclulate the co-variance in the data. Convariance is a measure of the relationship between two variables.

In this case, we calculate the co-variance by finding the dot product of the image data and a transposed version of the image data

In [18]:
covariance_matrix = (X_centered.dot(X_centered.T)) / length_dataset

Find the eigenvalues and eigenvectors using the covariance matrix. Because they are square and symmetrical, covariance matrixes are diagonalizable, which means an eigendecomposition can be performed on the matrix.

In [19]:
eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

Find the order of the eignevalues

In [20]:
order = eigenvalues.argsort()[::-1]

Sort the eigenvalues by their order

In [21]:
eigenvalues = eigenvalues[order]

Get the principal components

In [23]:
eigenvectors_C = eigenvectors[:, order]

Project all images to the eigen space

In [24]:
eigenvectors_C = X_centered.T @ eigenvectors 

Normalize all the eigenfaces

In [27]:
eigenfaces = preprocessing.normalize(eigenvectors_C.T)

ValueError: Complex data not supported
[[-4.86737370e+01+0.00000000e+00j -7.77793566e+01+0.00000000e+00j
  -8.69613346e+01+0.00000000e+00j ... -2.85648336e+01+0.00000000e+00j
  -2.75107970e+01+0.00000000e+00j -2.70522939e+01+0.00000000e+00j]
 [-9.47427847e+01+0.00000000e+00j -1.10129694e+02+0.00000000e+00j
  -1.10706229e+02+0.00000000e+00j ...  3.19657070e+01+0.00000000e+00j
   3.40089772e+01+0.00000000e+00j  3.31238582e+01+0.00000000e+00j]
 [ 4.43513892e+00+0.00000000e+00j  5.79034303e+00+0.00000000e+00j
   9.33619393e+00+0.00000000e+00j ... -1.08749397e+00+0.00000000e+00j
  -4.14668419e-01+0.00000000e+00j  1.40578921e-01+0.00000000e+00j]
 ...
 [-2.79345111e-14-5.21163048e-15j -1.82071050e-14+4.67698124e-15j
  -2.61104355e-14+2.08510928e-14j ...  3.48637457e-16+3.38737701e-15j
   4.80110257e-15+5.17181114e-15j  9.94320776e-15+4.54228992e-15j]
 [ 9.18534228e-14+0.00000000e+00j  7.90646345e-14+0.00000000e+00j
   5.36802355e-14+0.00000000e+00j ... -6.19483430e-15+0.00000000e+00j
  -1.98998111e-14+0.00000000e+00j -2.26730384e-14+0.00000000e+00j]
 [-1.25712578e-14+0.00000000e+00j -1.62448449e-14+0.00000000e+00j
  -2.65289411e-14+0.00000000e+00j ... -3.99547652e-15+0.00000000e+00j
  -3.37512214e-15+0.00000000e+00j -3.37071089e-15+0.00000000e+00j]]


Plot the first eigenface

In [28]:
plt.imshow(np.resize(eigenfaces[0], (nrows,ncolumns)), cmap='gray')
plt.title('First Eigen Face')
plt.show()

NameError: name 'eigenfaces' is not defined