In [None]:
import numpy as np
import cv2
from sklearn.preprocessing import StandardScaler
from PIL import Image
import os
import random

In [None]:
def create_vec(location, count_of_same_celebrity=10, seed=None):
    '''
    Build a matrix of randomly sampled celebrity faces, one flattened image per column.

    Every sub-folder of 'location' is treated as one celebrity. Up to
    'count_of_same_celebrity' files are sampled from each folder, converted to
    grayscale, resized to 50x50 and flattened into a 2500-element vector.

    location: location of the master folder
    count_of_same_celebrity: number of random images taken of the same celebrity
        (folders holding fewer files contribute all of their files)
    seed: optional seed for the sampling; None keeps using the global 'random'
        state, any other value makes the selection reproducible

    Returns a NumPy array of shape (2500, number_of_images). When no image is
    found the result is an empty uint8 array of shape (2500, 0), so the return
    value is always 2-D.
    '''
    image_size = (50, 50)

    # A private generator keeps a seeded run from disturbing the global random state
    rng = random if seed is None else random.Random(seed)

    img_arrays = []

    # os.listdir returns entries in arbitrary order; sorting makes a seeded run repeatable
    for celeb in sorted(os.listdir(location)):
        celeb_path = os.path.join(location, celeb)
        if not os.path.isdir(celeb_path):
            continue  # Skip if the item in the directory is not a folder

        files = sorted(
            f for f in os.listdir(celeb_path)
            if os.path.isfile(os.path.join(celeb_path, f))
        )

        # Use min() to avoid asking for more samples than there are files
        selected_files = rng.sample(files, min(len(files), count_of_same_celebrity))

        for selected_file in selected_files:
            face_path = os.path.join(celeb_path, selected_file)

            # The context manager closes the underlying file once the pixels are read
            with Image.open(face_path) as face_image:
                # Same grayscale + LANCZOS resize as preprocess_test_image, so the
                # training and test vectors are produced by identical preprocessing
                resized_face_image = face_image.convert("L").resize(image_size, Image.LANCZOS)
                img_arrays.append(np.array(resized_face_image).flatten())

    if not img_arrays:
        # Keep the (pixels, images) layout even when nothing was loaded
        return np.empty((image_size[0] * image_size[1], 0), dtype=np.uint8)

    # Stack the vectors as rows, then transpose so each image becomes a column
    img_matrix = np.transpose(img_arrays)

    return img_matrix

In [None]:
# Training matrix returned by create_vec for the celebrity dataset folder
A = create_vec(location='/content/drive/MyDrive/Cropped Celebrity Faces Dataset')
A.shape

In [None]:
# Row-wise mean of A, kept as a column (keepdims) so it broadcasts across A's columns
mean = A.mean(axis=1, keepdims=True)
# Centred data: the mean column subtracted from every column of A
B = A - mean

In [None]:
# Product of the centred data with its own transpose
# (no later cell shown in this notebook reads C)
C = np.matmul(B, B.T)
# Singular value decomposition of B. S holds the singular values as a 1-D array;
# the third value returned by np.linalg.svd (named V here) is the transposed
# right factor, so B = U @ diag(S) @ V once S is padded to a rectangular matrix.
U, S, V = np.linalg.svd(B, full_matrices=True)
S.shape

In [None]:
# Rectangular matrix of zeros with the singular values placed on its main diagonal
S_mat = np.zeros((U.shape[1], V.shape[0]))
diag_idx = np.arange(len(S))
S_mat[diag_idx, diag_idx] = S

In [None]:
# Shapes of the three SVD factors, printed on one line
print(*(factor.shape for factor in (U, S_mat, V)))

In [None]:
# Zero matrix shaped like B whose leading diagonal carries the singular values
Sigma = np.zeros(B.shape)
k = min(B.shape)
Sigma[np.arange(k), np.arange(k)] = S[:k]

In [None]:
# Sorting eigenvectors by eigenvalue, largest first.
# The eigenvalues of B @ B.T are the squared singular values of B (not their
# square roots), and the matching eigenvectors are the columns of U.
S_new = S ** 2  # one eigenvalue per singular value, same length as S
sorted_indices = np.argsort(S_new)[::-1]
# Reorder the columns of U to follow the descending eigenvalue order
U_sorted = U[:, sorted_indices]
U_sorted.shape

In [None]:
# How many leading eigenfaces to keep
num_components = 50
# Keep only the first num_components columns of U_sorted
selected_U = U_sorted[..., :num_components]
selected_U.shape

In [None]:
# Coordinates of the centred data B in the basis of the selected eigenfaces
W = np.matmul(selected_U.T, B)
W.shape

In [None]:
#Testing image
def preprocess_test_image(file_path, target_size=(50, 50)):
    '''
    Load an image file and turn it into a flat grayscale pixel vector.

    file_path: path of the image to load
    target_size: size passed to Image.resize; defaults to (50, 50)

    Returns a 1-D NumPy array holding the pixels of the resized grayscale image.
    '''
    # The context manager closes the underlying file as soon as the pixels are read
    with Image.open(file_path) as img:
        # Grayscale first, then LANCZOS resampling to the uniform target size
        img_resized = img.convert('L').resize(target_size, Image.LANCZOS)
    # Flatten the resized image into a single vector
    img_array = np.array(img_resized).flatten()
    return img_array

# Query image to identify; replace this with the path to your own test image
test_image_path = '/content/drive/MyDrive/Test/angelina jolie.jpg'

# Flattened pixel vector returned by preprocess_test_image for that file
x = preprocess_test_image(file_path=test_image_path)

In [None]:
# Turn the flat pixel vector into a single column
x_reshaped = np.reshape(x, (-1, 1))

# Centre the test column with the mean computed from A
x_norm = x_reshaped - mean
# Project the centred column onto the selected eigenfaces
x_trans = np.matmul(selected_U.T, x_norm)
x_trans.shape

In [None]:
# Euclidean distance from the projected test image to every column of W
d = np.linalg.norm(x_trans - W, axis=0)
# Column with the smallest distance, and that distance
d_index = d.argmin()
d_min = d[d_index]

In [None]:
from IPython.display import display
import matplotlib.pyplot as plt

def verify(A, n, image_shape=(50, 50)):
  '''
  Display the 'n'th image (column) from the 'A' matrix

  A: Matrix of images stored column-wise (one flattened image per column)
  n: column index, so it must be less than A.shape[1]
     (len(A) counts rows, i.e. pixels, and is not the right bound)
  image_shape: (rows, cols) used to unflatten the column; defaults to (50, 50)

  Values are clipped to the 0..255 range before the uint8 conversion, so
  out-of-range entries saturate instead of wrapping around. uint8 input is
  displayed unchanged.
  '''
  reshaped_image = A[:, n].reshape(image_shape)
  # Saturate instead of letting astype('uint8') wrap values outside 0..255
  pixels = np.clip(reshaped_image, 0, 255).astype('uint8')
  # Convert the pixel array to a PIL Image and render it in the notebook
  image = Image.fromarray(pixels)
  display(image)

In [None]:
# Display the column of A selected by d_index
verify(A=A, n=d_index)