In [20]:
from PIL import Image
import os
import matplotlib.pyplot as plt
import zipfile
import tempfile
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh

In [21]:
# Extract ZIP file
def extract_zip(zip_path, extract_to):
    """Unpack every member of the ZIP archive at ``zip_path`` into ``extract_to``.

    The archive is opened read-only and closed again when extraction finishes
    (or fails). Nothing is returned.
    """
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=extract_to)

In [22]:
def load_data(path, height, width):
    """Load the ``subject<NN>.*`` images found directly in ``path``.

    A directory entry is accepted when the part of its name before the first
    '.' is ``subject`` followed only by digits. Every accepted file is opened
    with PIL, resized to ``width`` x ``height`` and flattened into one column
    of the result. Entries that do not match are reported on stdout and left
    out of the returned arrays entirely.

    Parameters
    ----------
    path : str
        Directory to scan. Only its top level is listed (no recursion).
    height, width : int
        Target image size in pixels.

    Returns
    -------
    images : np.ndarray, shape (width * height, n_loaded)
        One flattened image per column.
    train_labels : np.ndarray of uint8, shape (n_loaded,)
        Number taken from the file name minus one, aligned with the columns
        of ``images``.
    """
    # First pass: decide which entries are images, so the arrays are sized to
    # what is actually loaded (no all-zero placeholder columns for skipped entries).
    selected = []
    # Sorted so the column order does not depend on the arbitrary order
    # in which os.listdir returns entries.
    for pic in sorted(os.listdir(path)):
        label_str = pic.split('.')[0].replace('subject', '')  # Extract the label part
        if label_str.isdigit():
            selected.append((pic, int(label_str) - 1))
        else:
            print(f"Skipping file {pic} as it doesn't match expected naming convention.")

    images = np.zeros((width * height, len(selected)))
    train_labels = np.zeros(len(selected)).astype('uint8')
    for i, (pic, label) in enumerate(selected):
        # Context manager closes the underlying file handle after decoding.
        with Image.open(os.path.join(path, pic)) as img:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized = img.resize((width, height), Image.LANCZOS)
            images[:, i] = np.asarray(resized).flatten()
        train_labels[i] = label
    return images, train_labels


In [23]:
def PCA(imgData_train, num_dim=None):
    """Principal components of column-stacked samples via the small Gram matrix.

    The per-row mean is subtracted from ``imgData_train`` (so each column is
    treated as one sample), the (n_samples x n_samples) matrix
    ``S = Xc.T @ Xc`` is eigendecomposed, and its eigenvectors are mapped back
    to feature space with ``Xc @ v`` and scaled to unit length.

    Parameters
    ----------
    imgData_train : np.ndarray, shape (n_features, n_samples)
    num_dim : int or None
        Number of leading components to keep. If None, every component whose
        eigenvalue is numerically greater than zero is kept.

    Returns
    -------
    eigenvectors : np.ndarray, shape (n_features, k)
        Unit-norm directions, ordered by decreasing eigenvalue.
    eigenvalues : np.ndarray, shape (k,)
        Matching eigenvalues of ``S`` (not divided by the sample count).

    Notes
    -----
    Asking for more components than the rank of the centred data (via
    ``num_dim``) includes directions whose back-projection is numerically
    zero; normalising those is ill-defined.
    """
    imgData_mean = np.mean(imgData_train, axis=1).reshape(-1, 1)
    data_centered = imgData_train - imgData_mean
    S = data_centered.T @ data_centered
    # S is symmetric, so use the symmetric solver: it guarantees real
    # eigenvalues/eigenvectors, whereas the general solver can return complex
    # values caused purely by round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(S)

    sort_index = np.argsort(-eigenvalues)
    if num_dim is None:
        # Centring always leaves at least one zero eigenvalue, which round-off
        # may turn into a tiny positive number. Compare against a relative
        # tolerance instead of exactly zero so those directions are dropped.
        tol = (eigenvalues.max(initial=0.0)
               * max(S.shape[0], 1)
               * np.finfo(eigenvalues.dtype).eps)
        sort_index = sort_index[eigenvalues[sort_index] > tol]
    else:
        sort_index = sort_index[:num_dim]

    eigenvalues = eigenvalues[sort_index]
    # Map the Gram-matrix eigenvectors back to feature space and normalise.
    eigenvectors = data_centered @ eigenvectors[:, sort_index]
    eigenvectors_norm = np.linalg.norm(eigenvectors, axis=0)
    eigenvectors = eigenvectors / eigenvectors_norm
    return eigenvectors, eigenvalues

In [24]:
def kernel_function(X, gamma, alpha, kernel_type):
    """Build a kernel matrix from pairwise squared Euclidean distances.

    Parameters
    ----------
    X : array-like, shape (n_observations, n_features)
        ``pdist`` treats each ROW as one observation.
        NOTE(review): if the data is stored one sample per column, the caller
        presumably has to transpose it first — confirm against call sites.
    gamma : float
        Scale applied to the squared distances.
    alpha : float
        Only used by kernel type '2'.
    kernel_type : str
        '1' -> ``exp(-gamma * d2)`` (RBF / Gaussian kernel).
        '2' -> ``1 + gamma * d2 / alpha**alpha``.

    Returns
    -------
    K : np.ndarray, shape (n_observations, n_observations)

    Raises
    ------
    ValueError
        If ``kernel_type`` is not one of the supported codes.
    """
    sq_dists = pdist(X, 'sqeuclidean')
    mat_sq_dists = squareform(sq_dists)

    if kernel_type == '1':
        K = np.exp(-gamma * mat_sq_dists)
    elif kernel_type == '2':
        # NOTE(review): as written this grows with distance; it looks like a
        # rational-quadratic kernel, (1 + gamma * d2 / alpha) ** (-alpha), may
        # have been intended — confirm before relying on this branch.
        K = (1 + gamma * mat_sq_dists / alpha**alpha)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `return K`.
        raise ValueError(
            f"Unknown kernel_type {kernel_type!r}; expected '1' or '2'."
        )
    return K



In [25]:
def kernel_PCA(imgData_train, gamma, alpha, n_components, kernel_type):
    """Kernel PCA: leading eigenvectors of the centred kernel matrix.

    The kernel matrix is obtained from ``kernel_function`` with the given
    ``gamma``, ``alpha`` and ``kernel_type``, centred, and eigendecomposed.

    Returns
    -------
    eigenvectors : np.ndarray, shape (N, n_components)
        Eigenvectors belonging to the ``n_components`` largest eigenvalues.
    eigenvalues : np.ndarray, shape (N,)
        ALL eigenvalues in descending order (not truncated to ``n_components``).
    """
    gram = kernel_function(imgData_train, gamma, alpha, kernel_type)

    # Centre the kernel matrix: K - 1K - K1 + 1K1, where 1 is the N x N matrix
    # whose entries are all 1/N.
    size = gram.shape[0]
    averaging = np.ones((size, size)) / size
    gram = (gram
            - averaging.dot(gram)
            - gram.dot(averaging)
            + averaging.dot(gram).dot(averaging))

    # eigh returns eigenvalues in ascending order; flip both outputs so the
    # largest come first.
    values, vectors = eigh(gram)
    values = values[::-1]
    vectors = vectors[:, ::-1]

    leading = np.column_stack([vectors[:, k] for k in range(n_components)])
    return leading, values

In [26]:
zip_file_path = 'Yale_Face_Database.zip'  # Path to the uploaded ZIP file

# Use a temporary directory to extract files.
# The directory and everything extracted into it are removed when the `with`
# block exits, so only the in-memory arrays remain available afterwards.
with tempfile.TemporaryDirectory() as tmpdirname:
    extract_zip(zip_file_path, tmpdirname)
    # NOTE(review): load_data lists only the top level of the directory it is
    # given. The recorded output of this cell reports skipping
    # "Yale_Face_Database", which suggests the archive unpacks into a folder of
    # that name and that no image was actually loaded here — confirm the
    # archive layout and pass the folder that directly contains the subject*
    # files.
    images, train_labels = load_data(tmpdirname, height=50, width=50)  # Specify your desired height and width

Skipping file Yale_Face_Database as it doesn't match expected naming convention.
