In [2]:
from PIL import Image
import torch
from torch import Tensor, linalg
import numpy as np

In [21]:
# Dataset layout read below: ../data/s<person>/<image>.pgm (both indices 1-based).
image_count_per_person = 5
person_count = 40

# Ordered image-index first, person second: the first `person_count` entries
# are image 1 of every person, the next `person_count` are image 2, and so on.
images = [
    Image.open(f"../data/s{j + 1}/{i + 1}.pgm")
    for i in range(image_count_per_person)
    for j in range(person_count)
]

# Stack the images and flatten each one into a single row of pixels.
a = Tensor(
    np.array(images).reshape(
        image_count_per_person * person_count,
        images[0].width * images[0].height,
    )
)

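PCA(data: Tensor, $\alpha$: float) -> Tensor: projects `data` onto the fewest leading principal components whose combined variance reaches the fraction $\alpha$ of the total variance.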

In [7]:
def power_method(a: Tensor, tol: float, max_iterations: int) -> tuple[float, Tensor]:
    """Estimate the dominant eigenpair of a square matrix by power iteration.

    Starting from the normalized all-ones vector, the iterate is repeatedly
    multiplied by ``a`` and renormalized until two consecutive iterates differ
    (up to sign) by at most ``tol`` in Euclidean norm, or until
    ``max_iterations`` matrix-vector products have been performed.

    Args:
        a: Square matrix of shape (n, n).
        tol: Convergence threshold on the change between consecutive iterates.
        max_iterations: Upper bound on the number of iterations; ``0`` returns
            the starting vector and its Rayleigh quotient.

    Returns:
        ``(eigenvalue, eigenvector)``: ``eigenvector`` is a normalized column
        of shape (n, 1) and ``eigenvalue`` is its Rayleigh quotient
        ``x.T @ a @ x`` as a Python float.
    """
    x = torch.ones((len(a), 1), dtype=a.dtype, device=a.device)
    x = x / torch.norm(x)

    for _ in range(max_iterations):
        w = a @ x
        w_norm = torch.norm(w)
        if w_norm == 0:
            # x is mapped to zero: normalizing would only produce NaNs, so
            # stop here and report the (zero) Rayleigh quotient of x.
            break

        x_next = w / w_norm
        # A negative dominant eigenvalue flips the sign of the iterate on every
        # step, so compare against both x and -x.
        error = torch.minimum(torch.norm(x_next - x), torch.norm(x_next + x))
        x = x_next
        if error <= tol:
            break

    # Evaluate the Rayleigh quotient on the vector that is actually returned
    # (x has unit norm, so no division is needed).
    eigenvalue = (x.T @ a @ x).item()
    return eigenvalue, x


def largest_covariance_eigenvalues(covariance: Tensor, variance: float, alpha: float):
    """Extract leading eigenpairs of a covariance matrix by power iteration with deflation.

    Eigenpairs are extracted one at a time with ``power_method`` until the sum
    of the extracted eigenvalues reaches ``alpha * variance``. After each
    extraction the component ``eigenvalue * v @ v.T`` is subtracted from a
    working copy of the matrix so that the next power iteration finds the next
    eigenpair.

    Args:
        covariance: Square covariance matrix of shape (d, d). It is not
            modified; deflation happens on a copy.
        variance: Reference total variance (the caller in this notebook passes
            the trace of ``covariance``).
        alpha: Fraction of ``variance`` the returned eigenvalues should cover.

    Returns:
        ``(eigen_values, eigen_vectors)``: a list of eigenvalues (floats, in
        extraction order) and a (d, k) tensor whose columns are the matching
        eigenvectors. ``k`` is 0 when nothing needs to be extracted.
    """
    target = float(variance) * alpha
    # Deflate a copy so the caller's matrix is left untouched.
    residual = covariance.clone()

    eigen_values = []
    columns = []
    explained = 0.0
    # A (d, d) matrix has at most d eigenpairs; without this bound an
    # unreachable target (e.g. alpha > 1) would keep the loop running.
    while explained < target and len(eigen_values) < residual.shape[0]:
        eigenvalue, eigenvector = power_method(residual, 1e-6, 1000)
        eigenvalue = float(eigenvalue)
        if not eigenvalue > 0:
            # Zero, negative or NaN: no variance is left in the residual, so
            # further components would only be numerical noise.
            break

        eigen_values.append(eigenvalue)
        columns.append(eigenvector)
        residual -= eigenvalue * (eigenvector @ eigenvector.T)
        explained += eigenvalue

    if columns:
        eigen_vectors = torch.hstack(columns)
    else:
        eigen_vectors = torch.empty(
            (covariance.shape[1], 0), dtype=covariance.dtype, device=covariance.device
        )

    return eigen_values, eigen_vectors

In [22]:
def pca1(data: Tensor, alpha: float) -> Tensor:
    """Project ``data`` onto its leading principal components using ``linalg.eigh``.

    The covariance of the column-centered data is eigendecomposed, and the
    strongest components are kept until their eigenvalues sum to at least
    ``alpha`` times the trace of the covariance.

    Args:
        data: Matrix with one sample per row.
        alpha: Fraction of the total variance the kept components must reach.

    Returns:
        ``data @ U`` where the columns of ``U`` are the kept eigenvectors,
        ordered from largest to smallest eigenvalue.
    """
    centered = data - data.mean(0)
    covariance = 1 / len(data) * centered.T @ centered

    # eigh returns eigenvalues in ascending order, so the strongest
    # components sit at the end of both outputs.
    spectrum, basis = linalg.eigh(covariance)

    threshold = alpha * covariance.trace().numpy()

    total = len(spectrum)
    kept = 0
    captured = 0.0
    while kept < total and captured < threshold:
        captured += spectrum[total - 1 - kept]
        kept += 1

    # Take the last `kept` columns and reverse them into descending order.
    leading = torch.fliplr(basis[:, total - kept:])

    # NOTE(review): the original, uncentered `data` is projected here, not the
    # centered copy used for the covariance - confirm this is intended.
    return data @ leading

In [13]:
def pca3(data: Tensor, alpha: float) -> Tensor:
    """Project ``data`` onto principal components found by power iteration.

    The covariance of the column-centered data is passed to
    ``largest_covariance_eigenvalues``, which extracts eigenpairs until their
    eigenvalues cover ``alpha`` times the trace of the covariance. The number
    of components kept is printed.

    Args:
        data: Matrix with one sample per row.
        alpha: Fraction of the total variance the kept components must reach.

    Returns:
        ``data @ U`` where the columns of ``U`` are the extracted eigenvectors.
    """
    centered_data: Tensor = data - data.mean(0)
    covariance: Tensor = 1 / len(data) * centered_data.T @ centered_data
    # .item() yields a plain Python float, matching the annotation.
    variance: float = covariance.trace().item()

    eigen_values, eigen_vectors = largest_covariance_eigenvalues(covariance, variance, alpha)
    # Report how many components were needed to reach the requested share.
    print(len(eigen_values))

    # NOTE(review): the original, uncentered `data` is projected here, not
    # `centered_data` - confirm this is intended.
    return data @ eigen_vectors

In [23]:
# Reduce the image matrix `a` with the power-iteration PCA (pca3), keeping
# enough components to cover 80% of the total variance.
pca3(a, 0.8)

here
33


tensor([[ 8630.7793,  8739.8477, -1752.8748,  ...,    60.7056,   -49.2612,
          -295.8193],
        [ 8186.0869,  6870.6509, -1500.2716,  ...,    44.9240,  -430.6197,
           247.5276],
        [ 7187.0371,  8077.3491,   523.0039,  ...,   193.0413,   179.6431,
            36.3750],
        ...,
        [ 5287.4883,  8266.7285,   572.9722,  ...,   377.7743,    60.6791,
          -245.8417],
        [ 4618.3794,  5242.9062,   810.6750,  ...,   107.4536,   113.2103,
           251.2421],
        [ 8148.7295,  9163.2666,  1008.1982,  ...,   226.1054,   306.7886,
           102.3078]])

In [20]:
# Same reduction of `a` at the 80% variance level, using the eigh-based pca1.
pca1(a, 0.8)

torch.Size([10304, 44])


tensor([[-9.3552e+03, -7.9087e+03, -2.4372e+03,  ..., -2.9994e+02,
          1.6506e+02, -5.6371e+02],
        [-8.6998e+03, -6.1307e+03, -1.9186e+03,  ..., -5.3984e+00,
          1.7888e+02, -4.3669e+02],
        [-7.9627e+03, -7.2608e+03, -8.3477e+01,  ...,  2.8649e+02,
          1.7648e+02, -3.6640e+02],
        ...,
        [-6.0040e+03, -7.8446e+03, -9.6201e+01,  ...,  2.3931e+02,
          1.6761e+02, -7.4175e+02],
        [-5.1594e+03, -4.7759e+03,  6.7011e+01,  ...,  6.4504e+02,
          4.3894e+02, -5.7031e+02],
        [-8.3588e+03, -7.3135e+03,  1.4888e+03,  ...,  2.6679e+01,
          2.1479e+02, -3.1819e+02]])