In [47]:
import numpy as np 
def pca(data: np.ndarray, k: int) -> list[list[int|float]]:
    # standardise: set mean to 0 and std to 1, axis on 0 as we want to only
    # calculate the mean and std over the columns (feature-wise)
    standardised_data = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
    # covariance matrix calc, we want rowvr to be false
    cov = np.cov(standardised_data, rowvar=False)
    # eigen(values/vectors)
    eigenvalues, eigenvectors = np.linalg.eig(cov)
    print(f'{eigenvalues=}')
    print(f'{eigenvectors=}')
    # sort eigenvectors by eigenvalues (we are 
    # looking at the columns of eigen vectors, not the columns)
    idx = np.argsort(eigenvalues)[::-1]
    eigenvectors_sorted = eigenvectors[:, idx]

    return eigenvectors_sorted[:, :k]

data = np.array([[1, 2], [3, 4], [5, 6]])
k = 1
pca(data, k)

eigenvalues=array([3., 0.])
eigenvectors=array([[ 0.70710678, -0.70710678],
       [ 0.70710678,  0.70710678]])
eigenvectors_sorted=array([[ 0.70710678, -0.70710678],
       [ 0.70710678,  0.70710678]])


array([[0.70710678],
       [0.70710678]])

In [48]:
print(pca(np.array([[4,2,1],[5,6,7],[9,12,1],[4,6,7]]),2))

eigenvalues=array([2.79940678, 0.02069312, 1.1799001 ])
eigenvectors=array([[ 0.68547545,  0.72394434,  0.07764021],
       [ 0.62021767, -0.63643914,  0.45855781],
       [-0.3813836 ,  0.26617629,  0.88526647]])
eigenvectors_sorted=array([[ 0.68547545,  0.07764021,  0.72394434],
       [ 0.62021767,  0.45855781, -0.63643914],
       [-0.3813836 ,  0.88526647,  0.26617629]])
[[ 0.68547545  0.07764021]
 [ 0.62021767  0.45855781]
 [-0.3813836   0.88526647]]
