### Implementation of Principal Components Analysis (PCA)

In [1]:
# Imports
import numpy as np

In [127]:
def get_pca(X, dim):
    """Dimensionality reduction with PCA.

    Keyword arguments:
    X -- Data matrix with M rows (observations) and N columns (features)
    dim -- Reduced number of dimensions

    Returns a 3-tuple whose first two entries depend on the shape of X:

    * M <= N (eigendecomposition of the covariance matrix):
        (eigenvalues, eigenvectors, Y) where ``eigenvalues`` holds all N
        eigenvalues of C = X0.T @ X0 / M in descending order,
        ``eigenvectors`` is the N x N matrix whose columns are the matching
        eigenvectors, and Y = X0 @ eigenvectors[:, :dim].
    * M > N (SVD of the centered data matrix):
        (singular_values, left_singular_vectors, Y) where
        ``singular_values`` holds the ``dim`` largest singular values of X0,
        ``left_singular_vectors`` is the matching M x dim slice of U, and
        Y = U[:, :dim] * singular_values (equal to projecting X0 onto the
        first ``dim`` right singular vectors).

    NOTE: the two branches intentionally keep their historical return
    values, so they are not interchangeable: an eigenvalue of C corresponds
    to (singular value)**2 / M, and the eigenvectors of C correspond to the
    right (not left) singular vectors of X0.

    A message naming the chosen method is printed on every call.
    """

    # M observations, N features
    M, N = X.shape[0], X.shape[1]

    # Step 1: Compute the empirical mean of each feature (column) and
    # subtract it from every observation so the data is centered.
    mu = np.mean(X, 0)
    X0 = X - mu

    # If M <= N, use the eigendecomposition of the covariance matrix
    if M <= N:

        print(f"Eigendecomposition is used as n_observations ({M}) <= n_features ({N})")

        # Step 2a: Find the empirical covariance matrix (biased, 1/M scaling)
        C = (1 / M) * X0.T @ X0

        # Step 2b: Compute the eigenvalues and eigenvectors of C.
        # C is symmetric, so use eigh: unlike the general-purpose eig it is
        # guaranteed to return real eigenvalues and orthonormal eigenvectors,
        # even though C is rank-deficient here (rank <= M - 1 < N).
        eigenvalues, eigenvectors = np.linalg.eigh(C)

        # Step 2c: eigh returns ascending order; reverse to descending and
        # select the leading `dim` terms.
        eigenvalues_sorted = eigenvalues[::-1]
        eigenvectors_sorted = eigenvectors[:, ::-1]
        eigenvectors_dim = eigenvectors_sorted[:, 0:dim]

        # Step 2d: Apply projection and obtain dimensionality reduction
        Y = X0 @ eigenvectors_dim

        return eigenvalues_sorted, eigenvectors_sorted, Y

    # If M > N, use the SVD of the centered data matrix
    else:

        print(f"SVD is used as n_observations ({M}) > n_features ({N})")

        # Step 2a: Compute the singular value decomposition of X0 (singular
        # values are already in descending order). The reduced SVD avoids
        # building the full M x M matrix U, of which only `dim` columns are
        # ever used.
        U, S, Vh = np.linalg.svd(X0, full_matrices=False)

        # Step 2b: Select terms from one to selected dimension
        singvalues_dim = S[0:dim]
        singvectors_dim = U[:, 0:dim]

        # Step 2c: Scale each left singular vector by its singular value;
        # U_k * S_k equals X0 @ V_k, i.e. the projected data.
        Y = singvectors_dim * singvalues_dim

        return singvalues_dim, singvectors_dim, Y
        