In [1]:
# PCA Example using Eigenvalue Decomposition (ED) and Singular Value Decomposition (SVD)

import numpy as np

# 1. Define the dataset (rows are samples, columns are features)
X = np.array([
    [2, 0],
    [0, 2],
    [3, 3]
])

# 2. Mean-center the data: subtract each column's mean so every feature
#    has zero mean. PCA directions are only meaningful on centered data.
X_meaned = X - np.mean(X, axis=0)
print("Mean-centered data:\n", X_meaned)

# 3. Calculate Covariance Matrix
#    rowvar=False tells np.cov that columns (not rows) are the variables.
#    np.cov normalizes by (n_samples - 1) by default.
cov_matrix = np.cov(X_meaned, rowvar=False)
print("\nCovariance matrix:\n", cov_matrix)

# 4. Eigenvalue Decomposition (ED)
#    eigh is the solver for symmetric matrices; it returns eigenvalues in
#    ASCENDING order, with the matching eigenvectors as columns.
eig_values, eig_vectors = np.linalg.eigh(cov_matrix)
print("\nEigenvalues:\n", eig_values)
print("\nEigenvectors (PC directions):\n", eig_vectors)

# 5. Sort eigenvalues and eigenvectors in descending order
#    so that column 0 is the direction of largest variance (PC1).
sorted_index = np.argsort(eig_values)[::-1]
eig_values = eig_values[sorted_index]
eig_vectors = eig_vectors[:, sorted_index]

print("\nSorted Eigenvalues:\n", eig_values)
print("\nSorted Eigenvectors (principal components):\n", eig_vectors)

# 6. Singular Value Decomposition (SVD)
#    Applied directly to the centered data. The ROWS of VT are the principal
#    directions, already ordered by decreasing singular value.
#    With the default full_matrices=True, U is (n_samples x n_samples); only
#    its first len(S) columns correspond to the singular values in S.
U, S, VT = np.linalg.svd(X_meaned)
print("\nSVD - U matrix:\n", U)
print("\nSVD - Singular values:\n", S)
print("\nSVD - VT matrix (principal directions):\n", VT)

# Variance explained from SVD singular values:
# eigenvalue_i of the covariance matrix == S_i**2 / (n_samples - 1)
explained_variance_svd = (S**2) / (X.shape[0]-1)
print("\nExplained variance from SVD:\n", explained_variance_svd)

# 7. Cross-check that ED and SVD describe the same PCA (prints nothing;
#    raises AssertionError if the two routes ever disagree).
np.testing.assert_allclose(
    explained_variance_svd, eig_values, rtol=1e-9, atol=1e-12
)

# Eigenvectors / singular vectors are only defined up to sign, so the two
# methods may return v or -v for the same component. Compare the absolute
# dot product between every ED direction (columns of eig_vectors) and every
# SVD direction (rows of VT): matching components give 1, others give 0.
# NOTE: this comparison relies on the eigenvalues being distinct; with
# repeated eigenvalues the directions are not unique.
direction_overlap = np.abs(eig_vectors.T @ VT.T)
np.testing.assert_allclose(
    direction_overlap, np.eye(VT.shape[0]), rtol=0, atol=1e-9
)


Mean-centered data:
 [[ 0.33333333 -1.66666667]
 [-1.66666667  0.33333333]
 [ 1.33333333  1.33333333]]

Covariance matrix:
 [[2.33333333 0.33333333]
 [0.33333333 2.33333333]]

Eigenvalues:
 [2.         2.66666667]

Eigenvectors (PC directions):
 [[-0.70710678  0.70710678]
 [ 0.70710678  0.70710678]]

Sorted Eigenvalues:
 [2.66666667 2.        ]

Sorted Eigenvectors (principal components):
 [[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]

SVD - U matrix:
 [[ 4.08248290e-01 -7.07106781e-01  5.77350269e-01]
 [ 4.08248290e-01  7.07106781e-01  5.77350269e-01]
 [-8.16496581e-01  1.11022302e-16  5.77350269e-01]]

SVD - Singular values:
 [2.30940108 2.        ]

SVD - VT matrix (principal directions):
 [[-0.70710678 -0.70710678]
 [-0.70710678  0.70710678]]

Explained variance from SVD:
 [2.66666667 2.        ]
