In [2]:
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rnd
# PCA on synthetic 2-D Gaussian data: draw correlated samples, centre them,
# eigen-decompose the sample covariance matrix and project the samples onto
# the principal axes.

# Create random 2d data
# Parameters of the generating distribution: mean vector and a symmetric
# covariance matrix with negative correlation between the two features.
mu = np.array([10, 13])
sigma = np.array([[3.5, -1.8], [-1.8, 3.5]])

print("Mu ", mu.shape)
print("Sigma ", sigma.shape)

# Create 1000 samples using mean and sigma (one sample per row).
# NOTE: the global NumPy RNG is not seeded here, so every run draws
# different samples and prints different eigenvalues/eigenvectors.
org_data = rnd.multivariate_normal(mu, sigma, size=1000)
print("Data shape ", org_data.shape)


# Subtract the per-feature (column-wise) mean from the data
mean = np.mean(org_data, axis=0)
print("Mean ", mean.shape)
mean_data = org_data - mean
# Report the centred array, not the original one.
print("Data after subtracting mean ", mean_data.shape, "\n")

# Compute covariance matrix.
# np.cov treats each ROW as a variable by default, hence the transpose.
# The matrix is kept at full precision: rounding it before the
# decomposition would yield eigenpairs of a perturbed matrix, so the
# projection below would no longer exactly decorrelate the data.
cov = np.cov(mean_data.T)
print("Covariance matrix ", cov.shape, "\n")

# Perform eigen decomposition of covariance matrix.
# A covariance matrix is symmetric, so use eigh: it returns real
# eigenvalues (in ascending order) and orthonormal eigenvectors stored as
# the COLUMNS of eig_vec.
eig_val, eig_vec = np.linalg.eigh(cov)
print("Eigen vectors ", eig_vec)
print("Eigen values ", eig_val, "\n")


# Sort eigen values and corresponding eigen vectors in descending order,
# so that the first column of eig_vec is the first principal component.
indices = np.argsort(eig_val)[::-1]
eig_val = eig_val[indices]
eig_vec = eig_vec[:, indices]
print("Sorted Eigen vectors ", eig_vec)
print("Sorted Eigen values ", eig_val, "\n")

# Get explained variance: each eigenvalue's share of the total variance,
# and the running total over the sorted components.
sum_eig_val = np.sum(eig_val)
explained_variance = eig_val / sum_eig_val
print(explained_variance)
cumulative_variance = np.cumsum(explained_variance)
print(cumulative_variance)

# Project the centred data onto the eigenvectors (principal axes).
# Column k of pca_data holds the coordinates along the k-th component.
pca_data = np.dot(mean_data, eig_vec)
print("Transformed data ", pca_data.shape)

Mu  (2,)
Sigma  (2, 2)
Data shape  (1000, 2)
Mean  (2,)
Data after subtracting mean  (1000, 2) 

Covariance matrix  (2, 2) 

Eigen vectors  [[-0.70919569  0.70501168]
 [-0.70501168 -0.70919569]]
Eigen values  [1.83997041 5.22002959] 

Sorted Eigen vectors  [[ 0.70501168 -0.70919569]
 [-0.70919569 -0.70501168]]
Sorted Eigen values  [5.22002959 1.83997041] 

[0.73938096 0.26061904]
[0.73938096 1.        ]
Transformed data  (1000, 2)
