In [2]:
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D , proj3d

In [3]:
# Fix NumPy's global seed so the randomly drawn samples are the same on every run.
np.random.seed(seed=2343243)

In [6]:
# Draw 100 samples from a 3-D Gaussian centred at (1, 1, 1) with identity covariance.
mean1 = np.ones(3, dtype=int)
cov1 = np.eye(3, dtype=int)

class1 = np.random.multivariate_normal(mean=mean1, cov=cov1, size=100)
class1.shape

(100, 3)

First, apply scikit-learn's built-in PCA so that we can check the results of our own implementation against it:

In [9]:
# Reference result: fit scikit-learn's PCA with two components on the samples
# and keep the transformed data in class_trans.
pca = PCA(n_components = 2)
class_trans = pca.fit_transform(class1)

In [10]:
# Principal axes found by scikit-learn, one per row.
pca.components_

array([[-0.25581305,  0.64160172,  0.72312303],
       [ 0.35022262, -0.63569298,  0.68792336]])

In [12]:
# Variance explained by each of the two fitted components.
pca.explained_variance_

array([ 1.38489552,  0.87683765])

# Implementing our own PCA

1. Step 1: build the covariance matrix. For n features it is n × n.
2. Step 2: a) Eigendecomposition of the covariance matrix (equivalent to a Singular Value Decomposition of the centered data): find the eigenvectors (directions) and eigenvalues (variances along those directions).
2. Step 2: b) Sort the eigenvectors by their eigenvalues, largest first.

In [15]:
# Step 1: covariance matrix of the features.
# np.cov treats each row as one variable by default, so pass the data with
# features along the rows (3 x 100) to obtain a 3 x 3 matrix.
class1_t = np.transpose(class1)
cov_matrix = np.cov(class1_t)
print(cov_matrix.shape)
cov_matrix

(3, 3)


array([[ 0.73827272, -0.16523334, -0.08214447],
       [-0.16523334,  1.04699247,  0.24135635],
       [-0.08214447,  0.24135635,  1.14168894]])

In [21]:
# Step 2a: eigendecomposition of the covariance matrix.
eigen_values, eigen_vectors = np.linalg.eig(cov_matrix)
# Eigenvectors are stored column-wise: eigen_vectors[:, k] is the direction
# that belongs to eigen_values[k] (e.g. 1.38 <-> [-0.25, 0.64, 0.72]).
print(eigen_values, eigen_vectors, sep="\n")

[ 1.38489552  0.66522095  0.87683765]
[[-0.25581305  0.90105704  0.35022262]
 [ 0.64160172  0.42923382 -0.63569298]
 [ 0.72312303 -0.06208487  0.68792336]]


In [25]:
# Step 2b: pair every eigenvalue with its eigenvector.
# The eigenvectors are the columns of eigen_vectors, so iterate over the
# transpose to walk through them one column at a time.
eigen = [
    [value, vector]
    for value, vector in zip(eigen_values, eigen_vectors.T)
]

print(eigen)

[[1.3848955218163801, array([-0.25581305,  0.64160172,  0.72312303])], [0.66522094919918617, array([ 0.90105704,  0.42923382, -0.06208487])], [0.87683765273056302, array([ 0.35022262, -0.63569298,  0.68792336])]]


In [27]:
# Step 2b (continued): order the [eigenvalue, eigenvector] pairs by eigenvalue,
# largest first.
# Sort on the eigenvalue only. Without a key, list comparison falls through to
# the eigenvector arrays whenever two eigenvalues are equal, and ordering NumPy
# arrays raises "The truth value of an array ... is ambiguous".
eigen.sort(key=lambda pair: pair[0], reverse=True)
eigen

[[1.3848955218163801, array([-0.25581305,  0.64160172,  0.72312303])],
 [0.87683765273056302, array([ 0.35022262, -0.63569298,  0.68792336])],
 [0.66522094919918617, array([ 0.90105704,  0.42923382, -0.06208487])]]

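Note that the top two eigenvalues and eigenvectors match the explained variances and components returned by the built-in PCA. (Eigenvectors are only defined up to sign, so in general a component may appear negated.)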

In [30]:
# Show scikit-learn's result for comparison: explained variances first,
# a blank line, then the component vectors.
print(pca.explained_variance_, pca.components_, sep="\n\n")

[ 1.38489552  0.87683765]

[[-0.25581305  0.64160172  0.72312303]
 [ 0.35022262 -0.63569298  0.68792336]]
