# Implementing PCA Using Eigen Decomposition

In [2]:
import numpy as np

In [4]:
# Toy dataset: one row per sample, one column per feature.
arr = np.array(
    [
        [1, 2, 3, 4],
        [5, 5, 6, 7],
        [1, 4, 2, 3],
        [5, 3, 2, 1],
        [8, 1, 2, 2],
    ]
)
print(arr)

[[1 2 3 4]
 [5 5 6 7]
 [1 4 2 3]
 [5 3 2 1]
 [8 1 2 2]]


### Standardize the dataset.

In [9]:
def standerize(column):
    """Standardize values to zero mean and unit (population) standard deviation.

    Parameters
    ----------
    column : array_like
        Either a 1-D vector of observations for one feature, or a 2-D array
        with one row per observation and one column per feature. Statistics
        are always taken along axis 0, so each feature is scaled on its own.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``column`` holding the z-scores
        ``(x - mean) / std``. A feature with zero spread is returned as all
        zeros rather than NaN/inf.
    """
    values = np.asarray(column)
    mean_column = values.mean(axis=0)
    # Use the same axis as the mean; without it a 2-D input would be scaled
    # by a single std computed over every element of the array.
    std_column = values.std(axis=0)
    # A constant feature has std == 0; divide by 1 instead so its centred
    # values (all zeros) pass through unchanged instead of becoming NaN.
    safe_std = np.where(std_column == 0, 1.0, std_column)
    z = (values - mean_column) / safe_std
    return z

In [10]:
# Z-score every feature independently: each column of arr is standardized
# on its own and written into the matching column of a float result matrix.
standerized = np.empty(arr.shape, dtype=float)
for col_idx, feature in enumerate(arr.T):
    standerized[:, col_idx] = standerize(feature)

In [11]:
# Show the standardized matrix (same shape as arr, one column per feature).
print(standerized)

[[-1.         -0.63245553  0.          0.26062335]
 [ 0.33333333  1.26491106  1.73205081  1.56374007]
 [-1.          0.63245553 -0.57735027 -0.1737489 ]
 [ 0.33333333  0.         -0.57735027 -1.04249338]
 [ 1.33333333 -1.26491106 -0.57735027 -0.60812114]]


### Calculate the covariance matrix for the features in the dataset.

In [12]:
# Z^T Z / n, where Z is the standardized data and n is the row count of arr.
# Dividing by n (not n - 1) matches the population std used by standerize.
covariance = (standerized.T @ standerized) / arr.shape[0]
print(covariance)

[[ 0.8        -0.25298221  0.03849002 -0.14479075]
 [-0.25298221  0.8         0.51120772  0.49449803]
 [ 0.03849002  0.51120772  0.8         0.75235479]
 [-0.14479075  0.49449803  0.75235479  0.8       ]]


### Calculate the eigenvalues and eigenvectors for the covariance matrix.

In [None]:
# The covariance matrix is symmetric, so use eigh: unlike the general-purpose
# eig it always returns real eigenvalues and orthonormal eigenvectors (stored
# as columns). eigh yields eigenvalues in ascending order; the descending
# order PCA needs is applied explicitly in the sorting step.
eigenValues, eigenVectors = np.linalg.eigh(covariance)

### Sort eigenvalues and their corresponding eigenvectors.

In [13]:
# Order the eigenpairs by decreasing eigenvalue, keeping each eigenvector
# column aligned with its eigenvalue.
idx = np.argsort(eigenValues)[::-1]
eigenValues, eigenVectors = eigenValues[idx], eigenVectors[:, idx]

In [14]:
# Eigenvalues after the reordering step, largest first.
print(eigenValues)


[2.01263459 0.8522308  0.31510964 0.02002497]


In [15]:
# Eigenvectors are stored as columns, reordered with the same index as eigenValues.
print(eigenVectors)

[[ 0.16195986 -0.91705888 -0.30707099  0.19616173]
 [-0.52404813  0.20692161 -0.81731886  0.12061043]
 [-0.58589647 -0.3205394   0.1882497  -0.72009851]
 [-0.59654663 -0.11593512  0.44973251  0.65454704]]


### Pick k eigenvalues and form a matrix of eigenvectors.

In [16]:
# Number of leading principal components to keep.
k = 2
# Columns are eigenvectors sorted by decreasing eigenvalue, so the first k
# columns are the ones paired with the k largest eigenvalues.
picked = eigenVectors[:, :k]
print(picked)

[[ 0.16195986 -0.91705888]
 [-0.52404813  0.20692161]
 [-0.58589647 -0.3205394 ]
 [-0.59654663 -0.11593512]]


### Transform the standardized matrix by projecting it onto the selected eigenvectors.

In [17]:
# Project each standardized sample (row) onto the picked eigenvectors; the
# result has one row per sample and one column per kept component.
transformed = np.matmul(standerized, picked)
print(transformed)

[[ 1.40033078e-02  7.55974765e-01]
 [-2.55653399e+00 -7.80431775e-01]
 [-5.14801919e-02  1.25313470e+00]
 [ 1.01415002e+00  2.38808310e-04]
 [ 1.57986086e+00 -1.22891650e+00]]
