### Example 01

In [5]:
import numpy as np
 
def PCA(X , num_components):
    """Project ``X`` onto its leading principal components.

    The data is centred column-wise, the covariance matrix of the features
    is eigendecomposed, and the centred data is projected onto the
    eigenvectors that belong to the largest eigenvalues.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data; rows are observations, columns are variables.
    num_components : int
        Number of principal components to keep. Must satisfy
        ``0 <= num_components <= n_features``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, num_components)
        The centred data expressed in the basis of the selected
        eigenvectors, ordered by decreasing eigenvalue. The sign of each
        column follows whatever sign ``np.linalg.eigh`` returns.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``num_components`` is outside the valid range.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be a 2-D array of shape (n_samples, n_features); "
            "got %d dimension(s)" % X.ndim
        )

    n_features = X.shape[1]
    # A negative value would otherwise be interpreted as a slice bound
    # (dropping trailing components) and a too-large value would be silently
    # truncated, so reject both explicitly.
    if not 0 <= num_components <= n_features:
        raise ValueError(
            "num_components must be between 0 and %d; got %r"
            % (n_features, num_components)
        )

    #Step-1 : Subtract the mean of each variable
    X_meaned = X - np.mean(X , axis = 0)

    #Step-2 : Calculate the Covariance Matrix
    # np.cov returns a 0-d array for a single feature; eigh needs a matrix.
    cov_mat = np.atleast_2d(np.cov(X_meaned , rowvar = False))

    #Step-3 : Compute the Eigenvalues and Eigenvectors
    # eigh is appropriate because a covariance matrix is symmetric.
    eigen_values , eigen_vectors = np.linalg.eigh(cov_mat)

    #Step-4 : Sort Eigenvectors by descending Eigenvalue
    sorted_index = np.argsort(eigen_values)[::-1]
    sorted_eigenvectors = eigen_vectors[:,sorted_index]

    #Step-5 : Select the leading Eigenvectors
    eigenvector_subset = sorted_eigenvectors[:,0:num_components]

    #Step-6 : Transform the data
    X_reduced = X_meaned @ eigenvector_subset

    return X_reduced

In [10]:
# Generate a dummy dataset: 20 samples x 5 integer features drawn from [10, 50).
# A locally seeded generator makes the cell reproducible on every run without
# touching NumPy's global random state. Outputs saved from an earlier,
# unseeded run will not match the values produced here.
X = np.random.RandomState(42).randint(10, 50, 100).reshape(20, 5)

print("Before PCA")
print(X)

print("After PCA")
# Left as the last expression so the notebook displays the projected array.
PCA(X, num_components=2)

Before PCA
[[38 12 17 22 28]
 [41 27 47 12 48]
 [34 31 17 19 40]
 [32 16 11 36 11]
 [47 31 17 21 27]
 [30 11 45 32 41]
 [26 28 43 39 39]
 [46 30 14 40 21]
 [37 22 25 16 13]
 [18 21 42 17 27]
 [13 30 11 37 48]
 [15 17 16 23 44]
 [24 25 36 44 46]
 [14 22 43 32 16]
 [10 10 42 28 38]
 [42 20 29 15 43]
 [18 18 49 36 31]
 [39 28 37 18 49]
 [48 16 32 40 44]
 [19 26 30 20 24]]
After PCA


array([[-15.70069531,   0.88599797],
       [  9.51186531, -24.51073057],
       [-11.91727355,  -8.64167521],
       [-22.34294987,  20.887929  ],
       [-23.387173  ,  -6.80877599],
       [ 15.29427141,  -4.74361613],
       [ 13.70109406,  -0.96516717],
       [-25.33169602,   3.92765067],
       [-16.50570725,   7.93156814],
       [ 11.63883959,   7.48670468],
       [ -0.44166147,   4.56841171],
       [  1.09250623,   3.34346475],
       [ 12.94922767,  -1.87053187],
       [ 11.8222371 ,  21.63519916],
       [ 22.36693476,   9.12827222],
       [ -5.10310942, -17.07516904],
       [ 20.52610225,   9.27299051],
       [  3.91954624, -20.97106542],
       [ -2.79124406, -14.43220403],
       [  0.69888533,  10.95074661]])

In [4]:
import numpy as np

# Manually calculate PCA using NumPy:
# express a 3×2 matrix in the basis of its principal axes, then compare the
# leading component with scikit-learn's one-component projection.

# Define a matrix
A = np.array([[1,2],[3,4],[5,6]])

# Calculate the mean of each column
M = np.mean(A.T,axis =1)

# Center columns by subtracting column means
C = A - M

# Calculate the covariance matrix of the centered matrix
V = np.cov(C.T)

# Eigendecomposition of the covariance matrix
values, vectors = np.linalg.eig(V)

# np.linalg.eig does not guarantee any ordering of the eigenvalues, so sort
# the eigenpairs by descending eigenvalue to make the first projected column
# the principal component.
order = np.argsort(values)[::-1]
values, vectors = values[order], vectors[:, order]

# Project the centered data onto the eigenvectors (rows = samples)
p = vectors.T.dot(C.T)
print(p.T)

# Using scikit-learn
# NOTE: this import rebinds the name `PCA`; the PCA() function defined earlier
# in this notebook is shadowed from this cell onwards.
from sklearn.decomposition import PCA
pca = PCA(1)
pca.fit(A)  
B = pca.transform(A)
B



[[-2.82842712  0.        ]
 [ 0.          0.        ]
 [ 2.82842712  0.        ]]


array([[-2.82842712],
       [ 0.        ],
       [ 2.82842712]])