In [30]:
from numpy import array
from numpy import mean
from numpy import cov
from numpy.linalg import eig
from numpy import argsort

# Hand-rolled PCA: 5 samples (rows), each with 3 features (columns).
A = array([
    [90, 60, 90],
    [90, 90, 30],
    [60, 60, 60],
    [60, 60, 90],
    [30, 30, 30],
])
print("Org: \n",A)

# Per-feature mean, i.e. the mean of every column.
M = mean(A, axis=0)
print("Mean: ", M)

# Centre the data: broadcasting subtracts each column's mean from that column.
C = A - M
print("centered: \n", C)

# Sample covariance between features; rowvar=False tells cov() that the
# variables are laid out in columns.
V = cov(C, rowvar=False)
print("co-variance: \n", V)

# Eigendecomposition of the covariance matrix. eig() returns eigenvectors as
# the COLUMNS of `vectors`, in no particular order.
# NOTE: V is symmetric, so numpy.linalg.eigh would also be applicable; eig is
# kept here so the output recorded under this cell stays valid.
values, vectors = eig(V)
print("eigen vector: \n", vectors)
print("eigen value: ",values)

# Indices that order the eigenvalues from largest to smallest
# (ascending argsort of the negated values).
sort_descending = (-values).argsort()
print("Sort_index: ", sort_descending)

# Reorder eigenvalues and eigenvectors together. Transposing first makes each
# ROW of principal_components one eigenvector, so the top-n components are
# simply the first n rows.
sorted_eigenvalue = values[sort_descending]
principal_components = vectors.T[sort_descending]
print("sorted eigenvalue: ",sorted_eigenvalue)
print("corresponding principal components: \n", principal_components)

# Project the centred samples onto the principal axes. P is
# (components x samples); its transpose is printed so each row is one sample.
P = principal_components.dot(C.T)
print(P.T)

Org: 
 [[90 60 90]
 [90 90 30]
 [60 60 60]
 [60 60 90]
 [30 30 30]]
Mean:  [66. 60. 60.]
centered: 
 [[ 24.   0.  30.]
 [ 24.  30. -30.]
 [ -6.   0.   0.]
 [ -6.   0.  30.]
 [-36. -30. -30.]]
co-variance: 
 [[630. 450. 225.]
 [450. 450.   0.]
 [225.   0. 900.]]
eigen vector: 
 [[ 0.6487899  -0.65580225 -0.3859988 ]
 [-0.74104991 -0.4291978  -0.51636642]
 [-0.17296443 -0.62105769  0.7644414 ]]
eigen value:  [  56.02457535 1137.5874413   786.38798335]
Sort_index:  [1 2 0]
sorted eigenvalue:  [1137.5874413   786.38798335   56.02457535]
corresponding principal components: 
 [[-0.65580225 -0.4291978  -0.62105769]
 [-0.3859988  -0.51636642  0.7644414 ]
 [ 0.6487899  -0.74104991 -0.17296443]]
[[-34.37098481  13.66927088  10.3820247 ]
 [ -9.98345733 -47.68820559  -1.47160698]
 [  3.93481353   2.31599277  -3.89273939]
 [-14.69691716  25.24923474  -9.08167225]
 [ 55.11654576   6.45370719   4.06399392]]


In [20]:
# Principal Component Analysis
from numpy import array
from sklearn.decomposition import PCA
# Same 5-sample x 3-feature matrix, this time handed to scikit-learn's PCA.
A = array([
    [90, 60, 90],
    [90, 90, 30],
    [60, 60, 60],
    [60, 60, 90],
    [30, 30, 30],
])
print(A)

# Keep all three components and fit the model on the data
# (fit() returns the estimator itself, so the call can be chained).
pca = PCA(n_components=3).fit(A)

# Each row of components_ is one principal axis; explained_variance_ holds the
# matching variances. The sign of an axis is arbitrary, so a row may be the
# negative of the one obtained from a hand-written eigendecomposition.
print(pca.components_)
print(pca.explained_variance_)

# Project the samples onto the fitted principal axes.
B = pca.transform(A)
print(B)

[[90 60 90]
 [90 90 30]
 [60 60 60]
 [60 60 90]
 [30 30 30]]
[[-0.65580225 -0.4291978  -0.62105769]
 [ 0.3859988   0.51636642 -0.7644414 ]
 [ 0.6487899  -0.74104991 -0.17296443]]
[1137.5874413   786.38798335   56.02457535]
[[-34.37098481 -13.66927088  10.3820247 ]
 [ -9.98345733  47.68820559  -1.47160698]
 [  3.93481353  -2.31599277  -3.89273939]
 [-14.69691716 -25.24923474  -9.08167225]
 [ 55.11654576  -6.45370719   4.06399392]]
