# Singular value decomposition (SVD) and Principal component analysis (PCA) 

In [1]:
import os
import sys

import numpy as np
import sklearn
from sklearn.decomposition import PCA

### Build 3D dataset

In code, each data point (sample) is stored as a row (horizontal) vector.  
(This is the transpose of the usual mathematical notation, in which each sample is a column vector.)

In [2]:
# Fix the global NumPy seed so the generated dataset is reproducible.
np.random.seed(4)

m = 60                # number of samples
w1, w2 = 0.1, 0.3     # weights mixing the first two features into the third
noise = 0.1           # scale of the Gaussian noise

# Random angles, followed by one noise draw per feature.
# The draws are made in the same order in which the features are filled.
angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5
cos_a, sin_a = np.cos(angles), np.sin(angles)
noise_x = np.random.randn(m)
noise_y = np.random.randn(m)
noise_z = np.random.randn(m)

# One row per sample, one column per feature.
X = np.empty((m, 3))
X[:, 0] = cos_a + sin_a / 2 + noise * noise_x / 2
X[:, 1] = sin_a * 0.7 + noise * noise_y / 2
# The third feature is a noisy linear combination of the first two.
X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * noise_z

In [3]:
# Subtract the per-feature (column) mean so every feature has zero mean.
feature_means = X.mean(axis=0)
X_centered = X - feature_means

# Show only the first ten samples.
print(X_centered[:10])

[[-1.03976771e+00 -7.60238460e-01 -3.32880482e-01]
 [-3.17841939e-02  3.90260570e-01 -3.64766659e-02]
 [-9.77238797e-01 -6.73862060e-01 -3.20757101e-01]
 [-9.44190485e-01  7.70779228e-04 -4.97304144e-02]
 [-7.87164831e-01 -5.10641487e-02  1.19970744e-01]
 [ 1.09409378e+00  1.15762056e-01  2.45551498e-01]
 [-1.04665623e+00 -8.53165791e-01 -2.05241169e-01]
 [ 6.49452398e-01 -4.82750342e-01 -7.94325731e-02]
 [ 9.92128132e-01  3.06140931e-01  3.96278747e-01]
 [ 5.25509785e-01  4.67955007e-01  1.62461684e-01]]


## Perform SVD for $X_{center}$

$U$: left singular matrix    
$s$: singular values  
$V$: right singular matrix  

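$S$: the rectangular ($m \times n$) diagonal matrix of singular values, so that $X_{center} = U S V^T$. The entries of $s$ are its diagonal elements. (Note that $S$ is not a covariance matrix.)  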


In [4]:
# Decompose the centered data: X_centered = U @ S @ Vt
U, s, Vt = np.linalg.svd(X_centered)

for label, factor in (('U', U), ('s', s), ('Vt', Vt)):
    print(f' shape of {label} = {factor.shape}')

# np.linalg.svd returns the singular values as a 1-D array `s`.
# Embed them on the diagonal of a zero matrix with the same shape as the data
# to obtain the singular-value matrix S (this is not a covariance matrix).
m, n = X.shape
S = np.zeros((m, n))
S[:n, :n] = np.diag(s)

print(f' shape of S = {S.shape}')

 shape of U = (60, 60)
 shape of s = (3,)
 shape of Vt = (3, 3)
 shape of S = (60, 3)


In [5]:
# Confirmation: print the SVD factors and their leading singular vectors.

# First three left singular vectors (columns of U) and the three right
# singular vectors (rows of Vt). v1..v3 are printed again in a later cell.
u1, u2, u3 = U[:, :3].T
v1, v2, v3 = Vt[:3]

divider = '\n======='

print(f'X_centered = {X_centered.shape}')
print(divider)
for label, factor in (('U', U), ('s', s), ('Vt', Vt)):
    print(f'{label}.shape = {factor.shape}')

print(divider)
print('left singular matrix U')
print('first (u1) to third vector (u3) of U')
for idx, vec in enumerate((u1, u2, u3), start=1):
    print(f'u{idx}  = {vec}')

print(divider)
print(f's = {s}')
print(f'S (first 10 rows) = \n {S[:10]}')

print(divider)
print('right singular matrix V')
print('first (v1) to third vector (v3); they are in horizontal direction')
for idx, vec in enumerate((v1, v2, v3), start=1):
    print(f'v{idx}  = {vec}')

X_centered = (60, 3)

U.shape = (60, 60)
s.shape = (3,)
Vt.shape = (3, 3)

left singular matrix U
first (u1) to third vector (u3) of U
u1  = [-0.18623814  0.01180778 -0.17346216 -0.13178818 -0.10775006  0.16297163
 -0.18780601  0.06630759  0.16137682  0.09765781 -0.15416057  0.15632448
 -0.16787761 -0.17124618  0.14781726 -0.04058771  0.06732845  0.11762291
  0.09876105 -0.19284393  0.09778846 -0.18503433 -0.15354203 -0.1860085
  0.16201204  0.10523169 -0.02624812  0.15196118 -0.1395307   0.11952411
  0.02979314 -0.19806744  0.02006933  0.12110959  0.13402096  0.0098926
  0.02056666 -0.05512735  0.10790961 -0.07718429  0.12712583 -0.04899798
 -0.14678399  0.15424011  0.12908978  0.04494672 -0.09398136  0.16656596
  0.00566108 -0.06107452 -0.19540901  0.13719378 -0.05943372  0.1645123
  0.04709703  0.15150457 -0.18320183  0.09853295 -0.17176825  0.1008287 ]
u2  = [-0.14896282  0.12490007 -0.12778066  0.10928631  0.08995651 -0.07154635
 -0.16565382 -0.23985759  0.01582059  0.101455    0.

## Perform PCA for $X$.  
Super-easy. Compare the variance-covariance matrix of $X$ computed by scikit-learn with the one above (`covX`).

In [6]:
# `PCA` is imported in the notebook's top import cell.
# Fit a PCA that keeps all three components of the centered data.
pca3 = PCA(n_components=3)
X3D = pca3.fit_transform(X_centered)   # data expressed in the principal-axis basis

# Covariance matrix of the data as reconstructed by the fitted PCA model.
COV_pca = pca3.get_covariance()
print(f' variance-covariance matrix calculated by sklearn :\n {COV_pca}')

# Check the claim before printing it: the PCA covariance must match the
# sample covariance of the data (features in columns, N-1 normalisation).
np.testing.assert_allclose(COV_pca, np.cov(X_centered, rowvar=False))
print('\n this is the same with the above')

 variance-covariance matrix calculated by sklearn :
 [[0.69812855 0.17640539 0.12137931]
 [0.17640539 0.1801727  0.07253614]
 [0.12137931 0.07253614 0.04552382]]

 this is the same with the above


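### Calculation of unitary matrix $U_{PCA}$ where each column is an eigenvector of the covariance matrix of $X_c$ (by PCA)

We see this is equivalent to $V$ (the right singular vectors) of $X_c$ by SVD, up to the sign of each vector.
(Note: in the PCA notation this matrix is called $U$; it corresponds to $V$ of the SVD, not to the left singular matrix $U$ above.)  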


In [7]:
# Principal axes found by PCA; each row of `components_` is one eigenvector.
U_pca = pca3.components_
print(f'eigenvector U_pca (in horizontal direction; U^T)= \n {U_pca}')

# Right singular vectors from the SVD, printed for comparison.
# Individual vectors may differ from the PCA result by an overall sign.
print('\n right singular matrix V by SVD')
print('first (v1) to third vector (v3); they are in horizontal direction')
for idx, vec in enumerate((v1, v2, v3), start=1):
    print(f'v{idx}  = {vec}')

eigenvector U_pca (in horizontal direction; U^T)= 
 [[-0.93636116 -0.29854881 -0.18465208]
 [ 0.34027485 -0.90119108 -0.2684542 ]
 [-0.08626012 -0.31420255  0.94542898]]

 right singular matrix V by SVD
first (v1) to third vector (v3); they are in horizontal direction
v1  = [0.93636116 0.29854881 0.18465208]
v2  = [-0.34027485  0.90119108  0.2684542 ]
v3  = [-0.08626012 -0.31420255  0.94542898]


Comparison of the singular values of $X_c$ (by SVD; $\sigma_i$) and the eigenvalues of the covariance matrix of $X_c$ (by PCA; $\lambda_i$).

$\Lambda = \frac{1}{N-1} \Sigma^2$  
$\lambda_i = \frac{1}{N-1} \sigma_i^2$ 

Confirm they are the same.

In [8]:
# Eigenvalues reported by PCA (variance explained along each principal axis).
eigenvalues = pca3.explained_variance_

# Squared singular values divided by (N - 1); these should match the eigenvalues.
scaled_squares = s * s / (m - 1)

print(f'PCA; eigenvalues of cov(X) = {eigenvalues}')
print(f'SVD: singular values of X = {s}')
print(f'\n  (singular value)^2 / (N-1)= \n  {scaled_squares}')

PCA; eigenvalues of cov(X) = [0.77830975 0.1351726  0.01034272]
SVD: singular values of X = [6.77645005 2.82403671 0.78116597]

  (singular value)^2 / (N-1)= 
  [0.77830975 0.1351726  0.01034272]
