#### SVD (Singular Value Decomposition)

In [1]:
import numpy as np

In [2]:
# Seed the global NumPy RNG so the same 4x4 standard-normal matrix is drawn on every run.
np.random.seed(121)
A = np.random.randn(4, 4)
# Show three decimals only, to keep the printout compact.
print(A.round(3))

[[-0.212 -0.285 -0.574 -0.44 ]
 [-0.33   1.184  1.615  0.367]
 [-0.014  0.63   1.71  -1.327]
 [ 0.402 -0.191  1.404 -1.969]]


In [3]:
# Full SVD of A. Sigma comes back as a 1-D vector of singular values,
# not as a diagonal matrix.
U, Sigma, Vt = np.linalg.svd(A)
print(*map(np.shape, (U, Sigma, Vt)))
print(U, Sigma, Vt, sep="\n")

(4, 4) (4,) (4, 4)
[[-0.07913928 -0.31822729  0.86653217  0.37628494]
 [ 0.38294523  0.78655287  0.12002097  0.46934262]
 [ 0.65640669  0.02243181  0.35668848 -0.66437971]
 [ 0.64515128 -0.52873697 -0.32785711  0.44353889]]
[3.4229581  2.02287339 0.46263157 0.07935069]
[[ 0.04100747  0.22367823  0.78643002 -0.57429458]
 [-0.20019867  0.56209596  0.37041464  0.71187191]
 [-0.77847455  0.3945136  -0.33259252 -0.3573774 ]
 [-0.5934781  -0.69164673  0.36565426  0.18895901]]


In [4]:
# Rebuild A from its factors. Sigma is 1-D, so expand it into a diagonal
# matrix before multiplying.
A_recovered = U @ np.diag(Sigma) @ Vt
print(A_recovered.round(3))

[[-0.212 -0.285 -0.574 -0.44 ]
 [-0.33   1.184  1.615  0.367]
 [-0.014  0.63   1.71  -1.327]
 [ 0.402 -0.191  1.404 -1.969]]


In [5]:
# Introduce a linear dependency between rows

In [6]:
# Overwrite the last two rows in place so that only rows 0 and 1 remain independent:
# row 2 becomes row 0 + row 1, and row 3 becomes a copy of row 0.
A[2], A[3] = A[0] + A[1], A[0]
print(A.round(3))

[[-0.212 -0.285 -0.574 -0.44 ]
 [-0.33   1.184  1.615  0.367]
 [-0.542  0.899  1.041 -0.073]
 [-0.212 -0.285 -0.574 -0.44 ]]


In [7]:
# Decompose the modified A again; the rounded singular values make it easy
# to see how many of them are (numerically) zero.
U, Sigma, Vt = np.linalg.svd(A)
print(*map(np.shape, (U, Sigma, Vt)))
print(Sigma.round(3))

(4, 4) (4,) (4, 4)
[2.663 0.807 0.    0.   ]


In [8]:
# Reconstruct A from the first two singular triplets only: the first two
# columns of U, the first two singular values, and the first two rows of Vt.
A_recovered = U[:, :2] @ np.diag(Sigma[:2]) @ Vt[:2]
print(A_recovered.round(3))

[[-0.212 -0.285 -0.574 -0.44 ]
 [-0.33   1.184  1.615  0.367]
 [-0.542  0.899  1.041 -0.073]
 [-0.212 -0.285 -0.574 -0.44 ]]


#### Truncated SVD

In [9]:
import numpy as np
from scipy.sparse.linalg import svds

# Reproducible 6x6 test matrix drawn from the seeded global NumPy RNG.
np.random.seed(121)
matrix = np.random.random((6, 6))
print(matrix)

print("========")
# Truncation by hand: run the full SVD, then keep only the first
# `num_components` columns of U, singular values, and rows of Vt.
num_components = 4
U, Sigma, Vt = np.linalg.svd(matrix)
print(*map(np.shape, (U, Sigma, Vt)))
print(Sigma)
U_ = U[:, :num_components]
Sigma_ = np.diag(Sigma[:num_components])
Vt_ = Vt[:num_components]
matrix_ = U_ @ Sigma_ @ Vt_
print(matrix_)

print("========")
# Same truncation via SciPy's svds, asking directly for k = num_components
# singular triplets. Sigma__ is returned as a 1-D vector, so it is expanded
# with np.diag before the reconstruction.
U__, Sigma__, Vt__ = svds(matrix, k=num_components)
print(*map(np.shape, (U__, Sigma__, Vt__)))
matrix__ = U__ @ np.diag(Sigma__) @ Vt__
print(matrix__)

[[0.11133083 0.21076757 0.23296249 0.15194456 0.83017814 0.40791941]
 [0.5557906  0.74552394 0.24849976 0.9686594  0.95268418 0.48984885]
 [0.01829731 0.85760612 0.40493829 0.62247394 0.29537149 0.92958852]
 [0.4056155  0.56730065 0.24575605 0.22573721 0.03827786 0.58098021]
 [0.82925331 0.77326256 0.94693849 0.73632338 0.67328275 0.74517176]
 [0.51161442 0.46920965 0.6439515  0.82081228 0.14548493 0.01806415]]
(6, 6) (6,) (6, 6)
[3.2535007  0.88116505 0.83865238 0.55463089 0.35834824 0.0349925 ]
[[0.19222941 0.21792946 0.15951023 0.14084013 0.81641405 0.42533093]
 [0.44874275 0.72204422 0.34594106 0.99148577 0.96866325 0.4754868 ]
 [0.12656662 0.88860729 0.30625735 0.59517439 0.28036734 0.93961948]
 [0.23989012 0.51026588 0.39697353 0.27308905 0.05971563 0.57156395]
 [0.83806144 0.78847467 0.93868685 0.72673231 0.6740867  0.73812389]
 [0.59726589 0.47953891 0.56613544 0.80746028 0.13135039 0.03479656]]
(6, 4) (4,) (4, 6)
[[0.19222941 0.21792946 0.15951023 0.14084013 0.81641405 0.42533

#### Sklearn TruncatedSVD

In [12]:
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

# Load the iris feature matrix (samples in rows, measurements in columns).
iris = load_iris()
iris_data = iris.data

# TruncatedSVD uses a randomized solver by default. With random_state left at
# None it draws from the global NumPy RNG, so the result would depend on which
# cells ran before this one. Pinning random_state makes the cell reproducible
# on its own.
tsvd = TruncatedSVD(n_components=2, random_state=121)
tsvd.fit(iris_data)

# Project every sample onto the two fitted components.
iris_tsvd = tsvd.transform(iris_data)
print(iris_tsvd.shape)
print(iris_tsvd[:20])

(150, 2)
[[5.91274714 2.30203322]
 [5.57248242 1.97182599]
 [5.44697714 2.09520636]
 [5.43645948 1.87038151]
 [5.87564494 2.32829018]
 [6.47759822 2.32464996]
 [5.5159752  2.07090423]
 [5.85092859 2.14807482]
 [5.15891972 1.77506408]
 [5.64500117 1.99000106]
 [6.26539771 2.42576813]
 [5.75200785 2.02037338]
 [5.48058085 1.97777558]
 [4.95112411 2.04828749]
 [6.52596417 2.91606081]
 [6.79037199 2.82500759]
 [6.27239468 2.60811578]
 [5.92953789 2.26766614]
 [6.6481313  2.38959506]
 [6.09486463 2.36082303]]


In [13]:
# Rank-2 approximation of the iris data from NumPy's SVD: keep the first two
# columns of U, the first two singular values, and the first two rows of Vt.
# The product has the same shape as iris_data.
U, Sigma, Vt = np.linalg.svd(iris_data)
iris_data_ = U[:, :2] @ np.diag(Sigma[:2]) @ Vt[:2]
print(iris_data_.shape)
print(iris_data_[:20])

(150, 4)
[[5.0952927  3.50597743 1.40192232 0.20165319]
 [4.74588049 3.19610853 1.46136967 0.25800276]
 [4.68667405 3.21586325 1.30954904 0.19452725]
 [4.61488457 3.08894388 1.46347879 0.27002699]
 [5.07488651 3.50623125 1.36428119 0.1863997 ]
 [5.52598407 3.7330351  1.67566825 0.28872322]
 [4.731593   3.2288014  1.36216771 0.21446447]
 [5.00510918 3.39830515 1.47931372 0.24418439]
 [4.37933538 2.93134058 1.38864652 0.25618379]
 [4.80551481 3.23360903 1.48569239 0.26393296]
 [5.39533378 3.70766642 1.49514864 0.2183418 ]
 [4.89451945 3.29088668 1.51906398 0.27146211]
 [4.67854319 3.16443092 1.41000708 0.24052709]
 [4.30090163 3.00174374 1.08842179 0.12739443]
 [5.73037625 4.07476895 1.2813685  0.09359359]
 [5.90310008 4.12548386 1.48153824 0.16928199]
 [5.45240789 3.81002345 1.36951483 0.15684908]
 [5.09813811 3.49356935 1.43489086 0.21628344]
 [5.67252863 3.83336074 1.71712893 0.29503728]
 [5.24878923 3.60734078 1.45368795 0.21202774]]
