Mathematical understanding of SVD
>uses:
    Pseudoinverse calculation
    Dimensionality reduction using manual method as well as Sklearn Truncated SVD

## Basic understanding of SVD

In [1]:
import numpy as np
from scipy import linalg

In [2]:
# Example matrix to decompose: m = 3 rows, n = 2 columns
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
print(A) #3*2

[[1 2]
 [3 4]
 [5 6]]


In [3]:
# Full SVD of A: three factors such that A = U . Sigma . VT,
# where s carries the diagonal of Sigma as a 1-D array
U, s, VT = linalg.svd(A, full_matrices=True)

In [4]:
print(U) #3*3 (m*m, with m = 3 rows of A)

[[-0.2298477   0.88346102  0.40824829]
 [-0.52474482  0.24078249 -0.81649658]
 [-0.81964194 -0.40189603  0.40824829]]


In [5]:
print(s) #s is a 1-D array holding the 2 singular values; the Sigma matrix built from it below is what should be 3*2

[9.52551809 0.51430058]


In [6]:
# Sigma has the same m*n shape as A; start from an all-zero matrix
Sigma = np.zeros(A.shape)
Sigma

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [7]:
# Populate the leading diagonal block of Sigma with the singular values.
# len(s) == min(m, n), so the k x k block always fits inside the m*n Sigma;
# slicing with A.shape[1] only works when A has at least as many rows as columns.
Sigma[:len(s), :len(s)] = np.diag(s)
Sigma

array([[9.52551809, 0.        ],
       [0.        , 0.51430058],
       [0.        , 0.        ]])

In [8]:
print(VT) #this should be 2*2 (n*n, with n = 2 columns of A)

[[-0.61962948 -0.78489445]
 [-0.78489445  0.61962948]]


In [9]:
# Multiply the three factors back together to recover A:
#   U (3*3) . Sigma (3*2) . VT (2*2)  ->  3*2
A_reconstrcuted = U @ (Sigma @ VT)
A_reconstrcuted

array([[1., 2.],
       [3., 4.],
       [5., 6.]])

When A is a square matrix, Sigma can be reconstructed directly using
Sigma = diag(s)

## SVD is also used for calculating the Pseudo Inverse

A = U . Sigma . V^T
A^+ = V . D^+ . U^T
We can get U and V from the SVD operation.

In [None]:
D^+ can be calculated by creating a diagonal matrix from Sigma,
taking the reciprocal of each non-zero element in Sigma (zeros stay zero),
and taking the transpose if the original matrix was rectangular.

In [None]:
         s11,   0,   0
Sigma = (  0, s22,   0)
           0,   0, s33
    
       1/s11,     0,     0
D^+ = (    0, 1/s22,     0)
           0,     0, 1/s33

In [13]:
# SciPy (like NumPy) provides the function pinv() for calculating the
# pseudoinverse of a rectangular matrix; the scipy.linalg version is used here.
# Pseudoinverse

# define matrix
# (np.array rather than the bare name `array`, which this notebook never imports)
A = np.array([
 [0.1, 0.2],
 [0.3, 0.4],
 [0.5, 0.6],
 [0.7, 0.8]])
print(A)
# calculate pseudoinverse: B has the transposed shape of A (2*4)
B = linalg.pinv(A)
print("Pseudo Inverse of A\n",B)

[[0.1 0.2]
 [0.3 0.4]
 [0.5 0.6]
 [0.7 0.8]]
Pseudo Inverse of A
 [[-1.00000000e+01 -5.00000000e+00  9.02056208e-15  5.00000000e+00]
 [ 8.50000000e+00  4.50000000e+00  5.00000000e-01 -3.50000000e+00]]


## We can calculate the pseudoinverse manually via the SVD and compare the results to the pinv() function.

In [14]:
# Decompose the 4*2 matrix; S is the 1-D array of its singular values
print("A:\n",A)
U, S, VT = linalg.svd(A, full_matrices=True)
print("S:\n",S)

A:
 [[0.1 0.2]
 [0.3 0.4]
 [0.5 0.6]
 [0.7 0.8]]
S:
 [1.42690955 0.06268282]


In [15]:
# Build the m*n matrix whose transpose is D^+: the reciprocal of every
# non-zero singular value on the diagonal, zeros everywhere else.
# Singular values at or below `tol` are treated as zero, so a rank-deficient A
# does not produce inf / enormous entries from a plain 1/S.
tol = np.finfo(S.dtype).eps * max(A.shape) * S.max(initial=0.0)
S_inv = np.zeros_like(S)
np.divide(1.0, S, out=S_inv, where=S > tol)
sigma = np.zeros(A.shape)
# len(S) == min(m, n), so the diagonal block fits for tall and wide A alike
sigma[:len(S), :len(S)] = np.diag(S_inv)

In [16]:
# A^+ = V . D^+ . U^T, with V = VT.T and D^+ = sigma.T
A_psudo_inv = VT.T @ (sigma.T @ U.T)
A_psudo_inv

array([[-1.00000000e+01, -5.00000000e+00,  9.07607323e-15,
         5.00000000e+00],
       [ 8.50000000e+00,  4.50000000e+00,  5.00000000e-01,
        -3.50000000e+00]])

In [17]:
# B is the linalg.pinv(A) result computed earlier; displayed to compare with A_psudo_inv
B

array([[-1.00000000e+01, -5.00000000e+00,  9.02056208e-15,
         5.00000000e+00],
       [ 8.50000000e+00,  4.50000000e+00,  5.00000000e-01,
        -3.50000000e+00]])

## SVD for Dimensionality Reduction

In [18]:
# define a wide matrix (3 rows, 10 columns) for the dimensionality-reduction demo
# (np.array rather than the bare name `array`, which this notebook never imports)
A = np.array([
 [1,2,3,4,5,6,7,8,9,10],
 [11,12,13,14,15,16,17,18,19,20],
 [21,22,23,24,25,26,27,28,29,30]])
print(A)
print("Shape of A",(A.shape))

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]
Shape of A (3, 10)


In [19]:
# Decompose the 3*10 matrix and report the shape of each returned factor
U, s, VT = linalg.svd(A, full_matrices=True)
print("Shape of U", U.shape)
print("Shape of s", s.shape)
print("Shape of VT", VT.shape)

Shape of U (3, 3)
Shape of s (3,)
Shape of VT (10, 10)


In [20]:
# create an m*n sigma matrix of zeros, then put the singular values on its diagonal
# len(s) == min(m, n), so the slice fits whether A is wide, tall or square
# (slicing with A.shape[0] only works when A has no more rows than columns)
sigma = np.zeros(A.shape)
sigma[:len(s), :len(s)] = np.diag(s)

In [21]:
#visualising the sigma matrix
# NOTE(review): this import would normally live in the imports cell at the top of the notebook
import pandas as pd
# the DataFrame is only used to render the 3*10 sigma as a table
pd.DataFrame(sigma)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,96.965734,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,7.255783,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.488795e-15,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
#select columns from sigma
# n_elements = how many leading columns of sigma (i.e. singular values) to keep
n_elements = 2
# NOTE: this overwrites sigma, so the full 3*10 matrix is no longer available after this cell
sigma = sigma[:, :n_elements]
sigma.shape

(3, 2)

In [23]:
#select rows from VT
# keep only the first n_elements rows; NOTE: this overwrites the full 10*10 VT
VT = VT[:n_elements,:] 
VT.shape

(2, 10)

In [24]:
# Rebuild a matrix of A's shape from the truncated factors:
#   A     = 3*10
#   U     = 3*3
#   sigma = 3*2
#   VT    = 2*10
B = U @ (sigma @ VT)
print(B.shape)

(3, 10)


In [25]:
# Reduced representation of A: one row per row of A, n_elements columns
T = U @ sigma
print(T)

[[-18.52157747  -6.47697214]
 [-49.81310011  -1.91182038]
 [-81.10462276   2.65333138]]


In [26]:
# The same projection computed straight from A:
# U.sigma = A.VT.T (A dot the transpose of the truncated VT)
A @ VT.T

array([[-18.52157747,  -6.47697214],
       [-49.81310011,  -1.91182038],
       [-81.10462276,   2.65333138]])

## Truncated SVD in SKLearn

In [27]:
# the 3*10 matrix defined earlier, shown again as the input for TruncatedSVD
A

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])

In [28]:
# NOTE(review): this import would normally live in the imports cell at the top of the notebook
from sklearn.decomposition import TruncatedSVD
# Keep 2 components, matching n_elements in the manual reduction.
# TruncatedSVD's default solver is randomized, so random_state is pinned to make
# a Restart & Run All reproduce the same fit.
svd = TruncatedSVD(n_components=2, random_state=0)
svd.fit(A)

In [29]:
# Project A onto the 2 fitted components.
# The printed values have the same magnitudes as the manual U.sigma result (T) but the
# opposite sign in both columns, so the two agree only up to sign.
result = svd.transform(A)
print(result)

[[18.52157747  6.47697214]
 [49.81310011  1.91182038]
 [81.10462276 -2.65333138]]


In [None]:
#result = U.sigma = A.VT.T, up to the sign of each column (here both columns are negated relative to the manual T)