In [1]:
from time import perf_counter, time

import numpy as np
import scipy
import scipy.linalg

import utils
from utils import QR_Factorization, EVD, SVD, Bidiagonal_fastMult

%load_ext autoreload
%autoreload 2

# Problem 1: SVD by Two-Phase Approach

## Phase-I: Golub-Kahan Bidiagonalization

In [2]:
# Sparse 7x4 test matrix: all zeros except A[2, 2] = 1, A[4, 0] = 2 and A[4, 1] = 5.
A = np.zeros((7, 4), dtype=np.float64)
A[2, 2] = 1
A[4, 0], A[4, 1] = 2, 5

# Phase I returns three values; only the first one, B, is displayed here.
B, Qt, P = SVD.svd_phaseI(A)
print(B)


[[ 2. -5.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0. -1.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


## Phase-II

In [3]:
# Square problem size for the random test matrix.
m = n = 1024
# Seed the global NumPy RNG so that Restart & Run All regenerates the same A.
np.random.seed(0)
A = np.random.rand(m, n)

**Test Fast Multiplication for A@B, where B is an upper bidiagonal matrix.**

NumPy's `@` operator might use multiple threads to accelerate the computation, but our implementation is $O(n^2)$, which is theoretically more efficient.

In [4]:
# Run Phase I on the current A; only the first returned value, B, is kept.
# The other two return values are discarded.
B, _, _ = SVD.svd_phaseI(A)

Test for `fastMult_upper_bidiagonal`

In [5]:
# Baseline: time 1000 products A @ B computed with NumPy's matmul operator.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
numpy_mul_begin = perf_counter()
for i in range(1000):
    A @ B
numpy_mul_end = perf_counter()
print("{:.4f}s".format(numpy_mul_end - numpy_mul_begin))

18.4201s


In [6]:
# Time 1000 calls of the custom product routine on the operands (A, B).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
fastMul_begin = perf_counter()
for i in range(1000):
    Bidiagonal_fastMult.fastMult_upper_bidiagonal(A, B)
fastMul_end = perf_counter()
print("{:.4f}s".format(fastMul_end - fastMul_begin))

24.0754s


Test for `upper_fastMult_lower_bidiagonal`


In [7]:
# Baseline: time 1000 products B @ B.T computed with NumPy's matmul operator.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
numpy_mul_begin = perf_counter()
for i in range(1000):
    B @ B.T
numpy_mul_end = perf_counter()
print("{:.4f}s".format(numpy_mul_end - numpy_mul_begin))

13.7348s


In [8]:
# Time 1000 calls of the custom product routine on the operands (B, B.T).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
fastMul_begin = perf_counter()
for i in range(1000):
    Bidiagonal_fastMult.upper_fastMult_lower_bidiagonal(B, B.T)
fastMul_end = perf_counter()
print("{:.4f}s".format(fastMul_end - fastMul_begin))

7.7112s


Test for `qr_tridiagonal_by_Givens` and `qr_lower_bidiagonal_by_Givens`

In [9]:
# Time 100 factorizations of B.T with the tridiagonal Givens QR routine.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
begin = perf_counter()
for i in range(100):
    QR_Factorization.qr_tridiagonal_by_Givens(B.T, return_Givens=True)
end = perf_counter()
print("{:.4f}s".format(end - begin))

3.6617s


In [10]:
# Time 100 factorizations of B.T with the lower-bidiagonal Givens QR routine.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
begin = perf_counter()
for i in range(100):
    QR_Factorization.qr_lower_bidiagonal_by_Givens(B.T, return_Givens=True)
end = perf_counter()
print("{:.4f}s".format(end - begin))

0.9985s


**Test SVD**

Set the `phaseII` parameter to 'A', 'B1', or 'B2' to test the different implementations of Phase II.

In [11]:
m = 150
n = 150
# Seed the global NumPy RNG so that Restart & Run All regenerates the same A.
np.random.seed(1)
A = np.random.rand(m, n)
# Scale the last 50 rows by 1000. Rows are indexed with the row count m
# (the original used n, which only worked because the matrix is square).
A[m - 50:m] *= 1000

In [12]:
# Two-phase SVD of A; uncomment exactly one line to switch the Phase-II variant.
U, S, Vt = SVD.svd(A, phaseII='A')
# U, S, Vt = SVD.svd(A, phaseII='B1')
# U, S, Vt = SVD.svd(A, phaseII='B2')
# U, S, Vt = SVD.svd(A, phaseII='C')

# Percentage of entries of the reconstruction U @ diag(S) @ Vt that lie within
# `acc` of A. A.size is used as the denominator so the result does not depend
# on the globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

phaseI: 0.0374s
phaseII: 0.4317s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


In [13]:
# Two-phase SVD of A; uncomment exactly one line to switch the Phase-II variant.
# U, S, Vt = SVD.svd(A, phaseII='A')
# U, S, Vt = SVD.svd(A, phaseII='B1')
U, S, Vt = SVD.svd(A, phaseII='B2')
# U, S, Vt = SVD.svd(A, phaseII='C')

# Percentage of entries of the reconstruction U @ diag(S) @ Vt that lie within
# `acc` of A. A.size is used as the denominator so the result does not depend
# on the globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

phaseI: 0.0214s
phaseII: 5.2051s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


**Accuracy Test:**

Modify acc as you like!

In [14]:
# Re-check the most recently computed factors U, S, Vt against A at tolerance `acc`.
# A.size is used as the denominator so the result does not depend on the
# globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


**Scipy SVD**

In [15]:
# Reference result: SciPy's reduced SVD of the same A, scored with the same
# reconstruction metric. Note that this overwrites U, S and Vt.
U, S, Vt = scipy.linalg.svd(A, full_matrices=False)

# A.size is used as the denominator so the result does not depend on the
# globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


## Test for p2

### Important note! 
Phase 2B-optional requires a stricter convergence tolerance, i.e. tol=1e-15, to produce a sufficiently accurate SVD. In comparison, Phase-2A and Phase-2B use tol=1e-8.

In [16]:
m = n = 1024
k = 100
# Tridiagonal matrix with 1/2 on the main diagonal, 3/8 on the superdiagonal
# and 1/8 on the subdiagonal. The sum of np.diag results is already an
# ndarray, so no extra np.array(...) wrapper is needed.
A = np.diag([1/2]*n, 0) + np.diag([3/8]*(n-1), 1) + np.diag([1/8]*(n-1), -1)

# The matrix under test is the k-th matrix power of that tridiagonal matrix.
A = np.linalg.matrix_power(A, k)
U, S, Vt = SVD.svd(A, phaseII="B2")
# U, S, Vt = scipy.linalg.svd(A)

# Percentage of entries of the reconstruction U @ diag(S) @ Vt that lie within
# `acc` of A. A.size is used as the denominator so the result does not depend
# on the globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

phaseI: 11.4141s
phaseII: 9.3148s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


Modify acc as you like!

In [17]:
# Re-check the most recently computed factors U, S, Vt against A at tolerance `acc`.
# A.size is used as the denominator so the result does not depend on the
# globals m and n still matching the current A.
acc = 1e-8
print("Percentage of entries successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U @ np.diag(S) @ Vt - A) < acc) / A.size * 100, "%")

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
