In [7]:
import numpy as np
import scipy.linalg
import utils
from time import time
from utils import QR_Factorization, EVD, SVD, Bidiagonal_fastMult
np.set_printoptions(precision=7)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Problem 1: SVD by Two-Phase Approach

## Phase-I: Golub-Kahan Bidiagonalization

In [8]:
A = np.array([[0, 0, 0, 0],
              [0, 0, 0, 0],
              [0, 0, 1, 0],
              [0, 0, 0, 0],
              [2, 5, 0, 0],
              [0, 0, 0, 0],
              [0, 0, 0, 0]], dtype=np.float64)
B, Qt, P = SVD.svd_phaseI(A)
print(B)


[[ 2. -5.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0. -1.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]


## Phase-II

In [9]:
m = n = 1024
A = np.random.rand(m,n)

**Test Fast Multiplication for A@B, where B is upper bidiagonal matrix.**

Numpy @ might use multi-thread to accelerate the computation. But our implementation is O(n^2) which is theoretically more effcient.

In [10]:
B, _, _ = SVD.svd_phaseI(A)

Test for `fastMult_upper_bidiagonal`

In [11]:
numpy_mul_begin = time()
for i in range(1000):
    A@B
numpy_mul_end = time()
print("{:.4f}s".format(numpy_mul_end - numpy_mul_begin))

41.1411s


In [12]:
fastMul_begin = time()
for i in range(1000):
    Bidiagonal_fastMult.fastMult_upper_bidiagonal(A, B)
fastMul_end = time()
print("{:.4f}s".format(fastMul_end - fastMul_begin))

34.0637s


Test for `upper_fastMult_lower_bidiagonal`


In [13]:
numpy_mul_begin = time()
for i in range(1000):
    B@B.T
numpy_mul_end = time()
print("{:.4f}s".format(numpy_mul_end - numpy_mul_begin))

20.6029s


In [14]:
fastMul_begin = time()
for i in range(1000):
    Bidiagonal_fastMult.upper_fastMult_lower_bidiagonal(B, B.T)
fastMul_end = time()
print("{:.4f}s".format(fastMul_end - fastMul_begin))

7.8426s


Test for `qr_tridiagonal_by_Givens` and `qr_lower_bidiagonal_by_Givens`

In [15]:
begin = time()
for i in range(100):
    QR_Factorization.qr_tridiagonal_by_Givens(B.T, return_Givens=True)
end = time()
print("{:.4f}s".format(end-begin))

4.5729s


In [16]:
begin = time()
for i in range(100):
    QR_Factorization.qr_lower_bidiagonal_by_Givens(B.T, return_Givens=True)
end = time()
print("{:.4f}s".format(end-begin))

1.2722s


**Test SVD**

Choose the parameter phaseII as 'A', 'B1', 'B2' to test different implementations of phase II

In [17]:
m = 150
n = 150
A = np.random.rand(m,n)
A[n-50:n] = A[n-50:n] * 1000

In [18]:
U, S, Vt = SVD.svd(A, phaseII='A')
# U, S, Vt = SVD.svd(A, phaseII='A2')
# U, S, Vt = SVD.svd(A, phaseII='B')
# U, S, Vt = SVD.svd(A, phaseII='B2')
# U, S, Vt = SVD.svd(A, phaseII='C')
_, Ss, _  = scipy.linalg.svd(A, full_matrices=False)

SVD.accuracy_test(A, U, S, Vt, acc=1e-8)

phaseI: 0.0650s
phaseII: 0.4899s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
8.58538320297475e-10


(100.0, 100.0, 8.58538320297475e-10)

**Accuracy Test:**

Modify acc as you like!

In [19]:
SVD.accuracy_test(A, U, S, Vt, acc=1e-8)

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
8.58538320297475e-10


(100.0, 100.0, 8.58538320297475e-10)

**Scipy SVD**

In [20]:
U, Ss, Vt  = scipy.linalg.svd(A, full_matrices=False)
# print(np.abs(U@np.diag(S)@Vt - A))
acc = 1e-8
print("Percentage of entrices successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U@np.diag(Ss)@Vt - A)< acc) / (n*m) * 100, "%")
# U, S, Vt

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %


## Test for p2

In [21]:
accuracy = []
time = []
for m in range(100, 1001, 100):
    print(m)
    n = m
    k = 66
    A = np.array(np.diag([1/4.1]*n, 0)+np.diag([1/4.1]*(n-1), 1)+np.diag([2.1/4.1]*(n-1), -1))
    A = np.linalg.matrix_power(A, k)
    UA, SA, VAt,time_ps1,time_ps2 = SVD.svd(A, phaseII="A", timed = True)
    accuracy_result = SVD.accuracy_test(A, UA, SA, VAt, acc=1e-8)
    accuracy.append([m, accuracy_result[0], accuracy_result[1], accuracy_result[2]]) 
    time.append([m, time_ps1, time_ps2])

import pandas as pd
accuracy = pd.DataFrame(accuracy)
accuracy.to_csv("A_Kernel_Accuracy.csv")
time = pd.DataFrame(time)
time.to_csv("A_Kernel_Time.csv")

100
phaseI: 0.0120s
phaseII: 0.0180s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
97.05882352941177 %
Max error of singular values:
1.6777644322036522e-08
200
phaseI: 0.1260s
phaseII: 0.0860s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
97.22222222222221 %
Max error of singular values:
2.39692387164227e-08
300
phaseI: 0.3884s
phaseII: 0.1517s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
97.24770642201835 %
Max error of singular values:
2.7038950980300296e-08
400
phaseI: 0.8529s
phaseII: 0.2821s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
96.5986394557823 %
Max error of singular values:
4.038962878639958e-08
500
phaseI: 1.4984s
phaseII: 0.4

In [22]:
accuracy = []
time = []
for m in range(10, 201, 10):
    print(m)
    n = m
    A = np.random.rand(m, n)
    UA, SA, VAt,time_ps1,time_ps2 = SVD.svd(A, phaseII="A", timed = True)
    accuracy_result = SVD.accuracy_test(A, UA, SA, VAt, acc=1e-8)
    accuracy.append([m, accuracy_result[0], accuracy_result[1], accuracy_result[2]]) 
    time.append([m, time_ps1, time_ps2])
import pandas as pd
accuracy = pd.DataFrame(accuracy)
accuracy.to_csv("A_Random_Accuracy.csv")
time = pd.DataFrame(time)
time.to_csv("A_Random_Time.csv")

10
phaseI: 0.0020s
phaseII: 0.0080s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
2.1316368808976804e-13
20
phaseI: 0.0040s
phaseII: 0.0220s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
6.572867250476122e-15
30
phaseI: 0.0050s
phaseII: 0.0520s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
7.105427357601002e-15
40
phaseI: 0.0070s
phaseII: 0.0898s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
1.0658141036401503e-14
50
phaseI: 0.0080s
phaseII: 0.1497s
Percentage of entrices successfully recovered 

In [23]:
accuracy = []
time = []
for m in range(100, 1001, 100):
    print(m)
    n = m
    k = 66
    A = np.array(np.diag([1/4.1]*n, 0)+np.diag([1/4.1]*(n-1), 1)+np.diag([2.1/4.1]*(n-1), -1))
    A = np.linalg.matrix_power(A, k)
    UB, SB, VBt,time_ps1,time_ps2 = SVD.svd(A, phaseII="B", timed = True)
    accuracy_result = SVD.accuracy_test(A, UB, SB, VBt, acc=1e-8)
    accuracy.append([m, accuracy_result[0], accuracy_result[1], accuracy_result[2]]) 
    time.append([m, time_ps1, time_ps2])

import pandas as pd
accuracy = pd.DataFrame(accuracy)
accuracy.to_csv("B_Kernel_Accuracy.csv")
time = pd.DataFrame(time)
time.to_csv("B_Kernel_Time.csv")

100
phaseI: 0.0260s
phaseII: 0.0550s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
6.0585375741087076e-15
200
phaseI: 0.2020s
phaseII: 0.2820s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
1.887379141862766e-15
300
phaseI: 0.4913s
phaseII: 0.5070s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
5.440092820663267e-15
400
phaseI: 0.9378s
phaseII: 1.0041s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
5.773159728050814e-15
500
phaseI: 1.4745s
phaseII: 1.7415s
Percentage of entrices successfully recove

In [24]:
accuracy = []
time = []
for m in range(10, 201, 10):
    print(m)
    n = m
    A = np.random.rand(m, n)
    UB, SB, VBt,time_ps1,time_ps2 = SVD.svd(A, phaseII="B", timed = True)
    accuracy_result = SVD.accuracy_test(A, UB, SB, VBt, acc=1e-8)
    accuracy.append([m, accuracy_result[0], accuracy_result[1], accuracy_result[2]]) 
    time.append([m, time_ps1, time_ps2])

import pandas as pd
accuracy = pd.DataFrame(accuracy)
accuracy.to_csv("B_Random_Accuracy.csv")
time = pd.DataFrame(time)
time.to_csv("B_Random_Time.csv")

10
phaseI: 0.0030s
phaseII: 0.0810s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
2.6645352591003757e-15
20
phaseI: 0.0040s
phaseII: 0.2243s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
95.0 %
Max error of singular values:
0.0564696268578293
30
phaseI: 0.0060s
phaseII: 1.0633s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
1.3766765505351941e-14
40
phaseI: 0.0090s
phaseII: 1.1907s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
100.0 %
Max error of singular values:
1.8207657603852567e-14
50
phaseI: 0.0120s
phaseII: 0.9552s
Percentage of entrices successfully recovered by 

SVD accuracy test playground: modify acc as you like!

In [26]:
n = m = 1000
k = 66
A = np.array(np.diag([1/4.1]*n, 0)+np.diag([1/4.1]*(n-1), 1)+np.diag([2.1/4.1]*(n-1), -1))
U, S, Vt = SVD.svd(A, phaseII='A')
SVD.accuracy_test(A, U, S, Vt, acc=1e-8)

phaseI: 11.2736s
phaseII: 65.1873s
Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of singular values with accuracy: 1e-08
99.7 %
Max error of singular values:
2.059021695659859e-08


(100.0, 99.7, 2.059021695659859e-08)

In [27]:
print("Percentage of entrices successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(U@np.diag(S)@Vt - A)< acc) / (n*m) * 100, "%")
acc = 1e-8
print("Percentage of entrices of pseudoinverse successfully recovered by SVD with accuracy: {}".format(acc))
print(np.sum(np.abs(Vt.T@np.diag(S)@U.T - np.linalg.inv(A))< acc) / (n*m) * 100, "%")

Percentage of entrices successfully recovered by SVD with accuracy: 1e-08
100.0 %
Percentage of entrices of pseudoinverse successfully recovered by SVD with accuracy: 1e-08
45.1451 %
