# Scaled Array

In [1]:
from lmdec.array.core.scaled import *
from lmdec.array.core.metrics import *
from lmdec.decomp.init_methods import *
from lmdec.decomp import PowerMethod
from lmdec.array.core.matrix_ops import svd_to_trunc_svd
import numpy as np
from math import sqrt
import dask.array as da
import matplotlib.pyplot as plt

In [3]:
N, P = 1000, 400
a = da.array(np.random.randn(N, P))

U, S, V = da.linalg.svd(a)

print(subspace_dist(U, v_init(a, V), S))
print(subspace_dist(U, v_init(a, V, S), S))

2.5088819910479288e-12
2.5088819910479288e-12


In [2]:
 N, P = 100, 40
k = 10
a = da.array(np.random.randn(N, P))
sa = ScaledArray(False, False, None)
sa.fit(a)
U, S, V = da.linalg.svd(a)
Uk, Sk = svd_to_trunc_svd(u=U, s=S, k=k)

for i in range(1, 11):
    U1 = sub_svd_init(sa, k=k, warm_start_row_factor=i, log=0)

    print(subspace_dist(U1, Uk, Sk))


0.27579961172298484
0.24727224312654894
0.2158305965323125
0.1902187273705228
0.12966705867060635
0.10257624225637163
0.08105323081408333
0.05719559906762972
0.05158510744099165




5.126227220486612e-10


In [71]:
def v_init_2(a, s, v):
    """
    Suppose SVD(a) -> usv'
    
    To recover u from a and v:
    
    av ->  usv'v
           us
           us*inv(s)
    """
    return a.dot(v.T).dot(np.diag(1/s))

def v_init(a, v):
    U, _, _ = np.linalg.svd(a.dot(v.T), full_matrices=False)
    return U

In [72]:
N, P = 100, 40
a = np.random.randn(N, P)
aat = a.dot(a.T)

U, S, V = np.linalg.svd(a, full_matrices=False)
Us, Ss, Vs = np.linalg.svd(aat, full_matrices=False)

print(U.shape)
print(V.shape)

print(a.T.dot(U).shape)

np.testing.assert_array_almost_equal(a.T.dot(U), V.T.dot(np.diag(S)))

np.testing.assert_almost_equal(U, v_init_2(a, S, V))


np.testing.assert_almost_equal(subspace_dist(U, v_init(a, V), S), 0)

(100, 40)
(40, 40)
(40, 40)


In [None]:
print(np.diag((V_k_PM@scaled_centered_array_other@V_k_PM.T).compute()))
print(np.linalg.norm(V_k_PM, 2).compute())
print(S_k_PM.compute())

In [None]:
print(np.diag((V_k@scaled_centered_array_other@V_k.T)))
print(np.linalg.norm(V_k, 2))
print(S_k)

In [None]:
plt.plot(S_o)
plt.plot(S, '+')


In [None]:
S_k

In [None]:
print(np.mean(np.abs(scaled_centered_array_other - U_o.dot(np.diag(S_o)).dot(V_o))))
print(np.mean(np.abs(scaled_centered_array_sym - U.dot(np.diag(S)).dot(V))))

In [None]:
L, V = np.linalg.eigh(scaled_centered_array_other)

In [None]:
U_k.shape

In [None]:
# A'.dot(U) 
U_1, S_1, V_1 = np.linalg.svd(scaled_centered_array.T.dot(U_k))

In [None]:
scaled_centered_array.T.dot(U_k).shape

In [None]:
U_1.shape

In [None]:
np.max(np.abs(scaled_centered_array_other - scaled_centered_array_other.T))

In [None]:
plt.plot(L[::-1])

In [None]:
V[:, 0]

In [None]:
plt.plot(PM.history['acc']['v-subspace']) 
plt.yscale('log')

In [None]:
plt.plot(V_k_PM.T.compute())
plt.plot(V_k.T)
plt.show()

In [None]:
#A'A 
#V_k eigenvector of (A'A)


In [None]:
test_PowerMethod_case1()

In [50]:
array = np.random.rand(1000, 1000)
mu = array.mean(axis=0)
std = np.diag(1/array.std(axis=0))
scaled_centered_array = (array-mu).dot(std)
scaled_centered_array = scaled_centered_array.dot(scaled_centered_array.T)
U, S, V = np.linalg.svd(scaled_centered_array, full_matrices=False)  # Ground Truth
for k in range(1, 10):
    U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]

    PM = PowerMethod(k=k, tol=1e-2, scoring_method='rmse', max_iter=100, init_row_sampling_factor=1)
    U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

    #np.testing.assert_array_almost_equal(S_k, S_k_PM)
    assert V_k.shape == V_k_PM.shape
    assert U_k.shape == U_k_PM.shape
    print(S_k, S_k_PM.compute())
    print(subspace_dist(V_k, V_k_PM, S_k_PM))
    print(subspace_dist(U_k, U_k_PM, S_k_PM))
    #np.testing.assert_almost_equal(subspace_dist(V_k, V_k_PM, S_k_PM), 0)
    #np.testing.assert_almost_equal(subspace_dist(U_k, U_k_PM, S_k_PM), 0)

[3981.80228617] [3981.54596818]
6.417089082333405e-14
0.0002798897469753481
[3981.80228617 3902.29416668] [3981.77142904 3901.38768147]
3.26405569239796e-14
0.0011630632237362715
[3981.80228617 3902.29416668 3850.02264476] [3981.78054518 3902.01110684 3849.32345376]
2.142730437526552e-14
0.0005984416000240911
[3981.80228617 3902.29416668 3850.02264476 3821.26097074] [3981.78457816 3902.24585525 3849.13467948 3820.70612613]
1.6431300764452317e-14
0.0006519215618882024
[3981.80228617 3902.29416668 3850.02264476 3821.26097074 3815.5483137 ] [3981.79120493 3902.27200865 3849.35793247 3820.73099261 3815.10406183]
1.3322676295501878e-14
0.0005307105297818193
[3981.80228617 3902.29416668 3850.02264476 3821.26097074 3815.5483137
 3782.14370029] [3981.79184233 3902.18809593 3849.56942379 3820.54691868 3814.78229922
 3781.80080132]
1.0880185641326534e-14
0.0007569293354001694
[3981.80228617 3902.29416668 3850.02264476 3821.26097074 3815.5483137
 3782.14370029 3742.16306485] [3981.79411951 3902.1

In [None]:
PM = PowerMethod(k=k, tol=1e-16, scoring_method='v-subspace', max_iter=100)
U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

In [None]:
PM.history['acc']

In [None]:
((scaled_centered_array.T.dot(V_k_PM.T)) - V_k_PM.T.dot(np.diag(S_k))).compute()

In [None]:
plt.plot(U_k[:,0])
plt.plot(U_k_PM[:,0])

In [None]:
plt.plot(V_k.T[:,1])
plt.plot(V_k_PM.T[:,0])

In [None]:
np.linalg.svd(U_k.T.dot(U_k_PM))

In [None]:
a = da.array(np.random.rand(100000,100))

In [None]:
Q, R = da.linalg.tsqr(a)
Q_f, R_f = da.linalg.qr(a)

In [None]:
(np.abs(Q - Q_f)).mean().compute()

In [None]:
a = da.array(np.random.rand(1000,100))


In [None]:
U, S, V = da.linalg.tsqr(a, compute_svd=True)

In [None]:
U

In [175]:
n, p, k = 1000, 1000, 100
array = np.zeros((n, p))
i = np.arange(0,n)
ix = np.random.choice(i, k, replace=False)
iy = np.random.choice(i, k, replace=False)
iz = np.clip(1*np.random.normal(size=(k)), 0, 10)
array[ix, iy] = iz
array = array + .1*np.random.normal(size=(n, p))
array = array + array.T


In [195]:
U, S, V = np.linalg.svd(array)

In [211]:
PM = PowerMethod(k=10, tol=1e-6, scoring_method='q-vals', max_iter=1000)
U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

In [212]:
_, L, _ = np.linalg.svd(U_k_PM.T.compute().dot(U[:, 0:10]))
print(L)

[0.99703567 0.99652347 0.99534357 0.99408236 0.99336468 0.99080704
 0.98968138 0.98273281 0.95408963 0.94644522]


In [213]:
_, L, _ = np.linalg.svd(V_k_PM.compute().dot(V[:, 0:10]))
print(L)

[0.17916469 0.15662706 0.13227742 0.11284952 0.09082899 0.07461324
 0.05218605 0.03684601 0.02745764 0.01108023]


In [152]:
def test_PowerMethod_case2():
    array = np.random.rand(100, 100)
    mu = array.mean(axis=0)
    std = np.diag(1/array.std(axis=0))
    scaled_centered_array = (array-mu).dot(std)
    scaled_centered_array = scaled_centered_array.dot(scaled_centered_array.T)
    U, S, V = np.linalg.svd(scaled_centered_array, full_matrices=False)  # Ground Truth
    k = 10
    U_k, S_k, V_k = U[:, :k], S[:k], V[:k, :]
    previous_error = float('inf')
    for t in np.logspace(0, -12, 20):

        PM = PowerMethod(k=k, tol=t, scoring_method='q-vals', max_iter=100)
        U_k_PM, S_k_PM, V_k_PM = PM.svd(array)

        assert np.linalg.norm(S_k-S_k_PM) <= previous_error
        previous_error = np.linalg.norm(S_k-S_k_PM)

In [84]:
test_PowerMethod_case1()

[3973.70470889 3911.22802025 3834.86346896 3822.59637709 3816.85008082
 3783.82972356 3764.07030912 3733.22321992 3691.21097468 3683.09834406]
1528.411942088105
3292.8637086936155
4856.703896858297
3690.5402481335436
2400.013931375379
1263.5830991114924
647.8195817207073
361.1877727069728
194.3577387189954
98.4033990412142
