# Iterative algorithms for Sparse Tensors

## Power method for SVD 

In [1]:
import torch

In [5]:
def gen_sparse(rows=7, cols=5, nnz=10):
    """Build a random sparse COO tensor of shape ``(rows, cols)``.

    ``nnz`` (row, column) coordinate pairs are drawn uniformly at random
    with replacement and paired with standard-normal values.  Because the
    same coordinate may be drawn more than once, the result is not
    guaranteed to be coalesced; call ``.coalesce()`` to merge duplicates.

    Args:
        rows: Number of rows of the tensor (default 7).
        cols: Number of columns of the tensor (default 5).
        nnz: Number of stored entries to draw (default 10).

    Returns:
        A ``torch.sparse_coo`` tensor of size ``(rows, cols)``.
    """
    # Draw rows first, then columns, then values, so results for a given
    # seed match the original fixed-size implementation.
    row_idx = torch.randint(rows, (nnz,))
    col_idx = torch.randint(cols, (nnz,))
    Ai = torch.stack([row_idx, col_idx])
    Av = torch.randn(nnz)
    return torch.sparse_coo_tensor(Ai, Av, (rows, cols))

A = gen_sparse()
A

tensor(indices=tensor([[1, 5, 0, 2, 2, 0, 5, 1, 2, 0],
                       [3, 2, 1, 1, 3, 2, 4, 2, 4, 1]]),
       values=tensor([ 0.5164,  1.0448,  0.9814, -0.7731, -0.2806,  0.1752,
                      -1.8242, -0.1353, -0.2924,  0.4695]),
       size=(7, 5), nnz=10, layout=torch.sparse_coo)

In [6]:
A.is_coalesced()

False

In [7]:
A = A.coalesce()
A

tensor(indices=tensor([[0, 0, 1, 1, 2, 2, 2, 5, 5],
                       [1, 2, 2, 3, 1, 3, 4, 2, 4]]),
       values=tensor([ 1.4509,  0.1752, -0.1353,  0.5164, -0.7731, -0.2806,
                      -0.2924,  1.0448, -1.8242]),
       size=(7, 5), nnz=9, layout=torch.sparse_coo)

In [8]:
A_dense = A.to_dense()
A_dense

tensor([[ 0.0000,  1.4509,  0.1752,  0.0000,  0.0000],
        [ 0.0000,  0.0000, -0.1353,  0.5164,  0.0000],
        [ 0.0000, -0.7731,  0.0000, -0.2806, -0.2924],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  1.0448,  0.0000, -1.8242],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]])

In [9]:
B = gen_sparse().coalesce()
B

tensor(indices=tensor([[1, 1, 1, 2, 2, 5, 6, 6],
                       [0, 1, 4, 0, 4, 3, 0, 3]]),
       values=tensor([-0.6735, -1.3761,  0.0919,  2.6513, -0.4857,  0.3119,
                      -1.9706,  4.4178]),
       size=(7, 5), nnz=8, layout=torch.sparse_coo)

In [10]:
AB = A + B
AB

tensor(indices=tensor([[0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 6, 6],
                       [1, 2, 0, 1, 2, 3, 4, 0, 1, 3, 4, 2, 3, 4, 0, 3]]),
       values=tensor([ 1.4509,  0.1752, -0.6735, -1.3761, -0.1353,  0.5164,
                       0.0919,  2.6513, -0.7731, -0.2806, -0.7781,  1.0448,
                       0.3119, -1.8242, -1.9706,  4.4178]),
       size=(7, 5), nnz=16, layout=torch.sparse_coo)

In [11]:
AB.is_coalesced()

True

In [12]:
AT = A.t()
AT

tensor(indices=tensor([[1, 2, 2, 3, 1, 3, 4, 2, 4],
                       [0, 0, 1, 1, 2, 2, 2, 5, 5]]),
       values=tensor([ 1.4509,  0.1752, -0.1353,  0.5164, -0.7731, -0.2806,
                      -0.2924,  1.0448, -1.8242]),
       size=(5, 7), nnz=9, layout=torch.sparse_coo)

In [13]:
AT.is_coalesced()

False

In [14]:
ATA = torch.sparse.mm(AT, A)
ATA

tensor(indices=tensor([[1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
                       [4, 3, 2, 1, 4, 3, 2, 1, 4, 1, 3, 2, 2, 4, 3, 1]]),
       values=tensor([ 0.2260,  0.2169,  0.2542,  2.7028, -1.9060, -0.0699,
                       1.1407,  0.2542,  0.0820,  0.2169,  0.3454, -0.0699,
                      -1.9060,  3.4132,  0.0820,  0.2260]),
       size=(5, 5), nnz=16, layout=torch.sparse_coo)

In [15]:
AT @ A_dense

tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  2.7028,  0.2542,  0.2169,  0.2260],
        [ 0.0000,  0.2542,  1.1407, -0.0699, -1.9060],
        [ 0.0000,  0.2169, -0.0699,  0.3454,  0.0820],
        [ 0.0000,  0.2260, -1.9060,  0.0820,  3.4132]])

In [16]:
A_dense + B

tensor([[ 0.0000,  1.4509,  0.1752,  0.0000,  0.0000],
        [-0.6735, -1.3761, -0.1353,  0.5164,  0.0919],
        [ 2.6513, -0.7731,  0.0000, -0.2806, -0.7781],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  1.0448,  0.3119, -1.8242],
        [-1.9706,  0.0000,  0.0000,  4.4178,  0.0000]])

In [17]:
U,S,V = torch.svd_lowrank(A, 2)
S

tensor([2.1218, 1.6610])

In [18]:
V

tensor([[ 1.5771e-11,  1.8626e-09],
        [ 4.3301e-02, -9.8822e-01],
        [-4.9058e-01, -1.2839e-01],
        [ 2.7824e-02, -8.0726e-02],
        [ 8.6987e-01, -2.0632e-02]])

In [19]:
U

tensor([[-0.0109, -0.8767],
        [ 0.0381, -0.0146],
        [-0.1393,  0.4772],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000],
        [-0.9895, -0.0581],
        [ 0.0000,  0.0000]])

In [20]:
torch.dist(A_dense, U @ torch.diag_embed(S) @ V.t())

tensor(0.5841)

In [21]:
U_full,S_full,V_full = torch.svd_lowrank(A, 5)
print(S_full)
torch.dist(A_dense, U_full @ torch.diag_embed(S_full) @ V_full.t())

tensor([2.1218, 1.6610, 0.5755, 0.0995, 0.0000])


tensor(7.9630e-07)

In [22]:
v0 = V.t()[0]
v0

tensor([ 1.5771e-11,  4.3301e-02, -4.9058e-01,  2.7824e-02,  8.6987e-01])

In [23]:
ATA @ v0

tensor([ 0.0000,  0.1950, -2.2085,  0.1246,  3.9161])

In [24]:
 S[0].pow(2) * v0

tensor([ 7.1001e-11,  1.9494e-01, -2.2085e+00,  1.2526e-01,  3.9161e+00])

In [25]:
torch.dist(ATA @ v0, S[0].pow(2) * v0)

tensor(0.0006)

In [26]:
# Random starting vector for the power iteration, scaled to unit length.
v = torch.randn(5)
v /= v.norm()
v

tensor([ 0.5650, -0.5829,  0.4306,  0.0756, -0.3873])

In [53]:
# One power-iteration step: multiply by A^T A, then renormalise.
# NOTE(review): the execution counter jumps from 26 to 53, so this cell was
# presumably re-run repeatedly until v stopped changing -- confirm.
v = ATA @ v
v /= v.norm()
v

tensor([ 0.0000, -0.0433,  0.4906, -0.0277, -0.8699])

In [54]:
torch.dist(v,-v0)

tensor(0.0002)

In [55]:
vp = ATA @ v
vp

tensor([ 0.0000, -0.1950,  2.2085, -0.1246, -3.9161])

In [56]:
# v has unit length and (approximately) satisfies ATA @ v = s^2 * v, so the
# norm of vp is s^2 and its square root is the singular value.
s = torch.sqrt(vp.norm())
s

tensor(2.1218)

In [57]:
torch.dist(S[0],s)

tensor(2.3842e-07)

In [58]:
# Recover the matching left singular vector: u = A v / s.
u = (A @ v) / s
u

tensor([ 0.0109, -0.0380,  0.1393,  0.0000,  0.0000,  0.9895,  0.0000])

In [60]:
u0 = U.t()[0]
torch.dist(-u0, u)

tensor(4.1069e-05)

In [61]:
uOv = torch.outer(u,v)
uOv

tensor([[ 0.0000e+00, -4.7168e-04,  5.3418e-03, -3.0137e-04, -9.4724e-03],
        [-0.0000e+00,  1.6467e-03, -1.8649e-02,  1.0521e-03,  3.3069e-02],
        [ 0.0000e+00, -6.0346e-03,  6.8342e-02, -3.8557e-03, -1.2119e-01],
        [ 0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00],
        [ 0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00],
        [ 0.0000e+00, -4.2860e-02,  4.8539e-01, -2.7385e-02, -8.6072e-01],
        [ 0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00]])

In [62]:
uOv = uOv.to_sparse()
uOv

tensor(indices=tensor([[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 5],
                       [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]]),
       values=tensor([-4.7168e-04,  5.3418e-03, -3.0137e-04, -9.4724e-03,
                       1.6467e-03, -1.8649e-02,  1.0521e-03,  3.3069e-02,
                      -6.0346e-03,  6.8342e-02, -3.8557e-03, -1.2119e-01,
                      -4.2860e-02,  4.8539e-01, -2.7385e-02, -8.6072e-01]),
       size=(7, 5), nnz=16, layout=torch.sparse_coo)

In [63]:
# Deflation: subtract the rank-one term s * (u v^T) found above so that a
# further power iteration on Ap targets the next singular triplet.
Ap = A - (uOv * s)
Ap

tensor(indices=tensor([[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 5, 5, 5, 5],
                       [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]]),
       values=tensor([ 1.4519e+00,  1.6388e-01,  6.3944e-04,  2.0098e-02,
                      -3.4939e-03, -9.5716e-02,  5.1421e-01, -7.0164e-02,
                      -7.6028e-01, -1.4501e-01, -2.7241e-01, -3.5249e-02,
                       9.0939e-02,  1.4936e-02,  5.8104e-02,  2.0462e-03]),
       size=(7, 5), nnz=16, layout=torch.sparse_coo)

In [67]:
def svd1D(A, tol=1e-6, max_iter=50):
    """Estimate the dominant right singular vector of ``A`` by power iteration.

    The iteration repeatedly applies ``A^T A`` to a random unit vector and
    renormalises, stopping once two successive iterates differ by at most
    ``tol`` (Euclidean distance) or after ``max_iter`` steps.

    Args:
        A: Sparse 2-D tensor (it is passed to ``torch.sparse.mm``).
        tol: Convergence threshold on the distance between iterates.
        max_iter: Maximum number of power-iteration steps.

    Returns:
        A 1-D unit-length tensor with ``A.shape[1]`` entries.  If the
        iteration budget is exhausted a diagnostic is printed and the last
        iterate is returned.
    """
    # Size the start vector from A instead of assuming five columns.
    n_cols = A.shape[1]
    currentV = torch.randn(n_cols, dtype=A.dtype, device=A.device)
    currentV /= currentV.norm()
    lastV = torch.zeros_like(currentV)
    ATA = torch.sparse.mm(A.t(), A)

    iterations = 0
    while torch.dist(currentV, lastV) > tol:
        # Check the budget before stepping so exactly max_iter steps are
        # taken and the message is only printed when not yet converged.
        if iterations >= max_iter:
            print(f"SVD failed to converge in {iterations} iterations:") 
            print(f"{torch.dist(currentV,lastV)} > {tol}")
            break
        iterations += 1
        lastV = currentV
        product = ATA @ lastV
        scale = product.norm()
        if scale == 0:
            # lastV is annihilated by A^T A; normalising would give NaN,
            # so stop and keep the last unit vector.
            break
        currentV = product / scale

    return currentV

In [68]:
vp = svd1D(Ap)

In [70]:
torch.dist(vp, -V.t()[1])

tensor(0.0051)

In [71]:
def get_singular_value(ATA, v):
    """Return the square root of the norm of ``ATA @ v``.

    When ``v`` is a unit-length eigenvector of ``ATA`` with eigenvalue
    ``s**2``, the returned value is the singular value ``s``.
    """
    return (ATA @ v).norm().sqrt()
sp = get_singular_value(ATA, vp)
sp

tensor(1.6610)

In [72]:
S[1]

tensor(1.6610)

In [74]:
up = (A @ vp) / sp
up

tensor([ 0.8764,  0.0163, -0.4778,  0.0000,  0.0000,  0.0583,  0.0000])

In [76]:
torch.dist(up, -U.t()[1])

tensor(0.0018)

## Conjugate Gradient Method

In [77]:
# Build a symmetric positive-definite test matrix (identity plus a scaled
# Gram matrix A^T A), which is what conjugate gradient requires.
# Note: this rebinds A, which until now held the 7x5 sparse matrix.
A = torch.eye(5) + 0.05 * ATA.to_dense()
print(A)
torch.linalg.det(A)

tensor([[ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  1.1351,  0.0127,  0.0108,  0.0113],
        [ 0.0000,  0.0127,  1.0570, -0.0035, -0.0953],
        [ 0.0000,  0.0108, -0.0035,  1.0173,  0.0041],
        [ 0.0000,  0.0113, -0.0953,  0.0041,  1.1707]])


tensor(1.4179)

In [78]:
A = A.to_sparse()
A

tensor(indices=tensor([[0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
                       [0, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]]),
       values=tensor([ 1.0000,  1.1351,  0.0127,  0.0108,  0.0113,  0.0127,
                       1.0570, -0.0035, -0.0953,  0.0108, -0.0035,  1.0173,
                       0.0041,  0.0113, -0.0953,  0.0041,  1.1707]),
       size=(5, 5), nnz=17, layout=torch.sparse_coo)

In [79]:
b = torch.randn(5)
x = torch.randn(5)
b

tensor([ 1.3225,  0.7097, -1.1266, -0.7852,  1.2434])

In [80]:
def conjgrad(A, b, x, tol=1e-6, max_iter=None):
    """Solve ``A x = b`` with the conjugate gradient method.

    Args:
        A: Square matrix (dense or sparse) supporting ``A @ vector``.  The
            method assumes it is symmetric positive definite.
        b: 1-D right-hand-side tensor.
        x: 1-D initial guess; it is not modified in place.
        tol: Stop once the residual 2-norm drops below this value.
        max_iter: Maximum number of iterations; defaults to ``b.numel()``.

    Returns:
        The approximate solution as a 1-D tensor.  If the initial guess
        already satisfies the tolerance it is returned unchanged.
    """
    r = b - A @ x
    rsold = r @ r
    # A zero initial residual would make alpha = 0/0 below and turn the
    # whole iterate into NaN, so return the guess directly.
    if torch.sqrt(rsold) < tol:
        return x
    p = r
    if max_iter is None:
        max_iter = b.numel()
    for _ in range(max_iter):
        Ap = A @ p
        alpha = rsold / (p @ Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        rsnew = r @ r
        if torch.sqrt(rsnew) < tol:
            break
        # New search direction, conjugate to the previous ones.
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return x

In [83]:
x0 = conjgrad(A,b,x)
x0

tensor([ 1.3225,  0.6340, -0.9878, -0.7859,  0.9783])

In [84]:
Ad = A.to_dense()

In [85]:
torch.linalg.inv(Ad)

tensor([[ 1.0000,  0.0000,  0.0000,  0.0000, -0.0000],
        [ 0.0000,  0.8813, -0.0115, -0.0094, -0.0094],
        [ 0.0000, -0.0115,  0.9532,  0.0031,  0.0777],
        [ 0.0000, -0.0094,  0.0031,  0.9831, -0.0031],
        [ 0.0000, -0.0094,  0.0777, -0.0031,  0.8606]])

In [86]:
x00 = torch.linalg.inv(Ad) @ b

In [87]:
torch.dist(x00,x0)

tensor(2.7957e-07)