# Stationary Iterative Methods

In [None]:
import torch

In [None]:
!git clone https://github.com/grinisrit/noa.git

In [None]:
!mkdir -p build

In [None]:
# Directory of the NOA checkout (the `git clone` cell above creates `noa/`);
# its `src` subdirectory is passed as an include path to the extension builds.
noa_location = 'noa'
from torch.utils.cpp_extension import load

## Jacobi method

See the [Wikipedia article on the Jacobi method](https://en.wikipedia.org/wiki/Jacobi_method) for a description of the algorithm.

The cells below expect the following files in the notebook's working directory:
* stationary-methods.hh
* jacobi-method.cc
* jacobi-method.cu

In [None]:
# JIT-compile the CPU Jacobi solver from `jacobi-method.cc` and import it as a
# Python module. Build artefacts go to ./build; headers are looked up in the
# NOA sources and in the current directory.
jacobi_method = load(
    name='jacobi_method',
    build_directory='./build',
    sources=['jacobi-method.cc'],
    extra_include_paths=[f'{noa_location}/src', '.'],
    # One flag per list element, as `load` documents for `extra_cflags`.
    # `-fopenmp` turns on OpenMP support in the compiler.
    extra_cflags=['-O3', '-std=c++17', '-fopenmp'],
    verbose=True,
)

In [None]:
# JIT-compile the CUDA Jacobi solver from `jacobi-method.cu`, but only when a
# CUDA device is usable; otherwise the name is bound to None so that later
# cells can test for it.
if torch.cuda.is_available():
    jacobi_method_cuda = load(
        name='jacobi_method_cuda',
        build_directory='./build',
        sources=['jacobi-method.cu'],
        extra_include_paths=[f'{noa_location}/src', '.'],
        # One flag per list element, as `load` documents for these arguments.
        extra_cflags=['-O3', '-std=c++17'],
        extra_cuda_cflags=[
            '-std=c++17',
            '--expt-relaxed-constexpr',
            '--expt-extended-lambda',
        ],
        verbose=True,
    )
else:
    jacobi_method_cuda = None

In [None]:
def generate_tridiagonal(n, l, d, u):
    """Return an ``n x n`` sparse COO tridiagonal matrix.

    Every row ``i`` holds ``l`` at column ``i - 1``, ``d`` at column ``i`` and
    ``u`` at column ``i + 1``; the two entries that would fall outside the
    matrix (row 0, column -1 and row n-1, column n) are left out.

    Args:
        n: Number of rows and columns.
        l: Value on the sub-diagonal.
        d: Value on the main diagonal.
        u: Value on the super-diagonal.

    Returns:
        The matrix as a ``torch.sparse_coo_tensor`` of shape ``(n, n)``.
    """
    # Three candidate entries per row, at column offsets -1, 0 and +1.
    row_index = torch.arange(n).repeat_interleave(3)
    col_index = torch.tensor([-1, 0, 1]).repeat(n) + row_index
    band_values = torch.tensor([l, d, u]).repeat(n)

    # The very first and very last candidates lie outside the matrix.
    interior = slice(1, -1)
    indices = torch.stack([row_index[interior], col_index[interior]])
    return torch.sparse_coo_tensor(indices, band_values[interior], (n, n))

In [None]:
# Problem size: number of unknowns (A is n x n).
n = 100000
# Tridiagonal test matrix with -0.5 below, 2.5 on and -1.5 above the main
# diagonal. It is strictly diagonally dominant (2.5 > 0.5 + 1.5), which is a
# sufficient condition for the Jacobi iteration to converge.
A = generate_tridiagonal(n, -0.5, 2.5, -1.5)
A

In [None]:
# A dense n x n matrix in the default float32 dtype takes 4 * n**2 bytes
# (about 40 GB for n = 100000), so densify all of A only when it is small.
# For larger n keep just the leading corner, which is enough to inspect the
# band structure.
dense_limit = 2048
if n <= dense_limit:
    Ad = A.to_dense()
else:
    corner = 8
    A_coalesced = A.coalesce()
    corner_idx = A_coalesced.indices()
    # Keep the entries whose row AND column both lie inside the corner block.
    in_corner = (corner_idx < corner).all(dim=0)
    Ad = torch.sparse_coo_tensor(
        corner_idx[:, in_corner],
        A_coalesced.values()[in_corner],
        (corner, corner),
    ).to_dense()
Ad

In [None]:
# Convert the COO matrix to CSR layout; the row pointers, column indices and
# values of this tensor are what the solver cells below pass to the extension.
Acsr = A.to_sparse_csr()
Acsr

In [None]:
# Reference solution: the all-ones vector.
x0 = torch.ones(n)
# Right-hand side built from the reference solution, so x0 solves A x = b and
# the solver output can be compared against it.
b = Acsr @ x0
# Preview the first few entries of b.
b[:5]

In [None]:
# Pull the raw CSR arrays out of the tensor. `.int()` casts the index arrays
# to 32-bit integers -- presumably the index type the C++/CUDA solver expects;
# confirm against jacobi-method.cc / jacobi-method.cu.
crow_indices = Acsr.crow_indices().int()
col_indices = Acsr.col_indices().int()
values =  Acsr.values()

In [None]:
# Solve A x = b with the CPU extension, passing the matrix as raw CSR arrays.
x = jacobi_method.solve(crow_indices, col_indices, values, b)
# Error check: 2-norm distance to the reference solution, divided by n.
torch.dist(x,x0)/n

In [None]:
# Copy the CSR arrays and the right-hand side to the GPU. Without a usable
# CUDA device `.cuda()` raises, so in that case bind the names to None, in
# line with how the CUDA extension itself is only built when CUDA is
# available.
if torch.cuda.is_available():
    crow_indices_cu = crow_indices.cuda()
    col_indices_cu = col_indices.cuda()
    values_cu = values.cuda()
    b_cu = b.cuda()
else:
    crow_indices_cu = col_indices_cu = values_cu = b_cu = None

In [None]:
# Solve on the GPU and compare with the reference solution. The CUDA
# extension is None when no CUDA device was available at build time, so skip
# the solve in that case instead of failing on `None.solve`.
if jacobi_method_cuda is not None:
    x_cu = jacobi_method_cuda.solve(crow_indices_cu, col_indices_cu, values_cu, b_cu)
    # 2-norm distance to the reference solution, divided by n.
    cuda_error = torch.dist(x_cu.cpu(), x0) / n
else:
    x_cu = cuda_error = None
cuda_error

In [None]:
%timeit jacobi_method.solve(crow_indices, col_indices, values, b)

In [None]:
%timeit jacobi_method_cuda.solve(crow_indices_cu, col_indices_cu, values_cu, b_cu)