In [None]:
# Run the shared boilerplate script to set up the notebook environment.
# NOTE(review): the fibertree names used in later cells (Tensor, Fiber,
# displayTensor, createCanvas, addActivity, displayCanvas) are presumably
# brought into scope by this script -- confirm against ../prelude.py.

%run ../prelude.py

# Triangular solver

Based on scipy's [scipy.linalg.solve_triangular](https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.solve_triangular.html)

_Note:_ this still needs a little work, because everything is still uncompressed. Also, the special-casing of the j = 0 and j > 0 cases is very awkward.

In [None]:
# Upper-triangular coefficient matrix, given as a row-major nested list.
x = [[3, 2, 1, 1],
     [0, 1, 0, 1],
     [0, 0, 1, 1],
     [0, 0, 0, 1]]

# Right-hand side vector.
b = [4,
     2,
     4,
     2]

# Derive the problem size from the data rather than hard-coding it, so that
# N cannot drift out of sync when the example matrix is edited.
N = len(x)

# Fail early on malformed input: the solver below indexes A[j][i] for all
# j, i < N, reads one b entry per row, and divides by every diagonal element.
assert all(len(row) == N for row in x), "A must be square"
assert len(b) == N, "b must have one entry per row of A"
assert all(x[k][k] != 0 for k in range(N)), "diagonal of A must be non-zero"

A_MN = Tensor.fromUncompressed(["M", "N"], x).setName("A")
B_N = Tensor.fromUncompressed(["N"], b).setName("b")

# Second tensor built from the same right-hand side data.
# NOTE(review): not referenced by any of the cells visible in this notebook.
B_check = Tensor.fromUncompressed(["N"], b).setName("b check")


A = A_MN.getRoot()
# hack to induce coordinates in A where there are zeros,
# and allowing us to use square brackets (i.e. __getitem__)
# because positions == coordinates
A << Fiber(coords=range(N))
for j, a_j in A:
    a_j << Fiber(coords=range(N))

B = B_N.getRoot()

displayTensor(A)
displayTensor(B)

## Pipelined triangular solver

`P` represents the pipelined communication between stages: `P[j]` holds the data that stage `j` passes on to stage `j+1`. Each `P[j]` is N elements wide (same as `b`).

In [None]:
# Pipelined forward substitution. Stage j reads its input vector (b for
# stage 0, otherwise the previous pipe P[j-1]), produces solution element j
# by dividing element j by A[j][j], and forwards the updated remaining
# elements (i > j) into P[j]. The solution element of stage j is P[j][j].

# create "pipes" between each stage: N-entry tensors
# (can't simply do a rank-2 tensor because I'd have multiple entries for
# addActivity for that tensor, and the current pipelining API doesn't really
# handle that)
P = [Tensor(rank_ids=["N"], shape=[N]).setName("pipe-{}".format(i)) for i in range(N)]

# output vector; induce coordinates 0..N-1 in its root fiber
C = Tensor(rank_ids=["N"], shape=[N]).setName("output")
C.getRoot() << Fiber(coords=range(N))

# Canvas argument order: A (0), b (1), P[0]..P[N-1] (2..N+1), C (N+2).
# NOTE(review): the highlight arguments passed to addActivity below are
# presumably matched positionally against this tensor order -- confirm.
canvas = createCanvas(A_MN, B_N, *P, C, enable_wait=True)

# cycle: running skew applied to the activities of stage 0
# stage_delay: number of cycles used in every `wait` entry below
cycle = 0
stage_delay = 1

# "wait" means, for each key-value pair in the supplied dictionary,
# we must wait `value` cycles when the `key`th argument in the canvas has been updated
# in order to add our own activity
# (assuming 0-based keys, key j+1 is P[j-1] and key 2+j is P[j] -- TODO confirm)

for j in range(N):
    # in the first row, read from B, rather than P
    # also, skew these starts by cycle, as these input elements are piped in one after another
    # j_act holds one highlight per pipe: element j of the input pipe P[j-1]
    # and of the output pipe P[j], nothing for every other pipe
    # e.g., for N = 4: j=0 -> [(0,), [], [], []], j=1 -> [(1,), (1,), [], []],
    #                  j=2 -> [[], (2,), (2,), []]
    j_act = [(j,) if j == jj or j-1 == jj else [] for jj in range(N)]
    
    if j == 0:
        # stage 0: solution element 0 is b[0] / A[0][0]
        b_val = B.getPayload(j)
        p_j = P[0].getPayloadRef(j)
        p_j <<= b_val / A.getPayload(j, j)
        j_val = p_j
        addActivity(canvas, (j, j), (j,), *j_act, worker=str(j), skew=cycle)
        cycle += 1
    else:
        # stage j > 0: solution element j is P[j-1][j] / A[j][j];
        # the activity waits on the pipe written by the previous stage
        p_j = P[j-1].getPayload(j)
        p_jj = P[j].getPayloadRef(j)
        p_jj <<= p_j / A.getPayload(j, j)
        j_val = p_jj
        addActivity(canvas, (j, j), [], *j_act, worker=str(j), wait={j+1:stage_delay})
    
    
    # forward the remaining elements: P[j][i] = input[i] - x_j * A[j][i]
    for i in range(j+1, N):
        # highlight element i of the input pipe P[j-1] and the output pipe P[j]
        i_act = [(i,) if j == ii or j-1 == ii else [] for ii in range(N)]
        # in the first row, read from B, rather than P
        # NOTE(review): B is indexed with square brackets here but with
        # getPayload() in the diagonal case above; presumably equivalent
        # because b has no zeros, so positions == coordinates -- confirm
        i_val = B[i] if j == 0 else P[j-1].getPayload(i)
        
        p_j_i = P[j].getPayloadRef(i)
        p_j_i <<= i_val - j_val * A.getPayload(j, i)

        if j == 0:
            addActivity(canvas, (j, i), (i,), *i_act, worker=str(j), skew=cycle)
            cycle += 1
        else:
            addActivity(canvas, (j, i), [], *i_act, worker=str(j), wait={j+1:stage_delay})
            

# collect the solution: element j of the output is the diagonal element
# P[j][j] produced by stage j; each copy waits on pipe P[j]
for j in range(N):
    c_j = C.getPayloadRef(j)
    c_j <<= P[j].getPayload(j)
    j_act = [(j,) if j == jj else [] for jj in range(N)]
    addActivity(canvas, [], [], *j_act, (j,), wait={2+j:stage_delay})
            
displayCanvas(canvas)


In [None]:
# Final result: the solution vector collected in the "output" tensor C
displayTensor(C)

## Check results

Check the result by multiplying `A` with the computed solution (the `output` tensor `C`) and ensuring that the product equals the `b` we started with. (The jth entry of `b` corresponds to the dot product of the jth _column_ of `A` and the solution, i.e. the system solved is $A^T x = b$.)

In [None]:
# Recompute the right-hand side from A and the solution held in C:
#   B2[j] = sum over i of A[i][j] * C[i]
# i.e. the dot product of column j of A with the solution vector.
B2 = Fiber(coords=range(N), initial=0)

canvas2 = createCanvas(A, C, B2)
for j in range(N):
    for i in range(N):
        b2_ref = B2.getPayloadRef(j)
        x_val = C.getPayload(i)
        b2_ref += A.getPayload(i, j) * x_val
        # Highlight the elements actually touched in this step: A[i][j],
        # C[i] and the accumulator B2[j] (the element written is B2[j],
        # not B2[i]).
        addActivity(canvas2, (i, j), (i,), (j,))

displayCanvas(canvas2)

In [None]:
# Check results: the right-hand side recomputed from the solution (B2)
# should match the original right-hand side (B).
# NOTE(review): this looks like an exact comparison of values that were
# produced via floating-point division -- confirm it is robust for other inputs.
B == B2

# Presumably the expected solution (contents of C), truncated to two decimals:
# [1.33 -0.66 2.66 -1.33]