# OuterSPACE spMspM accelerator

First, include some libraries

In [None]:
# Begin - startup boilerplate code

import pkgutil

# Install the bootstrap package only when it is not already importable, so
# re-running this cell does not trigger a reinstall every time.
if 'fibertree_bootstrap' not in [pkg.name for pkg in pkgutil.iter_modules()]:
  # IPython shell escape ("!"): runs pip as a shell command from the notebook.
  !python3 -m pip  install git+https://github.com/Fibertree-project/fibertree-bootstrap --quiet

# End - startup boilerplate code


# Star-import the bootstrap package and call its setup entry point.
# NOTE(review): later cells use names that are never defined in this notebook
# (Tensor, os, data_dir, displayTensor, createCanvas, displayCanvas, FTD);
# presumably this import/call provides them - confirm.
from fibertree_bootstrap import *
fibertree_bootstrap()

## Control panel to control display behavior manually

In [None]:
# Uncomment the appropriate lines to change the default behavior

# FTD.showAnimations(True)      # Turn on animations
# FTD.showAnimations(False)     # Turn off animations

# FTD.setStyle(matrix=True)     # Show tensor as a matrix
# FTD.setStyle(matrix=False)    # Show tensor as a fiber tree


## Read matrices


In [None]:
# Locate the YAML descriptions of both operands and of the reference result
a_yaml = os.path.join(data_dir, "sparse-matrix-a.yaml")
b_yaml = os.path.join(data_dir, "sparse-matrix-b.yaml")
z_yaml = os.path.join(data_dir, "sparse-matrix-a_times_b.yaml")

a = Tensor.fromYAMLfile(a_yaml)
b = Tensor.fromYAMLfile(b_yaml)

# The outer-product traversal order wants "a" transposed: swap the ranks of
# its root fiber and wrap the result as a tensor with rank order [K, M]
a_swapped = a.getRoot().swapRanks()
at = Tensor.fromFiber(["K", "M"], a_swapped)

print("Input A")
displayTensor(a.setColor("blue"))

print("Input A - transposed")
displayTensor(at.setColor("blue"))

print("Input B")
displayTensor(b.setColor("green"))

# Precomputed product, kept for the "Check result" cells further down
z_verify = Tensor.fromYAMLfile(z_yaml)

print("Result Z (precomputed)")
displayTensor(z_verify)

## Outer Product - Naive


In [None]:
# Result tensor, initially empty, with rank order [M, N]
z = Tensor(rank_ids=["M", "N"])

# One canvas track per tensor (at, b, z) so every update can be replayed
canvas = createCanvas(at, b, z)

z_m = z.getRoot()

# Outermost loop walks the shared K rank of A-transposed and B.
# NOTE(review): `&` is presumably fibertree's intersection operator and `<<`
# its populate/assignment operator - confirm against the Fiber documentation.
for k, (at_m, b_n) in at.getRoot() & b.getRoot():

    # For every m present in this K-slice of A, obtain the matching N fiber of z
    for m, (z_n, at_val) in z_m << at_m:

        # For every n present in this K-slice of B, obtain the z element (m, n)
        for n, (z_ref, b_val) in z_n << b_n:
            partial = at_val * b_val
            z_ref += partial                     # accumulate in place
            canvas.addFrame((k, m), (k, n), (m, n))

print("Result Z (computed)")
displayTensor(z)
displayCanvas(canvas)

## Check result

In [None]:
# Compare the naive outer-product result `z` with the precomputed reference;
# as the last expression of the cell, its value is what the notebook shows.
z_verify == z

# OuterSPACE - Step 1

In [None]:
# Intermediate tensor holding every partial product, rank order [M, K, N]
t = Tensor(rank_ids=["M", "K", "N"])

# One canvas track per tensor (at, b, t)
canvas = createCanvas(at, b, t)

t_m = t.getRoot()

# Each k is handled independently of the others - this is the loop to
# parallelize.
for k, (at_m, b_n) in at.getRoot() & b.getRoot():

    for m, at_val in at_m:

        # N fiber of t stored at point (m, k); this is where the partial
        # products for this (k, m) pair are written
        t_n = t_m.getPayloadRef(m, k)

        for n, (t_ref, b_val) in t_n << b_n:
            partial = at_val * b_val
            t_ref += partial                     # accumulate in place
            canvas.addFrame((k, m), (k, n), (m, k, n))

print("Intermediate Output - T")
displayTensor(t)
displayCanvas(canvas)

# OuterSPACE - Step 2 - Serial

In [None]:
# Consumes the intermediate tensor `t` (rank order [M, K, N]) that the
# Step 1 cell built.
t.setColor("blue")

z = Tensor(rank_ids=["M", "N"])

print("Input Intermediate - T")
displayTensor(t)

canvas = createCanvas(t, z)

t_root = t.getRoot()
z_root = z.getRoot()

# Walk t in M, K, N order and fold the K rank away by summing into z[m, n]
for m, (z_n, t_k) in z_root << t_root:
    for k, t_n in t_k:                           # Coords are all ordered
        for n, (z_ref, t_val) in z_n << t_n:
            z_ref += t_val                       # accumulate in place
            canvas.addFrame((m, k, n), (m, n))

print("Tensor z")
displayTensor(z)
displayCanvas(canvas)

## Check result

In [None]:
# Compare the serial Step 2 result `z` with the precomputed reference;
# as the last expression of the cell, its value is what the notebook shows.
z_verify == z

# OuterSPACE - Step 2 - Wide merge - Parallel (opportunity)

In [None]:
# Consumes the intermediate tensor `t` (rank order [M, K, N]) that the
# Step 1 cell built.
t.setColor("blue")

z = Tensor(rank_ids=["M", "N"])

print("Input Intermediate - T")
displayTensor(t)

canvas = createCanvas(t, z)

t_root = t.getRoot()
z_root = z.getRoot()

for m, (z_n, t_k) in z_root << t_root:

    print(f"\n\nt_k for coord m = {m}")
    displayTensor(t_k)

    # The merge: swap the two ranks under this m so the data is grouped by N
    # first and K second
    tp_n = t_k.swapRanks()
    print(f"Transposed nt_k for coord m = {m} -> tp_n")
    tp_tensor = Tensor.fromFiber(["N", "K"], tp_n)
    displayTensor(tp_tensor)

    for n, (z_ref, tp_k) in z_n << tp_n:         # Ordered coords
        print(f"Sum tp_k = {tp_k}")

        # All k entries under (m, n) feed the same output element, so this
        # reduction is the part that could be done spatially, in parallel.
        # `shards` records which points of t took part, for a single frame.
        shards = []
        for k, tp_val in tp_k:
            shards.append((m, k, n))
            z_ref += tp_val

        canvas.addFrame(shards, (m, n))

    print(f"\nTensor z (for tp_n at coord m = {m} after all tp_k reductions)")
    displayTensor(z)

print("Final Result - Z")
displayTensor(z)
displayCanvas(canvas)

## Check result

In [None]:
# Compare the wide-merge Step 2 result `z` with the precomputed reference;
# as the last expression of the cell, its value is what the notebook shows.
z_verify == z

# OuterSPACE - Step 2 - Narrow merge - Parallel (opportunity)

Implementation incomplete...

In [None]:
# Consumes the intermediate tensor `t` (rank order [M, K, N]) that the
# Step 1 cell built.

z = Tensor(rank_ids=["M", "N"])

displayTensor(t)

t_m = t.getRoot()
z_m = z.getRoot()

# Goal: a logarithmic merge. As written this is not there yet - each split of
# the K rank is merged straight into z, one split after another.

for m, (z_n, t_k) in z_m << t_m:
    displayTensor(t_k)

    # Partition the K fiber; iterating the result yields (k1, t_k0) pairs.
    # NOTE(review): splitEqual(2) presumably makes groups of two K
    # coordinates each - confirm against the Fiber.splitEqual documentation.
    t_k1 = t_k.splitEqual(2)
    displayTensor(t_k1)

    for k1, t_k0 in t_k1:
        print(f"\n\nt_k at coord m = {m} for split {k1}")
        # Show the split that is about to be merged (t_k0); the whole t_k
        # was already displayed above and does not change between splits.
        displayTensor(t_k0)

        tp_n = t_k0.swapRanks()                  # The merge!
        print("Transposed = tp_n")
        displayTensor(Tensor.fromFiber(["N", "K"], tp_n))

        for n, (z_ref, tp_k) in z_n << tp_n:     # Ordered coords
            print(f"tp_k = {tp_k}")
            for k, tp_val in tp_k:               # Parallel!
                z_ref += tp_val                  # Spatial reduction

        print(f"\nTensor z (for tp_n for coord m = {m} after split {k1} tp_k reductions)")
        displayTensor(z)

print("Final tensor z")
displayTensor(z)


## Check result

In [None]:
# Compare the narrow-merge Step 2 result `z` with the precomputed reference;
# as the last expression of the cell, its value is what the notebook shows.
z_verify == z

## Testing area

For running alternative algorithms

In [None]:
# Scratch experiment: show "a", then split its root fiber with splitEqual(1)
# and show what the split looks like.
displayTensor(a)

a_root = a.getRoot()
asplit = a_root.splitEqual(1)
displayTensor(asplit)