In [37]:
from ExTensorClasses import DRAMIntersector
from ExTensorClasses import LLBIntersector
from ExTensorClasses import PEArray
from ExTensorClasses import PEIntersector
from ExTensorClasses import CSFNode
from ExTensorClasses import coo_to_csf
from ExTensorClasses import print_csf_tree
import numpy as np
from scipy.sparse import coo_matrix
from threading import Thread
from threading import Event

In [41]:
# --- Simulation configuration ------------------------------------------------
LLB_TILE_SIZE = 50    # passed to coo_to_csf and to every PEIntersector
PE_TILE_SIZE = 25     # passed to coo_to_csf and to every PEIntersector
NUM_PES = 120         # number of PEIntersector instances behind the PEArray

MATRIX_DIM = 1000     # both random operands are MATRIX_DIM x MATRIX_DIM
NUM_ENTRIES = 10000   # COO entries drawn per operand (coordinates may repeat)
SEED = 0              # fixed seed so Restart & Run All reproduces the operands

# Loop-control flag read by the driver cell's `while endFlag:` loop.
endFlag = True

# --- Build the pipeline: DRAM -> LLB -> PE array -> individual PEs -----------
dramIntersector = DRAMIntersector(True)
llbIntersector = LLBIntersector(True)
peArray = PEArray(NUM_PES)
peIntersectorList = [
    PEIntersector(True, LLB_TILE_SIZE, PE_TILE_SIZE, pe_id)
    for pe_id in range(NUM_PES)
]
dramIntersector.setNext(llbIntersector)
llbIntersector.setNext(peArray)
peArray.setNext(peIntersectorList)

# --- Random sparse operands --------------------------------------------------
# Values are integers in [1, 100); coordinates are integers in [0, MATRIX_DIM).
# The draw order (data, row, col for operand 1, then operand 2) is kept fixed so
# a given SEED always yields the same pair of matrices.
gen = np.random.default_rng(SEED)
data1 = gen.integers(1, 100, NUM_ENTRIES)
row1 = gen.integers(0, MATRIX_DIM, NUM_ENTRIES)
col1 = gen.integers(0, MATRIX_DIM, NUM_ENTRIES)

data2 = gen.integers(1, 100, NUM_ENTRIES)
row2 = gen.integers(0, MATRIX_DIM, NUM_ENTRIES)
col2 = gen.integers(0, MATRIX_DIM, NUM_ENTRIES)
i1 = coo_matrix((data1, (row1, col1)), shape=(MATRIX_DIM, MATRIX_DIM))
i2 = coo_matrix((data2, (row2, col2)), shape=(MATRIX_DIM, MATRIX_DIM))

# Convert both operands to tiled CSF trees.
# NOTE(review): the final boolean differs between the two operands (False for
# the first, True for the second); its meaning is defined in ExTensorClasses --
# presumably it selects the coordinate ordering of the operand; confirm there.
input1 = coo_to_csf(i1, LLB_TILE_SIZE, LLB_TILE_SIZE, PE_TILE_SIZE, PE_TILE_SIZE, False)
input2 = coo_to_csf(i2, LLB_TILE_SIZE, LLB_TILE_SIZE, PE_TILE_SIZE, PE_TILE_SIZE, True)


In [42]:
# Hand both CSF operands to the head of the pipeline.
dramIntersector.input(input1, input2)

# Every pipeline stage runs its `running` method on its own thread and receives
# a dedicated Event that is created here in the *set* state.
# NOTE(review): the handshake protocol lives in ExTensorClasses; presumably each
# `running` loop performs one cycle when its event is cleared and then sets the
# event again -- confirm against the class implementations.
dIEvent = Event()
dIEvent.set()
Thread(target=dramIntersector.running,args=[dIEvent]).start()

lIEvent = Event()
lIEvent.set()
Thread(target=llbIntersector.running,args=[lIEvent]).start()

peAEvent = Event()
peAEvent.set()
Thread(target=peArray.running,args=[peAEvent]).start()

# One event and one thread per PE. `peThreadList` is allocated but never
# populated in this cell: the Thread objects are started without being retained.
peThreadList = [None] * NUM_PES
peEventList = [None] * NUM_PES
for x in range(NUM_PES):
    peEventList[x] = Event()
    peEventList[x].set()
    Thread(target=peIntersectorList[x].running,args=[peEventList[x]]).start()
# `count` is the number of iterations of the loop below (printed at the end).
count = 0
while endFlag:
    count += 1
    # Clear every stage's event ...
    dIEvent.clear()
    lIEvent.clear()
    peAEvent.clear()
    for event in peEventList:
        event.clear()

    # ... then block on the event of each stage whose endFlag is still False.
    # Stages that already report endFlag are skipped so the loop does not wait
    # on them.
    if not dramIntersector.endFlag:
        dIEvent.wait()
    if not llbIntersector.endFlag:
        lIEvent.wait()
    if not peArray.endFlag:
        peAEvent.wait()
    for pe in range(NUM_PES):
        if not peIntersectorList[pe].endFlag:
            peEventList[pe].wait()

    # Keep looping until the DRAM, LLB and PE-array stages *and* every
    # individual PE all report endFlag.
    endFlag = not (dramIntersector.endFlag and llbIntersector.endFlag and peArray.endFlag)
    for x in range(NUM_PES):
        endFlag = endFlag or not peIntersectorList[x].endFlag
# Report the iteration count and each PE's `numEmptyCycles` counter.
print(count)
for x in range(NUM_PES):
    print("PE#" + str(x) + " Wasted Cycles:" + str(peIntersectorList[x].numEmptyCycles))

269551
PE#0 Wasted Cycles:3
PE#1 Wasted Cycles:3
PE#2 Wasted Cycles:4
PE#3 Wasted Cycles:5
PE#4 Wasted Cycles:6
PE#5 Wasted Cycles:8
PE#6 Wasted Cycles:8
PE#7 Wasted Cycles:10
PE#8 Wasted Cycles:10
PE#9 Wasted Cycles:12
PE#10 Wasted Cycles:13
PE#11 Wasted Cycles:14
PE#12 Wasted Cycles:15
PE#13 Wasted Cycles:16
PE#14 Wasted Cycles:17


In [36]:
# Gather the output triples emitted by every PE. Each entry of `pe.output` is
# indexed positionally and used here as (value, row, col).
r = []
c = []
v = []
for pe in peIntersectorList:
    for o in pe.output:
        v.append(o[0])
        r.append(o[1])
        c.append(o[2])

# Derive the product's shape from the operands instead of hard-coding it, so
# this cell stays valid if `i1` / `i2` are rebuilt with a different size.
result_shape = (i1.shape[0], i2.shape[1])

# Densify the simulator output and compare it with NumPy's dense product.
est = coo_matrix((v, (r, c)), shape=result_shape).toarray()
actual = np.matmul(i1.toarray(), i2.toarray())
print(est)
print(actual)
print(np.equal(actual, est))
print(np.allclose(actual, est, rtol=0.0001, atol=0.0001))

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
True


In [27]:
def coo_to_small_csf(coo_matrix):
    """Build an untiled, two-level CSF tree (column, then row) from a COO matrix.

    Args:
        coo_matrix: object exposing parallel ``row``, ``col`` and ``data``
            sequences, e.g. a ``scipy.sparse.coo_matrix``. Note that this
            parameter name shadows the imported ``coo_matrix`` class inside
            the function body.

    Returns:
        The root ``CSFNode``. ``root.children[j].children[i].value`` holds the
        sum of every entry stored at row ``i``, column ``j``.
    """
    root = CSFNode(None)

    for i, j, data in zip(coo_matrix.row, coo_matrix.col, coo_matrix.data):
        current_node = root

        # Descend column-first (j before i), i.e. the tree is column-major.
        # Author's note: the multiplication is B-stationary, so the first two
        # coordinates must be column-major. For A-stationary both operands
        # would be row-major; for output-stationary, A would be row-major and
        # B column-major.
        for coord in (j, i):
            if coord not in current_node.children:
                current_node.children[coord] = CSFNode(None)
            current_node = current_node.children[coord]

        # Accumulate into the leaf so repeated (row, col) entries are summed.
        # Compare against None by identity, not equality.
        if current_node.value is not None:
            current_node.value += data
        else:
            current_node.value = data

    return root

In [28]:
# Small hand-written 5x5 example for stepping through the pipeline by hand.
# NOTE: this cell rebinds i1, i2, input1, input2 and all pipeline objects that
# the large random experiment also uses; re-run that experiment's setup cell
# before re-running its driver or verification cells.
data1 = [1,1,1,1,1]
row1 = [1, 2, 3, 4, 0]
col1 = [1, 1, 1, 2, 4]

data2 = [1,1,1,1,1]
row2 = [1, 1, 3, 2, 0]
col2 = [1, 2, 1, 2, 4]
i1 = coo_matrix((data1, (row1, col1)), shape=(5, 5))
i2 = coo_matrix((data2, (row2, col2)), shape=(5, 5))

# Untiled CSF trees; coo_to_small_csf does not read any tile-size constant.
input1 = coo_to_small_csf(i1)
input2 = coo_to_small_csf(i2)

# Configuration actually used by the pipeline below. The cell previously also
# assigned 2 / 1 / 15 to these names first, but those values were overwritten
# before anything read them, so only the effective values are kept.
LLB_TILE_SIZE = 100
PE_TILE_SIZE = 50
NUM_PES = 15

# Fresh pipeline: DRAM -> LLB -> PE array -> individual PEs.
endFlag = True
dramIntersector = DRAMIntersector(True)
llbIntersector = LLBIntersector(True)
peArray = PEArray(NUM_PES)
peIntersectorList = [
    PEIntersector(True, LLB_TILE_SIZE, PE_TILE_SIZE, pe_id)
    for pe_id in range(NUM_PES)
]
dramIntersector.setNext(llbIntersector)
llbIntersector.setNext(peArray)
peArray.setNext(peIntersectorList)

# Load the operands, then show the dense matrices and their CSF trees.
dramIntersector.input(input1,input2)
print(i1.toarray())
print(i2.toarray())
print_csf_tree(input1, depth=0)
print_csf_tree(input2, depth=0)

[[0 0 0 0 1]
 [0 1 0 0 0]
 [0 1 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]]
[[0 0 0 0 1]
 [0 1 1 0 0]
 [0 0 1 0 0]
 [0 1 0 0 0]
 [0 0 0 0 0]]
Coordinate 1:
  Coordinate 1:
    Leaf: 1
  Coordinate 2:
    Leaf: 1
  Coordinate 3:
    Leaf: 1
Coordinate 2:
  Coordinate 4:
    Leaf: 1
Coordinate 4:
  Coordinate 0:
    Leaf: 1
Coordinate 1:
  Coordinate 1:
    Leaf: 1
  Coordinate 3:
    Leaf: 1
Coordinate 2:
  Coordinate 1:
    Leaf: 1
  Coordinate 2:
    Leaf: 1
Coordinate 4:
  Coordinate 0:
    Leaf: 1


In [29]:
# Step the DRAM intersector by exactly one cycle on the calling thread and
# print its string representation before and after, to inspect what a single
# cycle does to its state.
print(dramIntersector)
dramIntersector.cycle()
print(dramIntersector)

DRAM StreamOne: [], StreamTwo: [], endFlag: False
DRAM StreamOne: [4, 2], StreamTwo: [3], endFlag: False
