In [None]:
from ExTensorClasses import DRAMIntersector
from ExTensorClasses import LLBIntersector
from ExTensorClasses import PEArray
from ExTensorClasses import PEIntersector
from ExTensorClasses import CSFNode
from ExTensorClasses import coo_to_csf
from ExTensorClasses import print_csf_tree
import numpy as np
from scipy.sparse import coo_matrix
from threading import Thread
from threading import Event

In [None]:
# --- Simulation parameters ---------------------------------------------------
# Tile sizes passed to the LLB/PE intersectors and to coo_to_csf below.
LLB_TILE_SIZE = 50
PE_TILE_SIZE = 25
NUM_PES = 128          # number of PEIntersector instances behind the PEArray

# Operand shapes: the first input is I x K, the second is K x J.
I = 100
K = 100
J = 100

# Number of random (row, col, value) triples drawn for EACH input matrix.
# Coordinates are drawn independently, so some triples may land on the same cell.
NUM_INTS = 100

# Options: NoMerge, Skip, NoSkip
MODE = "Skip"

# Seed for the random operands so that a run (and any mismatch found by the
# verification cell) can be reproduced. Set to None for fresh inputs on every run.
SEED = 42

# --- Pipeline construction: DRAM -> LLB -> PE array -> individual PEs ---------
# Loop condition of the simulation driver cell; it has to be True before that cell runs.
endFlag = True
dramIntersector = DRAMIntersector("NoMerge")
llbIntersector = LLBIntersector(MODE, LLB_TILE_SIZE, PE_TILE_SIZE)
peArray = PEArray(NUM_PES)
peIntersectorList = [
    PEIntersector(MODE, LLB_TILE_SIZE, min(PE_TILE_SIZE, NUM_INTS), pe_id)
    for pe_id in range(NUM_PES)
]
dramIntersector.setNext(llbIntersector)
llbIntersector.setNext(peArray)
peArray.setNext(peIntersectorList)

# --- Random sparse operands ----------------------------------------------------
# Values are drawn from [1, 10); coordinates from the valid index range of each axis.
# The draw order (data, row, col for input 1, then input 2) is kept fixed so a given
# SEED always yields the same pair of matrices.
gen = np.random.default_rng(SEED)
data1 = gen.integers(1, 10, NUM_INTS)
row1 = gen.integers(0, I, NUM_INTS)
col1 = gen.integers(0, K, NUM_INTS)

data2 = gen.integers(1, 10, NUM_INTS)
row2 = gen.integers(0, K, NUM_INTS)
col2 = gen.integers(0, J, NUM_INTS)
i1 = coo_matrix((data1, (row1, col1)), shape=(I, K))
i2 = coo_matrix((data2, (row2, col2)), shape=(K, J))

# Convert both operands to tiled CSF trees.
# NOTE(review): the final boolean differs between the two operands (False / True);
# presumably it selects the traversal order of the second operand - confirm in coo_to_csf.
input1 = coo_to_csf(i1, LLB_TILE_SIZE, LLB_TILE_SIZE, PE_TILE_SIZE, PE_TILE_SIZE, False)
input2 = coo_to_csf(i2, LLB_TILE_SIZE, LLB_TILE_SIZE, PE_TILE_SIZE, PE_TILE_SIZE, True)


In [None]:
# Hand both CSF operands, the shared dimension K and the LLB tile size to the
# top (DRAM) level of the pipeline.
dramIntersector.input(input1, input2, K, LLB_TILE_SIZE)

# One Event and one worker thread per pipeline stage. Each Event starts out set
# and is passed to the stage's running() method, which runs on its own thread.
# NOTE(review): presumably running() performs one simulated cycle whenever its
# event has been cleared and then sets it again - confirm in ExTensorClasses.
dIEvent = Event()
dIEvent.set()
Thread(target=dramIntersector.running,args=[dIEvent]).start()

lIEvent = Event()
lIEvent.set()
Thread(target=llbIntersector.running,args=[lIEvent]).start()

peAEvent = Event()
peAEvent.set()
Thread(target=peArray.running,args=[peAEvent]).start()

# Disabled variant that gave every PE its own thread and Event.
#peThreadList = [None] * NUM_PES
#peEventList = [None] * NUM_PES
#for x in range(NUM_PES):
#    peEventList[x] = Event()
#    peEventList[x].set()
#    Thread(target=peIntersectorList[x].running,args=[peEventList[x]]).start()

# Each pass of the loop below is counted as one cycle in `count`.
# NOTE(review): `endFlag` is initialised in the configuration cell, so this cell
# only enters the loop if that cell was executed beforehand.
count = 0
while endFlag:
    count += 1
    # Clear all three events, then block on the event of every stage that has
    # not raised its endFlag yet.
    dIEvent.clear()
    lIEvent.clear()
    peAEvent.clear()
    #for event in peEventList:
    #    event.clear()

    if not dramIntersector.endFlag:
        dIEvent.wait()
    else:
        # The DRAM stage is finished while the loop is still running: count this
        # cycle as an empty one for it.
        # NOTE(review): the LLB and PE-array branches below have no matching
        # else-branch; presumably those classes track idle cycles themselves - confirm.
        dramIntersector.numEmptyCycles += 1
    if not llbIntersector.endFlag:
        lIEvent.wait()
    if not peArray.endFlag:
        peAEvent.wait()
    #for pe in range(NUM_PES):
     #   if peIntersectorList[pe].endFlag:
     #       peIntersectorList[pe].numEmptyCycles += 1 # if the PE is at EOF but the overall processing isn't finished, increment the number of wasted cycles

    # Keep looping while any of the three stages, or any individual PE, has not
    # set its endFlag.
    endFlag = not (dramIntersector.endFlag and llbIntersector.endFlag and peArray.endFlag)
    for x in range(NUM_PES):
        endFlag = endFlag or not peIntersectorList[x].endFlag
    
# --- Report: cycle count, per-PE and per-stage utilization, memory statistics ---
print("TOTAL CYCLES TAKEN: ", count)

# Utilization of a unit = share of cycles that were NOT counted as empty, in percent.
total = 0
for pe_index in range(NUM_PES):
    idle_share = round(peIntersectorList[pe_index].numEmptyCycles / count, 4)
    pe_utilization = (1 - idle_share) * 100
    formatted = '%.2f' % pe_utilization
    print("PE#" + str(pe_index) + " Hardware Utilization:" + formatted + "%")
    total += pe_utilization

average_utilization = total / NUM_PES
print("Average PE Hardware Utilization: " + str(average_utilization))

llb_utilization = (1 - round(llbIntersector.numEmptyCycles / count, 4)) * 100
dram_utilization = (1 - round(dramIntersector.numEmptyCycles / count, 4)) * 100
print("LLB Intersector Hardware Utilization (Including memory-related wasted cycles):" + '%.2f' % llb_utilization + "%")
print("DRAM Intersector Hardware Utilization (Including memory-related wasted cycles):" + '%.2f' % dram_utilization + "%")

# Memory traffic and memory-induced stall counters as recorded by the two upper stages.
print("LLB Memory Usage:", llbIntersector.memoryAccessBytes, "LLB memory wasted cycles: ", llbIntersector.memoryWastedCycles )
print("DRAM Memory Usage:", dramIntersector.memoryAccessBytes, "DRAM memory wasted cycles: ", dramIntersector.memoryWastedCycles )

In [None]:
# --- Verify the simulated product against a dense reference -------------------
# Every PE exposes its results in `.output`; each entry is indexed as
# (value, row, col). Collect all of them into COO-style coordinate lists.
r = []
c = []
v = []
for pe in peIntersectorList:
    for entry in pe.output:
        v.append(entry[0])
        r.append(entry[1])
        c.append(entry[2])

# The product of an (I x K) and a (K x J) matrix is (I x J). The previous shape
# (I, K) only worked because K == J in the default configuration.
# toarray() sums entries that share a coordinate, which accumulates partial
# products reported separately for the same output cell.
est = coo_matrix((v, (r, c)), shape=(I, J)).toarray()
actual = np.matmul(i1.toarray(), i2.toarray())
print(est)
print(actual)
print(np.equal(actual, est))
print(np.allclose(actual, est, rtol=0.0001, atol=0.0001))

In [None]:
def coo_to_small_csf(coo_matrix):
    """Build an untiled, two-level CSF tree from a COO matrix.

    For every stored entry a path root -> column -> row is created (column
    coordinate first, row coordinate second) and the entry's value is stored on
    the node at the end of that path. Entries that share the same (row, col)
    coordinate are summed.

    Args:
        coo_matrix: object exposing parallel ``row``, ``col`` and ``data``
            sequences (e.g. a ``scipy.sparse.coo_matrix``).

    Returns:
        The root ``CSFNode`` of the tree; the root itself carries no value.
    """
    root = CSFNode(None)

    for i, j, data in zip(coo_matrix.row, coo_matrix.col, coo_matrix.data):
        node = root

        # Walk (and create on demand) the path for this entry.
        # The column coordinate j deliberately comes before the row coordinate i:
        # per the original author, the LLB tiles are column-major because the
        # multiplication is B-stationary. For A-stationary both operands should
        # be row-major; for output-stationary A is row-major and B column-major.
        for coord in (j, i):
            if coord not in node.children:
                node.children[coord] = CSFNode(None)
            node = node.children[coord]

        # Accumulate duplicates; an untouched node still holds None.
        if node.value is not None:
            node.value += data
        else:
            node.value = data

    return root

In [None]:
# Disabled scratch cell: everything between the triple quotes is a plain string
# expression, so none of the statements inside it are executed.
# It holds a hand-written 5x5 example built with coo_to_small_csf.
# NOTE(review): the calls inside the string (DRAMIntersector(True), LLBIntersector(True),
# dramIntersector.input(input1,input2)) use different arguments than the live cells of
# this notebook; presumably they target an older ExTensorClasses API - confirm before re-enabling.
"""sumary_line



LLB_TILE_SIZE = 2
PE_TILE_SIZE = 1
NUM_PES = 15

data1 = [1,1,1,1,1]
row1 = [1, 2, 3, 4, 0]
col1 = [1, 1, 1, 2, 4]

data2 = [1,1,1,1,1]
row2 = [1, 1, 3, 2, 0]
col2 = [1, 2, 1, 2, 4]
i1 = coo_matrix((data1, (row1, col1)), shape=(5, 5))
i2 = coo_matrix((data2, (row2, col2)), shape=(5, 5))

input1 = coo_to_small_csf(i1)
input2 = coo_to_small_csf(i2)

LLB_TILE_SIZE = 100
PE_TILE_SIZE = 50
NUM_PES = 15

endFlag = True
dramIntersector = DRAMIntersector(True)
llbIntersector = LLBIntersector(True)
peArray = PEArray(NUM_PES)
peIntersectorList = []
for x in range(NUM_PES):
    peIntersectorList.append(PEIntersector(True, LLB_TILE_SIZE, PE_TILE_SIZE,x))
dramIntersector.setNext(llbIntersector)
llbIntersector.setNext(peArray)
peArray.setNext(peIntersectorList)

dramIntersector.input(input1,input2)
print(i1.toarray())
print(i2.toarray())
print_csf_tree(input1, depth=0)
print_csf_tree(input2, depth=0)
"""

In [None]:
#print(dramIntersector)
#dramIntersector.cycle()
#print(dramIntersector)