In [None]:
from MatRaptorClasses import PE
from MatRaptorClasses import csr_to_c2sr
from MatRaptorClasses import SpAL
from MatRaptorClasses import SpBL
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
import time
from threading import Thread
from threading import Event

In [None]:
# --- Simulation configuration -------------------------------------------------
NUM_CHANNELS = 5
NUM_QUEUES = 10
endFlag = True  # overwritten with False by the driver cell before it is ever read

# --- Input generation ---------------------------------------------------------
SEED = 0            # fixed seed so Restart & Run All reproduces the same operands
MATRIX_DIM = 1000   # both operands are MATRIX_DIM x MATRIX_DIM
NUM_SAMPLES = 1000  # coordinate draws per operand (duplicate coordinates are summed)
VALUE_LOW = 1       # smallest generated value (inclusive)
VALUE_HIGH = 100    # upper bound for generated values (exclusive)


def random_sparse_operand(rng, dim=MATRIX_DIM, num_samples=NUM_SAMPLES):
    """Draw a random square sparse integer matrix and return it in CSR form.

    Values, row indices and column indices are drawn (in that order) from
    ``rng``. Entries that land on the same coordinate are summed during the
    COO -> CSR conversion, so the matrix may hold fewer than ``num_samples``
    stored entries.

    Parameters
    ----------
    rng : numpy.random.Generator
        Source of randomness.
    dim : int
        Number of rows and columns.
    num_samples : int
        Number of (value, row, col) triples to draw.

    Returns
    -------
    scipy.sparse.csr_matrix
        The generated ``dim`` x ``dim`` matrix.
    """
    values = rng.integers(VALUE_LOW, VALUE_HIGH, num_samples)
    rows = rng.integers(0, dim, num_samples)
    cols = rng.integers(0, dim, num_samples)
    # Convert sparse-to-sparse; no need to materialise a dense dim x dim array.
    return coo_matrix((values, (rows, cols)), shape=(dim, dim)).tocsr()


gen = np.random.default_rng(SEED)
i1 = random_sparse_operand(gen)
i2 = random_sparse_operand(gen)

# Hand the raw CSR arrays plus the channel count to the project's csr_to_c2sr helper.
inputA = csr_to_c2sr(i1.data, i1.indices, i1.indptr, NUM_CHANNELS)
inputB = csr_to_c2sr(i2.data, i2.indices, i2.indptr, NUM_CHANNELS)

# NumPy abbreviates large arrays, so these only show the corners of each operand.
print(i1.toarray())
print(i2.toarray())



In [None]:
# Each individual set of (SpAL, SpBL, PE) runs in its own thread.
# Remember that the events are used ONLY to wait for the other PEs to stop.
# To keep this clear, a very simple wrapper class holds all three components:
# it has an SpAL, an SpBL and a PE, plus a "running" method that is used as the thread target
# and cycles each part once every time it is allowed to.

class Wrapper:
    """Groups one SpAL, one SpBL and one PE so a single thread can step them together.

    Attributes are plain references to the three components plus ``endFlag``,
    which becomes truthy once all three components report their own ``endFlag``.
    """

    def __init__(self, SpAL, SpBL, PE) -> None:
        """Store the three components; the wrapper starts in the not-finished state."""
        self.endFlag = False
        self.PE = PE
        self.SpBL = SpBL
        self.SpAL = SpAL

    def running(self, event):
        """Thread body: step the components once each time ``event`` is found cleared.

        The event works as a "cycle finished" signal. While it is set this
        method only polls; once it is found cleared, the PE, SpBL and SpAL are
        cycled (in that order), ``endFlag`` is refreshed, and the event is set
        again. The event is also set one final time when the loop exits.

        Parameters
        ----------
        event : threading.Event
            Cleared by the caller to allow one cycle; set here when it is done.
        """
        while True:
            if self.endFlag:
                break
            # Short pause on every pass so the polling loop does not spin flat out.
            time.sleep(0.0001)
            if event.is_set():
                # Previous cycle not yet acknowledged by the caller; keep polling.
                continue
            for component in (self.PE, self.SpBL, self.SpAL):
                component.cycle()
            self.endFlag = (
                self.PE.endFlag
                and self.SpAL.endFlag
                and self.SpBL.endFlag
            )
            event.set()
        # Signal once more on exit so a caller blocked in wait() is always released.
        event.set()
        
# Build one SpAL -> SpBL -> PE pipeline per channel and start a worker thread for each.
WrapperArray = []
WrapperEventArray = []
for channel in range(NUM_CHANNELS):
    loaderA = SpAL(channel, NUM_CHANNELS)
    loaderB = SpBL(channel)
    # NOTE(review): 10 is presumably the PE's queue count and equals NUM_QUEUES — confirm
    # against PE's constructor before replacing the literal with the constant.
    processingElement = PE(10, channel)

    # Chain the stages: SpAL feeds SpBL, SpBL feeds the PE.
    loaderA.setNext(loaderB)
    loaderB.setNext(processingElement)

    loaderA.loadMatrixA(inputA)
    loaderB.loadMatrixB(inputB)

    worker = Wrapper(loaderA, loaderB, processingElement)

    # Wrapper.running executes a cycle whenever it finds its event cleared, so the
    # event must start out SET. Otherwise every worker runs one cycle as soon as its
    # thread starts, before the driver loop has counted it, and that uncounted cycle
    # can race with the driver's first clear().
    cycleDone = Event()
    cycleDone.set()

    WrapperArray.append(worker)
    WrapperEventArray.append(cycleDone)

    # Daemon threads: a failure or interrupt in the driver must not leave polling
    # workers behind that keep the interpreter from exiting.
    Thread(target=worker.running, args=(cycleDone,), daemon=True).start()


In [None]:
# Drive all wrappers in lock-step: clear every event to allow one cycle, then wait
# until each still-running wrapper has set its event again.
endFlag = False
cycleCount = 0
while not endFlag:
    cycleCount += 1

    # Allow one cycle on every channel.
    for channel in range(NUM_CHANNELS):
        WrapperEventArray[channel].clear()

    # Wait for the channels that are still running, and track whether all have ended.
    allFinished = True
    for channel in range(NUM_CHANNELS):
        worker = WrapperArray[channel]
        if not worker.endFlag:
            WrapperEventArray[channel].wait()
        # Stays True only if every wrapper seen so far reports its endFlag.
        allFinished = allFinished and worker.endFlag
    endFlag = allFinished

# Per-PE wasted-cycle counters, followed by the number of driver iterations.
for channel in range(NUM_CHANNELS):
    peStats = WrapperArray[channel].PE
    print(
        f"PE{channel}, Wasted Cycles: {peStats.numWastedCycles}"
        f" , Part I Wasted Cycles: {peStats.partIWastedCycles}"
        f" , Part II Wasted Cycles: {peStats.partIIWastedCycles}"
    )
print("total Cycles", cycleCount)

In [None]:

# Collect every PE's output entries and compare the assembled matrix with a reference
# product computed by SciPy. Each entry is read as (value, row, col) = (o[0], o[1], o[2]).
r = []
c = []
v = []
for wrapper in WrapperArray:
    for o in wrapper.PE.outputBuffer:
        v.append(o[0])
        r.append(o[1])
        c.append(o[2])

# Derive the result shape from the operands instead of repeating their size here.
resultShape = (i1.shape[0], i2.shape[1])

# Duplicate (row, col) coordinates are summed when the COO matrix is densified.
est = coo_matrix((v, (r, c)), resultShape).toarray()
# Multiply in sparse form and densify only the product.
actual = (i1 @ i2).toarray()

print(np.equal(actual, est))
print(np.allclose(actual, est, rtol=0.0001, atol=0.0001))



Test a handwritten example on a single PE

In [None]:
# Dump the raw output buffer of every channel's PE, one buffer per line.
for peBuffer in (entry.PE.outputBuffer for entry in WrapperArray):
    print(peBuffer)

In [None]:
# Feed a handwritten input sequence into a standalone PE.
# The meaning of each positional argument is defined by PE.input in MatRaptorClasses
# (not visible here); the tuples below are passed through unchanged and in order.
comp = PE(3, 1)

handwrittenInputs = [
    # third argument == 0
    (1, 1, 0, 1, 3),
    (1, 1, 0, 2, 3),
    (1, 1, 0, 3, 3),
    (1, 1, 0, 4, 3),
    # third argument == 1
    (1, 1, 1, 1, 1),
    (1, 1, 1, 3, 1),
    (1, 1, 1, 2, 2),
    (1, 1, 1, 4, 2),
    # third argument == 2
    (1, 1, 2, 1, 1),
    (1, 1, 2, 3, 1),
    # third argument == 4
    (1, 1, 4, 1, 1),
    (1, 1, 4, 3, 1),
    (1, 1, 4, 2, 2),
    (1, 1, 4, 4, 2),
    (1, 1, 4, 1, 3),
    (1, 1, 4, 2, 3),
    (1, 1, 4, 3, 3),
    (1, 1, 4, 4, 3),
    (1, 1, 4, 1, 4),
    (1, 1, 4, 3, 4),
]
for inputArgs in handwrittenInputs:
    comp.input(*inputArgs)

# All-None call last — presumably the end-of-input marker; confirm against PE.input.
comp.input(None, None, None, None, None)


In [None]:
# Step the standalone PE, printing the cycle index and the PE's string form each time.
# Should finish running in 31 cycles.
for cycleIndex in range(32):
    print(cycleIndex)
    comp.cycle()
    print(str(comp))

In [None]:
# Show what the standalone PE produced for the handwritten input.
handwrittenResult = comp.outputBuffer
print(handwrittenResult)