In [None]:
from MatRaptorClasses import PE
from MatRaptorClasses import csr_to_c2sr
from MatRaptorClasses import SpAL
from MatRaptorClasses import SpBL
from MatRaptorClasses import Memory
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
import time
from threading import Thread
from threading import Event

In [None]:
# --- Simulator configuration and random test operands ------------------------
# Every tunable lives here; the random operands are derived from SEED so that
# a fresh "Restart & Run All" reproduces the same matrices and statistics.
NUM_CHANNELS = 8  # channel count handed to csr_to_c2sr below
# NOTE: the misspelling "BANDWDITH" is kept on purpose; other cells use this name.
PEAK_BANDWDITH_PER_CHANNEL = 16
NUM_QUEUES = 10
endFlag = True
I = 1000  # rows of the first operand
K = 1000  # shared inner dimension (columns of operand 1, rows of operand 2)
J = 1000  # columns of the second operand
NUM_INTS = 10000  # number of random (row, col, value) triples drawn per operand
MODE = "None"  # a string, not the None object
SEED = 0  # fixed seed for reproducible inputs; set to None to draw fresh entropy

gen = np.random.default_rng(SEED)

# First operand, shape (I, K): values in [1, 10) at random coordinates.
data1 = gen.integers(1, 10, NUM_INTS)
row1 = gen.integers(0, I, NUM_INTS)
col1 = gen.integers(0, K, NUM_INTS)

# Second operand, shape (K, J).
data2 = gen.integers(1, 10, NUM_INTS)
row2 = gen.integers(0, K, NUM_INTS)
col2 = gen.integers(0, J, NUM_INTS)

# Coordinates can repeat; the COO -> CSR conversion sums duplicate entries, so
# the number of stored non-zeros may be lower than NUM_INTS.
i1 = csr_matrix(coo_matrix((data1, (row1, col1)), shape=(I, K)))
i2 = csr_matrix(coo_matrix((data2, (row2, col2)), shape=(K, J)))

# Re-pack both operands from their CSR arrays with the project converter.
inputA = csr_to_c2sr(i1.data, i1.indices, i1.indptr, NUM_CHANNELS)
inputB = csr_to_c2sr(i2.data, i2.indices, i2.indptr, NUM_CHANNELS)

print(i1.toarray())
print(i2.toarray())

In [None]:
# Threading design for this cell:
#   * Each (SpAL, SpBL, PE) trio is bundled in a small Wrapper object and runs
#     in its own thread.
#   * One Event per wrapper is the only synchronisation primitive: the worker
#     sets it after finishing a cycle, and the driver clears it to allow the
#     next cycle, so every trio advances exactly one cycle at a time.
#   * Wrapper.running is the thread target; each time it is allowed to run it
#     cycles the PE, the SpBL and the SpAL once.
# Global cycle counter: bumped once at the end of this setup cell, then reset
# by the simulation-loop cell before it starts counting.
cycleCount = 0

class Wrapper:
    """Bundle one SpAL, one SpBL and one PE so they can be stepped by a thread.

    Attributes (documented here rather than per field):
        SpAL, SpBL, PE: the three pipeline stages; each must expose ``cycle()``
            and an ``endFlag`` attribute.
        endFlag: becomes truthy once all three stages report ``endFlag``, or
            when the worker aborted because a stage raised.
        error: the exception that aborted ``running``, or ``None``.
    """

    def __init__(self, SpAL, SpBL, PE) -> None:
        self.SpAL = SpAL
        self.SpBL = SpBL
        self.PE = PE
        self.endFlag = False
        self.error = None

    def running(self, event):
        """Thread target: run one cycle of every stage each time ``event`` is clear.

        Handshake: the driver clears ``event`` to grant a cycle; this method
        cycles PE, then SpBL, then SpAL, refreshes ``endFlag`` and sets
        ``event`` to signal that the cycle is complete. While ``event`` is set
        the worker just polls.

        Args:
            event: a ``threading.Event`` shared with the driver loop.

        Raises:
            Exception: whatever a stage's ``cycle()`` raised. Before it
                propagates, the wrapper is marked finished and ``event`` is set
                so a driver blocked in ``event.wait()`` is released instead of
                hanging forever.
        """
        try:
            while not self.endFlag:
                # Short sleep so the polling loop yields to the other workers.
                time.sleep(0.0001)
                if not event.is_set():
                    self.PE.cycle()
                    self.SpBL.cycle()
                    self.SpAL.cycle()
                    self.endFlag = self.PE.endFlag and self.SpAL.endFlag and self.SpBL.endFlag
                    event.set()
        except Exception as exc:
            # A dead worker must not look "still running" to the driver:
            # record the failure and mark this wrapper as finished.
            self.error = exc
            self.endFlag = True
            raise
        finally:
            # Always release the driver, on normal completion and on failure.
            event.set()
        
# Build one (SpAL -> SpBL -> PE) pipeline per channel, all sharing one Memory,
# and start a worker thread for each pipeline.
WrapperArray = []       # one Wrapper per channel
WrapperEventArray = []  # WrapperEventArray[x] is the "cycle done" event of WrapperArray[x]
WrapperThreads = []     # thread handles, kept so workers can be inspected or joined

memory = Memory(NUM_CHANNELS, PEAK_BANDWDITH_PER_CHANNEL)
for x in range(NUM_CHANNELS):
    A = SpAL(x, NUM_CHANNELS, MODE)
    B = SpBL(MODE)
    # NOTE(review): the literal 10 may be meant to be NUM_QUEUES (also 10);
    # confirm against PE's constructor before replacing it.
    P = PE(10, x, MODE)

    # Chain the stages: SpAL feeds SpBL, SpBL feeds the PE.
    A.setNext(B)
    B.setNext(P)

    A.setMemory(memory)
    B.setMemory(memory)

    if MODE == "CSR":
        # Plain CSR triples straight from SciPy.
        A.loadMatrixA((i1.data, i1.indices, i1.indptr))
        B.loadMatrixB((i2.data, i2.indices, i2.indptr))
    else:
        # Operands pre-converted by csr_to_c2sr.
        A.loadMatrixA(inputA)
        B.loadMatrixB(inputB)

    W = Wrapper(A, B, P)
    cycleDone = Event()
    WrapperArray.append(W)
    WrapperEventArray.append(cycleDone)

    # Daemon threads: if the driver loop is interrupted or never run, the
    # polling workers must not keep the interpreter alive at shutdown.
    # The event starts cleared, so each worker runs its first cycle at once.
    worker = Thread(target=W.running, args=[cycleDone], daemon=True)
    WrapperThreads.append(worker)
    worker.start()
memory.cycle()
cycleCount += 1 # one cycle when setting up so it works right (too lazy to code it better lol)

In [None]:
# Drive the simulation: every loop iteration is one global cycle in which each
# still-running wrapper advances once, followed by one memory cycle.
endFlag = False
cycleCount = 0

# Each worker starts a cycle by itself as soon as its thread is started (its
# event begins cleared). Wait for those cycles to finish so that the first
# counted cycle below does not overlap with them. Every worker sets its event
# at least once, so this cannot block on a healthy worker.
for cycleDone in WrapperEventArray:
    cycleDone.wait()

while not endFlag:
    cycleCount += 1

    # Snapshot who is still running BEFORE granting the cycle. Reading endFlag
    # after clear() races with a worker finishing its final cycle and would
    # nondeterministically book that productive cycle as a wasted one.
    stillRunning = [not wrapper.endFlag for wrapper in WrapperArray]

    for cycleDone in WrapperEventArray:
        cycleDone.clear()

    for wrapper, cycleDone, active in zip(WrapperArray, WrapperEventArray, stillRunning):
        if active:
            cycleDone.wait()
        else:
            # increment the wasted cycles for its PE, since the Wrapper isn't running
            wrapper.PE.numWastedCycles += 1

    # The run is over only once every wrapper has reached its end.
    endFlag = all(wrapper.endFlag for wrapper in WrapperArray)
    memory.cycle()

# CYCLES
print("total Cycles", cycleCount)

# HARDWARE UTILIZATION, MEMORY USAGE
# NOTE: the ratios below are fractions of the total cycle count (0..1), rounded
# to two decimals; they are not multiplied by 100 despite the "(%)" labels.
for x in range(NUM_CHANNELS):
    pe = WrapperArray[x].PE
    spal = WrapperArray[x].SpAL
    spbl = WrapperArray[x].SpBL
    print("PE" + str(x) + ":")
    print("Wasted Cycles: " + str(round(pe.numWastedCycles/cycleCount,2)) + " , Part I Waiting for Part II: " + str(round(pe.partIWastedCycles/cycleCount,2)) + " , Part II Waiting for Part I: " + str(round(pe.partIIWastedCycles/cycleCount,2)))
    print("SpAL Memory Use: " + str(spal.MemoryUsage) + " , SpAL Memory Wasted Cycles (%): " + str(round(spal.memoryWastedCycles/cycleCount,2)) + " SpBL Memory Use: " + str(spbl.MemoryUsage) + " , SpBL Memory Wasted Cycles(%): " + str(round(spbl.memoryWastedCycles/cycleCount,2)))

#Memory Bandwidth Utilization
# Guard the second ratio: if memory was never in use the divisor is zero.
busyCycles = memory.NumCyclesInUse
busyUtilization = memory.TotalMemoryPulled/busyCycles if busyCycles else 0.0
print("Average Bandwidth Utilization: " + str(memory.TotalMemoryPulled/cycleCount) + ", Bandwidth Utilization When Memory Is In Use: " + str(busyUtilization))


In [None]:

# Collect every PE's output triples and compare the assembled product with a
# reference computed by SciPy.
r = []
c = []
v = []
for wrapper in WrapperArray:
    for o in wrapper.PE.outputBuffer:
        # Each output entry is read as (value, row, column).
        v.append(o[0])
        r.append(o[1])
        c.append(o[2])

# The product of an (I x K) and a (K x J) matrix has shape (I, J). The former
# (I, K) only worked because all three dimensions happen to be equal.
est = coo_matrix((v, (r, c)), shape=(I, J)).toarray()
# Sparse product, densified once; same values as multiplying the dense arrays.
actual = i1.dot(i2).toarray()
print(est)
print(actual)
print(np.equal(actual,est))
print(np.allclose(actual, est, rtol=0.0001, atol=0.0001))

Test handwritten example on PE

In [None]:
#for wrapper in WrapperArray:
#    print(wrapper.PE.outputBuffer)

In [None]:
"""

comp = PE(3,1)
comp.input(1,1,0,1,3)
comp.input(1,1,0,2,3)
comp.input(1,1,0,3,3)
comp.input(1,1,0,4,3)

comp.input(1,1,1,1,1)
comp.input(1,1,1,3,1)
comp.input(1,1,1,2,2)
comp.input(1,1,1,4,2)

comp.input(1,1,2,1,1)
comp.input(1,1,2,3,1)

comp.input(1,1,4,1,1)
comp.input(1,1,4,3,1)
comp.input(1,1,4,2,2)
comp.input(1,1,4,4,2)
comp.input(1,1,4,1,3)
comp.input(1,1,4,2,3)
comp.input(1,1,4,3,3)
comp.input(1,1,4,4,3)
comp.input(1,1,4,1,4)
comp.input(1,1,4,3,4)

comp.input(None,None,None,None,None)

"""


In [None]:
#for x in range(0,32): #Should finish running in 31 cycles
#    print(x)
#    comp.cycle()
#    print(str(comp))

In [None]:
#print(comp.outputBuffer)