In [10]:
from MatRaptorClasses import PE
from MatRaptorClasses import csr_to_c2sr
from MatRaptorClasses import SpAL
from MatRaptorClasses import SpBL
from MatRaptorClasses import Memory
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
import time
from threading import Thread
from threading import Event

In [11]:
# --- Simulation configuration ------------------------------------------------
NUM_CHANNELS = 8   # channel count handed to csr_to_c2sr below
NUM_QUEUES = 10
endFlag = True     # NOTE(review): not read anywhere in this cell
I = 100            # rows of A
K = 100            # columns of A == rows of B
J = 100            # columns of B
NUM_INTS = 100     # number of random (row, col, value) triples drawn per matrix
SEED = 0           # fixed seed so Restart & Run All regenerates the same matrices

# --- Random sparse operands --------------------------------------------------
# Values are drawn from [1, 10); coordinates are drawn independently, so the
# same (row, col) pair can occur more than once. The COO -> CSR conversion sums
# such duplicates, which means each matrix holds at most NUM_INTS non-zeros.
gen = np.random.default_rng(SEED)
data1 = gen.integers(1,10,NUM_INTS)
row1 = gen.integers(0,I,NUM_INTS)
col1 = gen.integers(0,K,NUM_INTS)

data2 = gen.integers(1,10,NUM_INTS)
row2 = gen.integers(0,K,NUM_INTS)
col2 = gen.integers(0,J,NUM_INTS)
i1 = csr_matrix(coo_matrix((data1, (row1, col1)), shape=(I, K)))
i2 = csr_matrix(coo_matrix((data2, (row2, col2)), shape=(K, J)))

# Re-encode both CSR operands with the project's csr_to_c2sr helper.
inputA = csr_to_c2sr(i1.data,i1.indices,i1.indptr,NUM_CHANNELS)
inputB = csr_to_c2sr(i2.data,i2.indices,i2.indptr,NUM_CHANNELS)

print(i1.toarray())
print(i2.toarray())

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [12]:
# Plan: run each individual set of (SpAL, SpBL, PE) in its own thread.
# Remember that the events are used ONLY to wait for the other PEs to stop.
# The clearest approach is a very simple wrapper class around all three of them:
# it holds an SpAL, an SpBL and a PE, plus a "running" method that is used as the thread target and cycles
# each part once every time it is allowed to!

class Wrapper:
    """Groups one SpAL, SpBL and PE so they can be stepped together from a thread.

    Attributes:
        SpAL, SpBL, PE: the three stage objects; each must provide ``cycle()``
            and an ``endFlag`` attribute.
        endFlag: becomes truthy once all three stages report ``endFlag``, or
            when ``running`` was aborted by an exception.
        error: the exception that aborted ``running``, or ``None``.
    """

    def __init__(self, SpAL, SpBL, PE) -> None:
        self.SpAL = SpAL
        self.SpBL = SpBL
        self.PE = PE
        self.endFlag = False
        # Set by running() if a stage raises, so the driver can inspect the failure.
        self.error = None

    def running(self, event):
        """Thread target: perform one cycle of all three stages per cleared event.

        Protocol with the driver: the driver clears ``event`` to allow one
        cycle; this method sets ``event`` again once that cycle is done. The
        event is also set when the method returns, so a driver blocked in
        ``event.wait()`` is always released.

        Args:
            event: ``threading.Event`` shared with the driver.

        Raises:
            Whatever a stage's ``cycle()`` raises. Before re-raising, the
            exception is stored in ``self.error`` and ``self.endFlag`` is set
            so the driver does not wait forever on a dead thread.
        """
        try:
            while not self.endFlag:
                # An Event cannot be waited on for "cleared", so poll briefly.
                time.sleep(0.0001)
                if not event.is_set():
                    # Stages are stepped in the order PE, SpBL, SpAL.
                    self.PE.cycle()
                    self.SpBL.cycle()
                    self.SpAL.cycle()
                    # Publish the finished state before signalling the driver,
                    # which reads endFlag right after the event fires.
                    self.endFlag = self.PE.endFlag and self.SpAL.endFlag and self.SpBL.endFlag
                    event.set()
        except BaseException as exc:
            self.error = exc
            self.endFlag = True
            raise
        finally:
            event.set()
        
# One Wrapper (SpAL -> SpBL -> PE) and one Event per channel, index-aligned.
WrapperArray = []
WrapperEventArray = []

memory = Memory(NUM_CHANNELS)
for x in range(NUM_CHANNELS):
    A = SpAL(x, NUM_CHANNELS)
    B = SpBL(x)
    # Use the configured queue count instead of repeating the literal 10.
    P = PE(NUM_QUEUES, x)

    # Chain the stages: SpAL hands off to SpBL, SpBL hands off to the PE.
    A.setNext(B)
    B.setNext(P)

    # Both loaders share the single Memory instance.
    A.setMemory(memory)
    B.setMemory(memory)

    A.loadMatrixA(inputA)
    B.loadMatrixB(inputB)

    W = Wrapper(A, B, P)
    done = Event()
    WrapperArray.append(W)
    WrapperEventArray.append(done)
    # daemon=True: a worker stuck polling must not keep the process alive if
    # the driver cell is interrupted.
    Thread(target=W.running, args=[done], daemon=True).start()

# A fresh Event starts cleared, so every worker performs one cycle as soon as
# it starts. Wait for all of those cycles to finish before stepping the memory;
# otherwise memory.cycle() runs concurrently with the last workers started.
for done in WrapperEventArray:
    done.wait()
memory.cycle()


In [13]:
# Drive the simulation: every iteration lets each still-running wrapper do one
# cycle, waits for all of them, then steps the shared memory once.
endFlag = False
cycleCount = 0
while not endFlag:
    endFlag = True # if any of the wrappers have a false EndFlag, this turns false (since we do boolean and with each of them)
    cycleCount += 1
    # Snapshot which wrappers were already finished BEFORE releasing the
    # workers. Reading endFlag only after the clear would charge a wasted cycle
    # to a wrapper that finished during this very cycle, depending on timing.
    wasFinished = [WrapperArray[x].endFlag for x in range(NUM_CHANNELS)]
    for x in range(NUM_CHANNELS):
        WrapperEventArray[x].clear()
    for x in range(NUM_CHANNELS):
        if not WrapperArray[x].endFlag:
            WrapperEventArray[x].wait()
        if wasFinished[x]:
            # increment the wasted cycles for its PE, since the Wrapper isn't running
            WrapperArray[x].PE.numWastedCycles += 1
        endFlag = endFlag and WrapperArray[x].endFlag # IF it goes through all the wrappers and they are all at EOF, then endFlag is True at the end of the cycle
    memory.cycle()

# Per-channel report; every *WastedCycles counter is shown as a fraction of
# cycleCount, rounded to two decimals.
for x in range(NUM_CHANNELS):
    pe = WrapperArray[x].PE
    spal = WrapperArray[x].SpAL
    spbl = WrapperArray[x].SpBL
    print(f"PE{x}:")
    print(
        f"Wasted Cycles: {round(pe.numWastedCycles/cycleCount,2)!s}"
        f" , Part I Waiting for Part II: {round(pe.partIWastedCycles/cycleCount,2)!s}"
        f" , Part II Waiting for Part I: {round(pe.partIIWastedCycles/cycleCount,2)!s}"
    )
    print(
        f"SpAL Memory Use: {spal.MemoryUsage!s}"
        f" , SpAL Memory Wasted Cycles: {round(spal.memoryWastedCycles/cycleCount,2)!s}"
        f" SpBL Memory Use: {spbl.MemoryUsage!s}"
        f" , SpBL Memory Wasted Cycles: {round(spbl.memoryWastedCycles/cycleCount,2)!s}"
    )
print("total Cycles", cycleCount)

PE0:
Wasted Cycles: 0.87 , Part I Waiting for Part II: 0.0 , Part II Waiting for Part I: 0.05
SpAL Memory Use: 960 , SpAL Memory Wasted Cycles: 0.57 SpBL Memory Use: 968 , SpBL Memory Wasted Cycles: 0.73
PE1:
Wasted Cycles: 0.86 , Part I Waiting for Part II: 0.0 , Part II Waiting for Part I: 0.05
SpAL Memory Use: 1008 , SpAL Memory Wasted Cycles: 0.73 SpBL Memory Use: 1000 , SpBL Memory Wasted Cycles: 0.73
PE2:
Wasted Cycles: 0.82 , Part I Waiting for Part II: 0.0 , Part II Waiting for Part I: 0.06
SpAL Memory Use: 1120 , SpAL Memory Wasted Cycles: 0.75 SpBL Memory Use: 1336 , SpBL Memory Wasted Cycles: 0.84
PE3:
Wasted Cycles: 0.84 , Part I Waiting for Part II: 0.0 , Part II Waiting for Part I: 0.05
SpAL Memory Use: 1080 , SpAL Memory Wasted Cycles: 0.6 SpBL Memory Use: 1176 , SpBL Memory Wasted Cycles: 0.77
PE4:
Wasted Cycles: 0.88 , Part I Waiting for Part II: 0.0 , Part II Waiting for Part I: 0.04
SpAL Memory Use: 944 , SpAL Memory Wasted Cycles: 0.6 SpBL Memory Use: 792 , SpBL Mem

In [14]:

# Gather every PE's output entries; each entry is indexed as
# (value, row, column) when the COO matrix is built below.
r = []
c = []
v = []
for wrapper in WrapperArray:
    for o in wrapper.PE.outputBuffer:
        v.append(o[0])
        r.append(o[1])
        c.append(o[2])

# A is I x K and B is K x J, so the product is I x J (not I x K; the two only
# coincide while K == J).
est = coo_matrix((v,(r,c)),(I,J)).toarray()
actual = np.dot(i1.toarray(),i2.toarray())
print(est)
print(actual)
print(np.equal(actual, est))
print(np.allclose(actual,est,0.0001,0.0001))



[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
True


Test handwritten example on PE

In [15]:
# Dump the raw output buffer of each wrapper's PE, in channel order.
for idx in range(len(WrapperArray)):
    print(WrapperArray[idx].PE.outputBuffer)

deque([(18, 24, 28), (30, 24, 107), (72, 32, 79), (27, 32, 686), (9, 32, 721), (4, 48, 55), (8, 48, 567), (12, 48, 845), (35, 80, 575), (49, 80, 195), (7, 80, 492), (14, 88, 944), (28, 88, 46), (24, 88, 121), (8, 88, 624), (8, 96, 426), (6, 96, 763), (30, 112, 500), (10, 128, 690), (9, 128, 321), (9, 144, 544), (14, 144, 148), (12, 144, 768), (54, 152, 190), (36, 152, 141), (81, 152, 262), (6, 160, 646), (10, 168, 435), (30, 168, 314), (12, 168, 998), (28, 176, 560), (15, 192, 291), (18, 192, 812), (12, 208, 701), (21, 216, 37), (7, 216, 800), (16, 224, 892), (4, 224, 847), (36, 232, 577), (6, 264, 718), (24, 264, 462), (63, 272, 124), (27, 272, 247), (10, 288, 679), (27, 328, 154), (45, 328, 636), (27, 328, 799), (9, 352, 17), (27, 376, 69), (27, 376, 776), (63, 408, 124), (27, 408, 247), (8, 408, 98), (64, 408, 337), (72, 408, 835), (63, 424, 536), (21, 424, 554), (49, 424, 883), (24, 424, 73), (16, 424, 442), (27, 456, 618), (24, 464, 733), (35, 480, 470), (15, 480, 847), (21, 496, 

In [16]:
# Hand-built stimulus for a single PE, fed through comp.input in this exact order.
# NOTE(review): the third and fourth fields look like the output row and column
# (they match the (value, row, col) triples in the output buffer) - confirm
# against PE.input.
handwritten_inputs = [
    # third field == 0
    (1, 1, 0, 1, 3),
    (1, 1, 0, 2, 3),
    (1, 1, 0, 3, 3),
    (1, 1, 0, 4, 3),
    # third field == 1
    (1, 1, 1, 1, 1),
    (1, 1, 1, 3, 1),
    (1, 1, 1, 2, 2),
    (1, 1, 1, 4, 2),
    # third field == 2
    (1, 1, 2, 1, 1),
    (1, 1, 2, 3, 1),
    # third field == 4
    (1, 1, 4, 1, 1),
    (1, 1, 4, 3, 1),
    (1, 1, 4, 2, 2),
    (1, 1, 4, 4, 2),
    (1, 1, 4, 1, 3),
    (1, 1, 4, 2, 3),
    (1, 1, 4, 3, 3),
    (1, 1, 4, 4, 3),
    (1, 1, 4, 1, 4),
    (1, 1, 4, 3, 4),
    # all-None entry is pushed last
    (None, None, None, None, None),
]

comp = PE(3, 1)
for entry in handwritten_inputs:
    comp.input(*entry)


In [17]:
# Step the hand-built PE 32 times, printing the cycle index before each step
# and the PE's string form after it.
x = 0
while x < 32: #Should finish running in 31 cycles
    print(x)
    comp.cycle()
    print(comp)
    x += 1
x = 31  # leave x at the value the original for-loop ends with

0
inputQueues: 0: deque([], maxlen=20000), 1: deque([], maxlen=20000), 2: deque([], maxlen=20000), 
 outputQueues: 0: deque([], maxlen=20000), 1: deque([], maxlen=20000), 2: deque([], maxlen=20000), 
 inputQueueLengths: 0: 0, 1: 0, 2: 0, 
 inputFlag: True
 inputBuffer: deque([(1, 1, 0, 1, 3), (1, 1, 0, 2, 3), (1, 1, 0, 3, 3), (1, 1, 0, 4, 3), (1, 1, 1, 1, 1), (1, 1, 1, 3, 1), (1, 1, 1, 2, 2), (1, 1, 1, 4, 2), (1, 1, 2, 1, 1), (1, 1, 2, 3, 1), (1, 1, 4, 1, 1), (1, 1, 4, 3, 1), (1, 1, 4, 2, 2), (1, 1, 4, 4, 2), (1, 1, 4, 1, 3), (1, 1, 4, 2, 3), (1, 1, 4, 3, 3), (1, 1, 4, 4, 3), (1, 1, 4, 1, 4), (1, 1, 4, 3, 4), (None, None, None, None, None)])
 prevI: -1
 currentI: -1
 currentQN: 1
 helperQN: 0
1
inputQueues: 0: deque([(1, 1)], maxlen=20000), 1: deque([], maxlen=20000), 2: deque([], maxlen=20000), 
 outputQueues: 0: deque([], maxlen=20000), 1: deque([], maxlen=20000), 2: deque([], maxlen=20000), 
 inputQueueLengths: 0: 1, 1: 0, 2: 0, 
 inputFlag: True
 inputBuffer: deque([(1, 1, 0, 2, 3)

In [18]:
# Show the output buffer the hand-built PE has accumulated so far.
print(comp.outputBuffer)

deque([(1, 0, 1), (1, 0, 2), (1, 0, 3), (1, 0, 4), (1, 1, 2), (1, 1, 4), (1, 1, 1), (1, 1, 3), (1, 2, 1), (1, 2, 3), (3, 4, 1), (2, 4, 2), (3, 4, 3), (2, 4, 4)])
