In [1]:
from sa import *
import os, json
import numpy as np

WEIGHT_FILE = "./qm.json"
TEST_INPUT = "./img0.npy"
RANDOM_MEM_GEN = False
DO_PRINT = False
GENERATE_ISA = False
with open(os.path.join(WEIGHT_FILE)) as fp:
    QM_DATA = json.loads(fp.read())
INPUT_DATA = np.array(np.load(os.path.join(TEST_INPUT))[0])

ADDR_MIN = 0x0000_0000
ADDR_MAX = 0x0002_0000
OFF_MEM_WB_BASE_ADDR = 0x0000_0000
OFF_MEM_UB_BASE_ADDR = 0x0001_AA80
UB_RESLUT_BASE_ADDR  = 32

FC1_SIZE = 0x0001_8800
FC2_SIZE = 0x0000_2000
FC3_SIZE = 0x0000_0280
UB_SIZE  = 0x0000_0310

FC1_OUTPUT_GOLDEN = np.load("./FC1_GOLDEN_VECTOR.npy")
FC2_OUTPUT_GOLDEN = np.load("./FC2_GOLDEN_VECTOR.npy")
FC3_OUTPUT_GOLDEN = np.load("./FC3_GOLDEN_VECTOR.npy")

Q = 5   # Q-Num format (Q-2.5)

In [2]:
# Instantiation
SA = SYSTOLIC_ARRAY(gen_isa=GENERATE_ISA, USE_Q_NUMBER=True, Q=4)
OFF_MEM = BRAM(depth=8192, data_num=16, nbits=8)

#Load weight

Prepare off-mem file(.coe)

In [3]:
# Weights
addr = 0

# FC1
FC1_weight = np.array(QM_DATA['weight']['FC1'])
print(FC1_weight.shape)
FC1_FROM = addr*16
print(f"FC1 FROM {FC1_FROM}[{tohex(FC1_FROM, 8)}]")
for j in range(0, 784, 16):
    for i in range(0, 128, 16):
        tmp2 = FC1_weight[i:i+16]
        tmp2 = (tmp2.transpose()[j:j+16]).transpose()
        for k in range(15, -1, -1):
            OFF_MEM.write(addr=addr, val=encode(data=tmp2[k], data_num=16, nbits=8))
            addr = addr + 1
FC1_TO = (addr-1)*16
print(f"FC1 TO {FC1_TO}[{tohex(FC1_TO, 8)}]")

# Input Data
INPUT_DATA_FROM = addr*16
print(f"INPUT_DATA FROM {INPUT_DATA_FROM}[{tohex(INPUT_DATA_FROM, 8)}]")
for i in range(0, 784, 16):
#for i in range(784-16, -1, -16):
    OFF_MEM.write(addr=addr, val=encode(data=np.flip(INPUT_DATA[i:i+16]), data_num=16, nbits=8))
    addr = addr + 1
INPUT_DATA_TO = addr*16
print(f"INPUT_DATA TO {INPUT_DATA_TO}[{tohex(INPUT_DATA_TO, 8)}]")



(128, 784)
FC1 FROM 0[00]
FC1 TO 100336[187f0]
FC2 FROM 100352[18800]
FC2 TO 108528[1a7f0]
FC3 FROM 108544[1a800]
FC3 TO 109552[1abf0]
INPUT_DATA FROM 109568[1ac00]
INPUT_DATA TO 110352[1af10]


Execute ISAs

In [4]:
# 1) Write weight to WB
WB_ADDR_TO = int(FC3_TO/16)
for addr in range(WB_ADDR_TO):
    SA.AXI_TO_WB_INST(OFF_MEM, addr, addr)

# 2) Write data to UB
UB_ADDR_TO = int((INPUT_DATA_TO - INPUT_DATA_FROM)/16)
for addr in range(UB_ADDR_TO):
    SA.AXI_TO_UB_INST(OFF_MEM, addr, addr+int(INPUT_DATA_FROM/16))



In [5]:
# 1. Initial loading for FIFO
UB_ADDR = 0
WB_ADDR = 0

for i in range(4):
    SA.LOAD_WEIGHT(UB_ADDR)
    UB_ADDR = UB_ADDR + 1

print(SA.WEIGHT_FIFO_PRINT(dec=True))

for i in range(4):
    SA.UB_TO_DATA_FIFO_INST(WB_ADDR)
    WB_ADDR = WB_ADDR + 1

#print(SA.DATA_FIFO_PRINT(dec=True))
print(FC1_weight.transpose()[15][0:16])

0:[-1, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0]
1:[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
2:[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
3:[0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]
[-1, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]
[ 0  0  0  0  1  0 -1 -1  0  0 -1  0  0  1  0 -1]


In [6]:
# 1. Fully Connected Layer 1
ACC_ADDR = 0

for j in range(0, 784, 16):
    for i in range(0, 128, 16):
        # Prepare Weights
        for k in range(16):
            SA.LOAD_WEIGHT(WB_ADDR)
            WB_ADDR = WB_ADDR + 1

        # Matrix Multiplication
        ACC_ADDR = int(i/16)
        if (i == 0):
            SA.MAT_MUL(addra=ACC_ADDR)
        else:
            SA.MAT_MUL_ACC(addra=ACC_ADDR)
    # Load new data
    SA.UB_TO_DATA_FIFO_INST(UB_ADDR)
    UB_ADDR = UB_ADDR + 1        

for i in range(8):
    SA.WRITE_RESULT(UB_RESLUT_BASE_ADDR+i, i)


a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[-1, -1, 0, 0, -1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[-1, -1, -1, 0, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, 0]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[-1, 0, -1, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1]
a:[-16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16], w:[-1, -1, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, 0, 0]
a:[-16, -16, -16, -16, -1

In [7]:
FC1_out = list()
for i in range(8):
    FC1_out = FC1_out + decode(SA.UB.data[UB_RESLUT_BASE_ADDR+i], 16, 8)
FC1_out = np.array(FC1_out)
print(FC1_out)
print(FC1_OUTPUT_GOLDEN[0])

[   2    2    3    4    0    2    2   -1    4    3    5    1   -1    1
    3    2    3   57   18   -8   15   19   14   -3  -31   39  -17   12
   -1   33   26   32 -101  -32   28  -58  -14   56  -16  -48   46   44
    6  -20  -35    0  -35  -39   64   34  -11   -1  -12  -86  -50   78
   -4    6   -1   88   -5   23   -5   42   23   -2    2   47    7  -43
  -26  -21  -47    0  -35  -10   17  -22    6  -47   10   24   -5  -26
  -93  -61  -33  -91  -92 -111  -49  -15  118  103   62   40   30  -28
  -40  -51  -32   -7   25  -43  -11   64  119   30    2  -27  -45  -37
   45    4   27   65   54   -1   39   90   -2  -34  -39  -52 -128  -37
  -30  -16]
[  35   73   39   68  -11   57    4   90  -68    4   42   98  -47 -128
  104   85  122   74   48   -2  127  -14  -29  -18  -40   35    3  -16
  -21    6   -4   65  -24  -26   15  113   32 -102   52    4  127   41
  -61   79  -81   31  -16   24  -33   80  108   44   75  -12  -14   63
   98  -75   57  -79  -36 -105  -38  -80  -17  -98  -79   15   98

In [11]:
print(INPUT_DATA)
print(FC1_weight)
np.round(np.matmul(INPUT_DATA, FC1_weight.transpose())/16)

[-16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -12  11  16   5 -10 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -12   3  15  16  16  16  -5 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -12
  11  16  16  16  16  16  -5 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16
 -16 -16 -16 -16 -16 -16 -16 -16   1  14  16  16  13   5  16  16  -5 -16
 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16 -16   1
  15  16  16   2 -13  -3  16  16  -5 -16 -16 -16 -1

array([  35.,   72.,   39.,   68.,  -12.,   57.,    4.,   90.,  -69.,
          4.,   42.,   98.,  -48., -166.,  104.,   84.,  122.,   74.,
         48.,   -3.,  138.,  -15.,  -30.,  -19.,  -41.,   35.,    2.,
        -17.,  -22.,    6.,   -5.,   65.,  -25.,  -27.,   15.,  113.,
         32., -103.,   52.,    4.,  294.,   41.,  -62.,   79.,  -82.,
         31.,  -17.,   24.,  -34.,   80.,  108.,   44.,   75.,  -13.,
        -15.,   63.,   98.,  -76.,   57.,  -80.,  -37., -106.,  -39.,
        -81.,  -18.,  -99.,  -80.,   14.,   98.,  -30.,  112.,   -5.,
        -90.,  124.,  -25.,  -48.,   58.,  -22.,  -29.,   14.,  -51.,
         23.,  -61.,  -63.,  178.,   69., -137.,   -2.,   36.,  -70.,
        -27.,   16.,  -31.,  -80.,   29.,    7.,  -50.,  110.,   38.,
        117.,   75.,  102.,  -41.,  -42.,   10.,  -91.,  -30.,   77.,
        -84.,  -64., -313.,  -83.,   -6.,  156.,   13.,   -2.,   29.,
         70.,  -64.,   54.,   46.,  -50.,   -7.,   72.,  -21.,   54.,
        -28.,  -32.]