## Driver

In [6]:

from pynq import DefaultIP
import numpy as np
import time
import struct

class matmulDriver(DefaultIP):
    '''
    PYNQ driver bound to the ``xilinx.com:hls:matmul8x8:1.0`` IP.

    Each call to ``multiply`` writes sixteen 1024-element int32 vectors into
    the IP's memory-mapped operand buffers, then reads 64 result words back
    and appends them to ``self.result``.

    Attributes:
        result: flat int32 buffer (1024*1024 entries) that accumulates the
            results of successive ``multiply`` calls, in call order.
        idx: position in ``result`` where the next value will be stored.
    '''

    bindto = ["xilinx.com:hls:matmul8x8:1.0"]

    # Register-map layout used by multiply(): sixteen operand buffers of
    # 1024 int32 words (0x1000 bytes each) starting at 0x1000, followed by
    # the result words at 0x11000.
    _VECTOR_LEN = 1024
    _FIRST_OPERAND_OFFSET = 0x1000
    _OPERAND_STRIDE = 0x1000
    _RESULT_OFFSET = 0x11000
    _RESULTS_PER_CALL = 64

    def __init__(self, description):
        super().__init__(description=description)
        self.result = np.zeros((1024*1024),dtype=np.int32)
        self.idx=0

    def clear_results(self):
        '''Zero the result buffer and rewind the write position to the start.'''
        self.result[:] = 0
        self.idx = 0

    def multiply(self, ra, rb, rc, rd, re,rf,rg,rh, ca,cb,cc,cd,ce,cf,cg,ch):
        '''
        Load eight rows and eight columns into the IP and collect its output.

        ra, rb, rc, rd, re, rf, rg, rh are rows of the first matrix.
        ca, cb, cc, cd, ce, cf, cg, ch are columns of the second matrix
        (i.e. pass rows of the transposed second matrix).

        Every operand must hold exactly 1024 values that fit in a signed
        32-bit integer. The 64 words read back are appended to
        ``self.result`` starting at ``self.idx``.
        (presumably one value per row/column pair of an 8x8 tile — confirm
        the ordering against the HLS source).

        Raises:
            IndexError: if ``self.result`` has no room for 64 more values.
            struct.error: if an operand does not contain exactly 1024
                int32-compatible values.
        '''
        # Fail before touching the hardware rather than part-way through the
        # read-back loop.
        if self.idx + self._RESULTS_PER_CALL > self.result.size:
            raise IndexError(
                "result buffer is full; call clear_results() before multiplying again")

        operands = (ra, rb, rc, rd, re, rf, rg, rh,
                    ca, cb, cc, cd, ce, cf, cg, ch)
        fmt = f"{self._VECTOR_LEN}i"
        # Pack everything first so a malformed operand is rejected before any
        # buffer in the IP has been overwritten.
        payloads = [struct.pack(fmt, *vec) for vec in operands]

        for slot, payload in enumerate(payloads):
            self.write(self._FIRST_OPERAND_OFFSET + slot*self._OPERAND_STRIDE, payload)

        # NOTE(review): no start/done handshake is issued between loading the
        # operands and reading the results; presumably the IP does not need
        # one — confirm against the HLS block-level interface.
        for word in range(self._RESULTS_PER_CALL):
            raw = int(self.read(self._RESULT_OFFSET + 4*word))
            # The bus read yields the raw 32-bit word; reinterpret it as a
            # signed value so negative products fit the int32 result buffer.
            self.result[self.idx] = ((raw & 0xFFFFFFFF) ^ 0x80000000) - 0x80000000
            self.idx += 1


## Load Overlay

In [7]:
from pynq import Overlay
from pynq import allocate

# Bitstream for the matmul design; adjust if the overlay is installed elsewhere.
BITSTREAM_PATH = '/home/xilinx/pynq/overlays/matmul/matmul1x8x1x8.bit'
overlay = Overlay(BITSTREAM_PATH)

# List the addressable IP blocks in the loaded overlay (replaces the
# IPython-only `overlay?` introspection so the cell is plain Python).
print(sorted(overlay.ip_dict))

## SW Execute

In [3]:

# Software baseline: time a NumPy dot product of two 1024-element int32 vectors.
# NOTE(review): this measures a single 1024-element dot product, whereas the
# hardware run makes 128*128 multiply() calls over 1024*1024-element matrices,
# so the two timings are not directly comparable — confirm the intended workload.
a=np.ones((1024),dtype=np.int32)
b=np.ones((1024),dtype=np.int32)
print("start")
# Start the clock after the print so console I/O is not part of the measurement;
# perf_counter is monotonic and higher resolution than time.time for short spans.
start_time=time.perf_counter()
c=np.dot(a,b)
stop_time=time.perf_counter()
sw_exec_time = stop_time-start_time
print('SW execution time: ',sw_exec_time)

start
SW execution time:  0.008124589920043945


### HW Test
Do not execute

In [6]:

# Single-call smoke test: feed the first eight 1024-element slices of each
# flat matrix to the IP and show the driver's result buffer.
matA = np.ones((1024*1024),dtype=np.int32)
matB = np.ones((1024*1024),dtype=np.int32)
i=0
j=0
# Eight consecutive 1024-element slices starting at slice i (resp. j).
rows_a = [matA[(i+k)*1024:(i+k+1)*1024] for k in range(8)]
rows_b = [matB[(j+k)*1024:(j+k+1)*1024] for k in range(8)]
overlay.matmul8x8_0.multiply(*rows_a, *rows_b)
print(overlay.matmul8x8_0.result.reshape(16384,64))

[[1024 1024 1024 ..., 1024 1024 1024]
 [1024 1024 1024 ..., 1024 1024 1024]
 [1024 1024 1024 ..., 1024 1024 1024]
 ..., 
 [   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]]


## HW Execute

In [8]:
# Full hardware run: 128 x 128 multiply() calls, each consuming eight
# 1024-element slices of matA and eight of matB.
matA = np.ones((1024*1024),dtype=np.int32)
matB = np.ones((1024*1024),dtype=np.int32)

hw_ip = overlay.matmul8x8_0
# Each multiply() call appends 64 values to the driver's result buffer, and
# this loop fills the 1024*1024-entry buffer exactly. Rewind it first so the
# cell can be re-run (or run after an earlier multiply() call) without
# overflowing the buffer.
hw_ip.result[:] = 0
hw_ip.idx = 0

print("start")
start_time= time.perf_counter()
for i in range(0, 1024, 8):
    # The eight slices of matA are the same for every j; build them once.
    rows_a = [matA[(i+k)*1024:(i+k+1)*1024] for k in range(8)]
    for j in range(0, 1024, 8):
        # NOTE(review): multiply() expects columns of the second matrix, so
        # matB is presumably stored transposed — confirm for non-uniform data.
        rows_b = [matB[(j+k)*1024:(j+k+1)*1024] for k in range(8)]
        hw_ip.multiply(*rows_a, *rows_b)
stop_time=time.perf_counter()
HW_time =stop_time-start_time
print("done")
print('HW execution time: ',HW_time)

start
done
HW execution time:  1249.9198815822601


In [2]:
# Show the results accumulated by the hardware driver. `overlay` is created in
# the "Load Overlay" cell, so that cell must have been run in the current
# kernel session before this one.
print(overlay.matmul8x8_0.result)

NameError: name 'overlay' is not defined