## Driver

In [1]:

from pynq import DefaultIP
import numpy as np
import time
import struct

class matmulDriver(DefaultIP):
    """PYNQ driver bound to the ``xilinx.com:hls:matmul1x1:1.0`` HLS IP.

    Every ``multiply`` call writes one 1024-word operand and four 16384-word
    operands into the IP's address space, then reads 64 result words back and
    appends them to ``self.result``.

    Attributes:
        result: flat ``int32`` buffer of 1024*1024 words that collects the
            result words in call order.
        idx: position in ``result`` where the next word will be stored.
    """

    bindto = ["xilinx.com:hls:matmul1x1:1.0"]

    # Address offsets and sizes (in 32-bit words) used by this driver.
    # NOTE(review): these must agree with the generated HLS address map -- confirm.
    _A_OFFSET = 0x1000
    _B_OFFSETS = (0x10000, 0x20000, 0x30000, 0x40000)
    _RESULT_OFFSET = 0x50000
    _A_WORDS = 1024
    _B_WORDS = 16384
    _RESULTS_PER_CALL = 64
    _RESULT_WORDS = 1024 * 1024

    def __init__(self, description):
        super().__init__(description=description)
        self.result = np.zeros(self._RESULT_WORDS, dtype=np.int32)
        self.idx = 0

    def reset_results(self):
        """Zero the result buffer and rewind ``idx`` so a new run starts clean."""
        self.result.fill(0)
        self.idx = 0

    def multiply(self, a, b, c, d, e):
        '''
        Send one set of operands to the IP and collect 64 result words.

        a is a row of first matrix (1024 integers)
        b,c,d,e are the second-matrix operands (16384 integers each)
        NOTE(review): presumably each of b..e holds 16 columns of 1024
        elements -- confirm against the HLS source.

        The 64 words read back are stored at result[idx:idx+64] and idx is
        advanced by 64.

        NOTE(review): no control register is written and no completion flag
        is polled before the results are read -- confirm the IP needs no
        explicit start/done handshake.

        Raises:
            IndexError: if the result buffer has no room for 64 more words;
                call reset_results() before starting a new run.
            struct.error: if an operand does not have the expected length.
        '''
        # Fail before touching the hardware instead of part-way through the
        # read-back loop once the buffer is exhausted.
        if self.idx + self._RESULTS_PER_CALL > self.result.size:
            raise IndexError(
                "result buffer is full; call reset_results() before starting a new run"
            )

        self.write(self._A_OFFSET, struct.pack(f"{self._A_WORDS}i", *a))
        for offset, operand in zip(self._B_OFFSETS, (b, c, d, e)):
            self.write(offset, struct.pack(f"{self._B_WORDS}i", *operand))

        # Results are consecutive 32-bit words, hence the 4-byte stride.
        for word in range(self._RESULTS_PER_CALL):
            self.result[self.idx] = self.read(self._RESULT_OFFSET + 4 * word)
            self.idx += 1


## Load Overlay

In [2]:
from pynq import Overlay
from pynq import allocate
# Load the matmul overlay from its bitstream file.
# NOTE(review): absolute on-board path -- adjust if the overlay is installed elsewhere.
BITSTREAM_PATH = '/home/xilinx/pynq/overlays/matmul/matmul1x16x4.bit'
overlay = Overlay(BITSTREAM_PATH)

## SW Execute

In [3]:

# Software baseline: a single 1024-element dot product on the CPU.
# NOTE(review): this times one row-times-column product only, whereas the
# "HW Execute" cell issues 1024*16 multiply calls -- keep that in mind when
# comparing the two timings.
a = np.ones(1024, dtype=np.int32)
b = np.ones(1024, dtype=np.int32)

# Print before starting the clock so console I/O is not part of the measurement.
print("start")
start_time = time.perf_counter()  # monotonic, high-resolution timer
c = np.dot(a, b)
stop_time = time.perf_counter()

sw_exec_time = stop_time - start_time
print('SW execution time: ', sw_exec_time)

start
SW execution time:  0.005561351776123047


### HW Test
Do not execute

In [5]:

# Single-call smoke test: one 1024-word slice of matA against the first four
# 16*1024-word slices of matB.
matA = np.ones(1024 * 1024, dtype=np.int32)
matB = np.ones(1024 * 1024, dtype=np.int32)
i = 0
j = 0
overlay.matmul1x1_0.multiply(
    matA[i * 1024:(i + 1) * 1024],
    # Four consecutive 16*1024-word slices starting at word j*64*1024.
    *(matB[(j * 64 + k) * 1024:(j * 64 + k + 16) * 1024] for k in range(0, 64, 16)),
)
# View the driver's flat result buffer as rows of 64 words (one row per call).
print(overlay.matmul1x1_0.result.reshape(16384, 64))

[[1024 1024 1024 ..., 1024 1024 1024]
 [1024 1024 1024 ..., 1024 1024 1024]
 [   0    0    0 ...,    0    0    0]
 ..., 
 [   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]
 [   0    0    0 ...,    0    0    0]]


## HW Execute

In [5]:
# Full hardware run: 1024 slices of matA, each against 16 groups of four
# matB slices (1024*16 multiply calls in total).
matA = np.ones((1024*1024), dtype=np.int32)
matB = np.ones((1024*1024), dtype=np.int32)

matmul_ip = overlay.matmul1x1_0

# Rewind the driver's result buffer. Any earlier multiply() call (the HW Test
# cell or an interrupted run) leaves idx advanced, and a full run fills the
# buffer exactly, so without this the run would end in an IndexError.
matmul_ip.idx = 0
matmul_ip.result.fill(0)

# The matB slices depend only on j, so build them once instead of on every
# iteration over i.
col_groups = [
    tuple(matB[(j*64 + k)*1024:(j*64 + k + 16)*1024] for k in range(0, 64, 16))
    for j in range(16)
]

print("start")
start_time = time.perf_counter()  # monotonic, high-resolution timer
for i in range(1024):
    print(i)  # progress indicator: one line per slice of matA
    row = matA[i*1024:(i+1)*1024]
    for j in range(16):
        matmul_ip.multiply(row, *col_groups[j])
stop_time = time.perf_counter()
HW_time = stop_time - start_time
print("done")
print('HW execution time: ', HW_time)

start
0
1
2
3


KeyboardInterrupt: 

In [11]:
# NOTE(review): `result` is not assigned in any cell visible here; presumably
# it comes from kernel state left behind by a cell outside this view. Confirm,
# or print overlay.matmul1x1_0.result if the driver's buffer was intended.
print(result)

[1 1 1 ..., 1 1 1]
