In [1]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import time

# Generate random test data
# Seed the legacy global generator so a fresh Restart & Run All produces the
# same operands (and therefore the same products) every time.
np.random.seed(42)

# Two 1 x 65536 row vectors of integers drawn from [-128, 128).
in1 = np.random.randint(-128, 128, size=(1,65536))
in2 = np.random.randint(-128, 128, size=(1,65536))

# Expose the same values as 256x256 matrices for the NumPy reference product.
# reshape raises if the element count does not match, whereas np.resize would
# silently repeat or truncate the data.
in1_ps = in1.reshape(256,256)
in2_ps = in2.reshape(256,256)

print("in1: ", in1_ps)
print("in2: ", in2_ps)

in1:  [[ -79 -107  -68 ...   42  -62  -17]
 [  49   38 -128 ... -119  -89    8]
 [ 117 -110   40 ...  -58   92  -75]
 ...
 [ -17  114  -15 ...  -59  -77   88]
 [-123  -67  -86 ...  -36   44  121]
 [ -64 -109   10 ...  118   37  -66]]
in2:  [[ -91  -19  -51 ...   65  -65  -32]
 [  18  -88   67 ...   61  -28   90]
 [  47   25  109 ...  105   11 -128]
 ...
 [ -26  125    0 ...  -86  -83   -1]
 [ -49  -57   85 ... -121  114  -78]
 [ -27  -19  -63 ...   15  109  107]]


In [2]:
# PS matrix multiplication
out_ps = np.dot(in1_ps, in2_ps)
ps_time = %timeit -o out_ps = np.dot(in1_ps, in2_ps)

print("\nPS Result:\n", out_ps)

print("\nPS Execution Time:")
print("    Best:    {}s".format(ps_time.best))
print("    Average: {}s".format(ps_time.average))
print("    Worst:   {}s".format(ps_time.worst))

15.4 ms ± 89.7 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)

PS Result:
 [[ -21182  -25833   49206 ...  -22871  -57533 -257551]
 [  61948 -126898   19630 ...  152142  -67389  107503]
 [  65642 -106230   84399 ...   39385  -25042   74667]
 ...
 [  -3934 -119082  -89093 ...  135616  -55117   97162]
 [ -42686   60528  111161 ... -102450   82966  308932]
 [ 119369  118445   23059 ...  -47615   26388  -66221]]

PS Execution Time:
    Best:    0.015305418000007193s
    Average: 0.01543849757143107s
    Worst:   0.01561669200000324s


In [None]:
# Import PYNQ libraries
from pynq import Overlay, allocate

# Load the bitstream and take a handle to the matrix-multiply IP inside it
overlay_matrix_mult = Overlay("overlay/matrix_mult.bit")
matrix_mult = overlay_matrix_mult.matrix_mult_0

# Allocate three flat 'i4' buffers of 256*256 elements: two inputs, one output
buffer_shape = (256*256,)
in1_buffer0, in2_buffer0, out_buffer0 = (
    allocate(shape=buffer_shape, dtype='i4') for _ in range(3)
)

# Write each buffer's physical address into the matching pointer register.
# NOTE(review): only the *_1 registers are written; if the IP also exposes
# *_2 registers for upper address bits, confirm they may be left untouched.
ip_registers = matrix_mult.register_map
pointer_bindings = (
    (ip_registers.in1_1, in1_buffer0),
    (ip_registers.in2_1, in2_buffer0),
    (ip_registers.out_r_1, out_buffer0),
)
for pointer_reg, dma_buffer in pointer_bindings:
    matrix_mult.write(pointer_reg.address, dma_buffer.physical_address)

In [None]:
# PL matrix multiplication
pl_time = %%timeit -o

np.copyto(in1_buffer0, np.int8(in1))
np.copyto(in2_buffer0, np.int8(in2))

matrix_mult.write(0x00, 0x01)
while True:
    reg = matrix_mult.read(0x00)
    if reg != 1:
        break

In [None]:
# Show the accelerator output laid out as a 256x256 matrix
print("\nPL Result:\n", out_buffer0.reshape(256,256))

# Flatten the NumPy reference product and compare it element-wise with the
# flat output buffer; every element must match for the run to count as correct.
out_py_re = out_ps.reshape(256*256,)
cmp = out_py_re==out_buffer0
print("\nPL result is CORRECT!" if cmp.all() else "\nPL result is INCORRECT!")

# Report the timing statistics, one line per statistic.
print("\nPL Execution Time:")
pl_timing_lines = (
    ("    Best:    {}s", pl_time.best),
    ("    Average: {}s", pl_time.average),
    ("    Worst:   {}s", pl_time.worst),
)
for template, seconds in pl_timing_lines:
    print(template.format(seconds))

In [None]:
# Plotting the results
# Average run time of each implementation, keyed by its label on the x axis
x_data = ['PS', 'PL']
y_data = [ps_time.average, pl_time.average]

# Draw every bar with its own plt.bar call
for label, seconds in zip(x_data, y_data):
    plt.bar(label, seconds)

# Write the value (4 decimal places) centered just above each bar
for label, seconds in zip(x_data, y_data):
    plt.text(label, seconds, '{:.4f}'.format(seconds),
             ha='center', va='bottom', fontsize=11)

plt.title("Time used of different types")
plt.xlabel("Type")
plt.ylabel("Time(s)")

plt.show()

In [None]:
# Release the contiguous memory explicitly: `del` alone only unbinds the
# notebook names and leaves the actual release to garbage collection, which
# does not happen while any other reference to a buffer is still alive.
# NOTE(review): freebuffer() frees the memory even if other references exist,
# so make sure nothing else still uses these buffers before running this cell.
in1_buffer0.freebuffer()
in2_buffer0.freebuffer()
out_buffer0.freebuffer()

# Drop the now-invalid handles so they cannot be reused by accident.
del in1_buffer0, in2_buffer0, out_buffer0