In [None]:
from pynq import Overlay
from pynq import allocate
import numpy as np
# Instantiate the overlay from the 'attention.bit' bitstream file; `overlay`
# is the handle through which the accelerator IP is reached later on.
overlay = Overlay('attention.bit')

In [None]:
from pynq import ps

# Show fclk0 before and after retuning it to 375 MHz, then show the CPU clock.
# NOTE(review): 375 MHz is taken as-is from the original cell; confirm the
# design meets timing at this frequency.
clocks = ps.Clocks
print(clocks.fclk0_mhz)
clocks.fclk0_mhz = 375
print(clocks.fclk0_mhz)
print(clocks.cpu_mhz)

In [None]:
# Handle to the CNN_0 IP block exposed by the overlay.
ip = overlay.CNN_0
# MMIO accessor of that IP (not used by the cells visible in this notebook).
mmio = ip.mmio
# Named register view; later cells use it to set the buffer addresses and to
# drive/poll the CTRL register.
register_map = ip.register_map
# Mapping of register names to their descriptions (iterated with .items() below).
# NOTE(review): `_register_classes` is a private attribute, so it may change
# between library versions.
registers = register_map._register_classes

In [None]:
# Dump every register known to the IP's register map, one per line.
for reg_name, reg_entry in registers.items():
    print(reg_name, reg_entry)

In [None]:
# Buffers shared with the accelerator (m_axi). Sizes are element counts; with
# int8 elements they are also byte counts.
# 7840000 == 10000 * 784 -- presumably 10000 flattened 28x28 inputs, with one
# int8 result per input in the output buffer. TODO confirm against the IP.
input_buffer_size = 7840000
output_buffer_size = 10000

# Input side: allocate, then tell the IP where the data lives.
input_buffer = allocate((input_buffer_size,), dtype=np.int8)
register_map.im_1.im = input_buffer.device_address

# Output side: same pattern for the result buffer.
output_buffer = allocate((output_buffer_size,), dtype=np.int8)
register_map.out_r_1.out_r = output_buffer.device_address


In [None]:
import glob

# Directory containing the dumped quantized parameters (one text file per tensor).
WEIGHTS_DIR = "../VitisAI/dump_results/dump_results_weights"


def _load_params(layer_kind, param_kind):
    """Load all ``quant_<layer_kind>_*_<param_kind>.txt`` dumps as arrays.

    Files are read in ``sorted()`` filename order.

    NOTE(review): that order is lexicographic, so an index of 10 would sort
    before 2 -- confirm the dump file names never reach two-digit indices.
    """
    pattern = "{}/quant_{}_*_{}.txt".format(WEIGHTS_DIR, layer_kind, param_kind)
    return [np.loadtxt(path) for path in sorted(glob.glob(pattern))]


# Per-layer divisors applied after each matmul+bias in the software model.
scales = [512, 256, 256,128,128]
# Layer widths used to reshape the flat kernel dumps: kernel i becomes
# (layers[i], layers[i+1]).
layers = [150528,802816,401408,200704,100352,25088]

# Test inputs are floor-divided by 32 before the int8 cast
# (presumably to bring 8-bit pixel values into a small range -- TODO confirm).
x_test = (np.load('x_test.npy')//32).astype(np.int8)
y_test = np.load('y_test.npy')

# Parameters are collected in the order conv -> attention -> dense, bias
# before kernel within each group (same order as the files were read before).
bias = []
weight = []
for layer_kind in ("conv", "attention", "dense"):
    bias.extend(_load_params(layer_kind, "bias"))
    weight.extend(_load_params(layer_kind, "kernel"))

# Fail with a clear message instead of a bare IndexError when the dump
# directory is missing or incomplete.
if len(weight) < 3:
    raise FileNotFoundError(
        "expected at least 3 kernel dumps under {!r}, found {}".format(
            WEIGHTS_DIR, len(weight)))

# Only the first three kernels are reshaped to 2-D matrices here.
for i in range(3):
    weight[i] = weight[i].reshape(layers[i], layers[i+1])

In [None]:
# Hardware accelerated function
def attention_hw(im):
    """Run the accelerator on ``im`` and return the shared output buffer.

    Args:
        im: 1-D sequence of input values; it is copied into the first
            ``len(im)`` elements of the module-level ``input_buffer``.

    Returns:
        The module-level ``output_buffer`` itself (not a copy), so its
        contents are overwritten by the next call.
    """
    # Write to input buffer
    input_buffer[:len(im)] = im
    # Push the freshly written data out of the CPU cache so the accelerator
    # reads what was just written rather than stale memory.
    input_buffer.flush()
    # Send start signal
    register_map.CTRL.AP_START = 1

    # Wait until algorithm has completed
    while register_map.CTRL.AP_DONE == 0:
        pass

    # Discard any cached copy of the output so the CPU sees the results the
    # accelerator just wrote.
    output_buffer.invalidate()
    return output_buffer

In [None]:
def attention_sw(im, num_layers=3):
    """Software reference model: classify every sample in ``im``.

    Each sample is passed through ``num_layers`` layers taken from the
    module-level ``weight``, ``bias`` and ``scales`` lists. A layer computes
    ``(data @ weight[j] + bias[j]) // scales[j]``; every layer except the last
    is followed by a ReLU (negative entries are zeroed).

    Args:
        im: Indexable collection of samples (e.g. a 2-D array, one sample per
            row). All samples are processed, whatever their number.
        num_layers: Number of leading layers to apply. Defaults to 3.

    Returns:
        A list with the argmax of the final layer output for each sample.
    """
    last_layer = num_layers - 1
    result = []
    # Samples are deliberately processed one at a time so the timing of this
    # reference stays comparable with earlier measurements.
    for data in im:
        for j in range(num_layers):
            data = (data@weight[j]+bias[j])//scales[j]
            if j != last_layer:
                # ReLU
                data = data*(data>0)
        result.append(np.argmax(data))
    return result

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Run the whole test set through the accelerator and the software model.
res_hls = attention_hw(x_test.flatten())
res_py = attention_sw(x_test)

# Score only the samples for which a label and both predictions exist, so
# nothing here depends on a hard-coded test-set size.
num_samples = min(len(y_test), len(res_hls), len(res_py))
labels = np.asarray(y_test[:num_samples])

# Number of predictions that disagree with the label, per implementation.
err_hls = int(np.count_nonzero(np.asarray(res_hls[:num_samples]) != labels))
err_py = int(np.count_nonzero(np.asarray(res_py[:num_samples]) != labels))

print("acc hls {}".format(1-err_hls/num_samples))
print("acc py {}".format(1-err_py/num_samples))

In [None]:
hw_time = %timeit -n 1 -r 10 -o mnist_hw(x_test.flatten())
sw_time = %timeit -n 1 -r 10 -o mnist_sw(x_test)

print('Performance gain:', sw_time.average / hw_time.average) 

In [None]:
# Convert the average run time into samples per second.
# NOTE(review): assumes every timed run processed 10000 samples -- confirm
# against the size of x_test.
hw_fps = (hw_time.average/10000)**-1
sw_fps = (sw_time.average/10000)**-1
print("hw fps = {:.1f}".format(hw_fps))
print("sw fps = {:.1f}".format(sw_fps))