In [37]:
"""Deploy SRCNN Accellerator."""
import numpy as np
import pynq

# Load the bitstream and program the FPGA exactly once. Overlay() downloads
# the bitstream itself when download=True, so a separate overlay.download()
# call would only reprogram the device a second time.
overlay = pynq.Overlay('./srcnn.bit', download=True)

# Handle to the SRCNN IP instance exposed by the overlay.
srcnn = overlay.srcnn_0
# Last expression of the cell: display the IP's register layout.
srcnn.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  input_ftmap_1 = Register(input_ftmap=write-only),
  input_ftmap_2 = Register(input_ftmap=write-only),
  conv1_weights_1 = Register(conv1_weights=write-only),
  conv1_weights_2 = Register(conv1_weights=write-only),
  conv1_biases_1 = Register(conv1_biases=write-only),
  conv1_biases_2 = Register(conv1_biases=write-only),
  conv1_output_ftmap_1 = Register(conv1_output_ftmap=write-only),
  conv1_output_ftmap_2 = Register(conv1_output_ftmap=write-only),
  conv2_weights_1 = Register(conv2_weights=write-only),
  conv2_weights_2 = Register(conv2_weights=write-only),
  conv2_biases_1 = Register(conv2_biases=write-only),
  conv2_biases_2 = Register(conv2_biases=write-only),

In [38]:
def normalise(pixel):
    """Scale a pixel value by 1/255 and return it in single precision.

    Callers in this notebook pass uint8 values, so the result lies in [0, 1].

    Args:
        pixel: Numeric pixel value to scale.

    Returns:
        ``pixel / 255`` converted with ``np.single``.
    """
    scaled = pixel / 255
    return np.single(scaled)

In [39]:
from pynq import allocate

# Feature-map height and width; every feature-map buffer allocated below is
# shaped (channels, H, W).
H, W = 255, 255

# Channel counts: N0 for the input image, N1/N2 for the conv1/conv2 outputs,
# N3 for the output image (see the buffer shapes allocated below).
N0, N1, N2, N3 = 1, 64, 32, 1

# Square kernel sizes of conv1, conv2 and conv3 (weights are (out, in, F, F)).
F1, F2, F3 = 9, 1, 5


# Allocate one single-precision pynq buffer per tensor handed to the IP.
# The physical address of each buffer is written to the IP further down.

# Network input.
input_image = allocate((N0, H, W), dtype=np.single)

# Layer 1: weights, biases, output feature map.
conv1_w = allocate((N1, N0, F1, F1), dtype=np.single)
conv1_b = allocate(N1, dtype=np.single)
conv1_output = allocate((N1, H, W), dtype=np.single)

# Layer 2: weights, biases, output feature map.
conv2_w = allocate((N2, N1, F2, F2), dtype=np.single)
conv2_b = allocate(N2, dtype=np.single)
conv2_output = allocate((N2, H, W), dtype=np.single)

# Layer 3: weights and biases; its output is the final image.
conv3_w = allocate((N3, N2, F3, F3), dtype=np.single)
conv3_b = allocate(N3, dtype=np.single)
output_image = allocate((N3, H, W), dtype=np.single)

# Tell the IP where each allocated buffer lives in DRAM by writing the
# buffer's physical address into the matching pointer register.
# Only the `*_1` register of each pair is written; presumably that is the low
# word of the pointer and the `*_2` register the high word -- TODO confirm.
_pointer_registers = [
    ("input_ftmap_1", input_image),
    ("conv1_weights_1", conv1_w),
    ("conv1_biases_1", conv1_b),
    ("conv1_output_ftmap_1", conv1_output),
    ("conv2_weights_1", conv2_w),
    ("conv2_biases_1", conv2_b),
    ("conv2_output_ftmap_1", conv2_output),
    ("conv3_weights_1", conv3_w),
    ("conv3_biases_1", conv3_b),
    ("output_ftmap_1", output_image),
]
for _register_name, _buffer in _pointer_registers:
    _register = getattr(srcnn.register_map, _register_name)
    srcnn.write(_register.address, _buffer.physical_address)

# Load the trained weights and biases into the already-allocated buffers.
# Each file is read as raw single-precision values and reshaped to the
# buffer's shape; reshape raises if the file holds the wrong number of values.
# Passing the path (not an open() handle) lets numpy open the file in binary
# mode and close it again, so no file descriptors are leaked.
conv1_w[:] = np.fromfile("./weights/conv1_weights_3x_flp.bin", dtype=np.single).reshape(N1, N0, F1, F1)
conv1_b[:] = np.fromfile("./weights/conv1_biases_3x_flp.bin", dtype=np.single).reshape(N1)
conv2_w[:] = np.fromfile("./weights/conv2_weights_3x_flp.bin", dtype=np.single).reshape(N2, N1, F2, F2)
conv2_b[:] = np.fromfile("./weights/conv2_biases_3x_flp.bin", dtype=np.single).reshape(N2)
conv3_w[:] = np.fromfile("./weights/conv3_weights_3x_flp.bin", dtype=np.single).reshape(N3, N2, F3, F3)
conv3_b[:] = np.fromfile("./weights/conv3_biases_3x_flp.bin", dtype=np.single).reshape(N3)

def load_input(filename):
    """Read a raw uint8 image file and copy it, normalised, into ``input_image``.

    The file is read as a flat sequence of unsigned bytes, scaled with
    ``normalise`` and reshaped to ``(N0, H, W)`` before being written in place
    into the ``input_image`` buffer.

    Args:
        filename: Path of the raw uint8 image file.

    Raises:
        ValueError: If the file does not contain exactly ``N0 * H * W`` bytes
            (raised by ``reshape``).
    """
    # Passing the path lets numpy open the file in binary mode and close it.
    raw = np.fromfile(filename, dtype=np.uint8)
    # normalise() is applied to the whole array at once: np.single(array / 255)
    # yields the same float32 values as converting pixel by pixel, without a
    # Python-level loop over every pixel.
    input_image[:] = normalise(raw).reshape(N0, H, W)



In [41]:
# Sanity check: stage a single low-resolution test image in the input buffer
# before running the accelerator once.
load_input(filename="./tests/set5/butterfly_3x_LR_u8.bin")

In [42]:
%%timeit -r 1 -n 1

ctrl = srcnn.register_map.CTRL
ctrl.AP_START = 1
while ctrl.AP_DONE != 1:
    pass


1min 46s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [43]:
# Compare the accelerator output with the single-precision reference result.
# Passing the path lets numpy open the file in binary mode and close it again
# instead of leaking a text-mode file handle.
golden_output = np.fromfile("./tests/set5/butterfly_3x_GR_flp.bin", dtype=np.single).reshape(N3, H, W)
# Mean squared error over every element of the output image.
mse = np.square(np.subtract(golden_output, output_image)).mean()
print(mse)

2.1787502e-13


In [None]:
import time

def test_srcnn(filename):
    """Run one Set14 image through the accelerator and score the result.

    Loads ``./tests/set14/inputs/<filename>`` into the input buffer, starts the
    IP, busy-waits until it reports done, then compares ``output_image`` with
    the normalised uint8 reference from ``./tests/set14/outputs/``.

    Args:
        filename: File name (not a path) inside ``./tests/set14/inputs/``.

    Returns:
        Tuple ``(elapsed_seconds, mse)``: wall-clock time from just before the
        IP is started until it is seen done, and the mean squared error against
        the reference image.
    """
    file_input_prefix = "./tests/set14/inputs/"
    file_output_prefix = "./tests/set14/outputs/"
    load_input(file_input_prefix + filename)
    # The reference name is built by replacing the last 9 characters of the
    # input name with "GT_u8.bin"; presumably inputs end in "LR_u8.bin"
    # (9 characters) -- verify against the test-set naming.
    output_file = file_output_prefix + filename[:-9] + "GT_u8.bin"

    tic = time.perf_counter()
    ctrl = srcnn.register_map.CTRL
    ctrl.AP_START = 1
    while ctrl.AP_DONE != 1:
        pass
    toc = time.perf_counter()
    elapsed = toc - tic
    print(filename, "image complete: Elapsed time: ", elapsed, " seconds")

    # Read via path so numpy opens the file in binary mode and closes it, and
    # normalise the whole array at once (same float32 values as per-pixel
    # conversion, without a Python loop over every pixel).
    reference_u8 = np.fromfile(output_file, dtype=np.uint8)
    golden_output = normalise(reference_u8).reshape(N3, H, W)
    mse = np.square(np.subtract(golden_output, output_image)).mean()
    print("Mean squared error: ", mse)
    return (elapsed, mse)
    

In [None]:
from os import listdir
from os.path import isfile, join


# Evaluate every file in the Set14 input directory and report aggregate
# accuracy and throughput.
# sorted() makes the processing order deterministic (listdir order is arbitrary).
inputs = sorted(f for f in listdir("./tests/set14/inputs") if isfile(join("./tests/set14/inputs", f)))
if not inputs:
    # Fail with a clear message rather than a ZeroDivisionError further down.
    raise RuntimeError("No input files found in ./tests/set14/inputs")

ave_mse = 0
total_seconds = 0

for i in inputs:
    # test_srcnn returns (elapsed_seconds, mse).
    result = test_srcnn(i)
    total_seconds += result[0]
    ave_mse += result[1]

ave_mse /= len(inputs)
seconds_per_image = total_seconds / len(inputs)
# Frames per second is images divided by time. The previous code printed the
# average seconds per image under the "FPS" label.
fps = len(inputs) / total_seconds

print("Average MSE: ", ave_mse)
print("Average seconds per image: ", seconds_per_image)
print("FPS: ", fps)
                             

Image complete: Elapsed time:  106.70784482399995  seconds
Mean squared error:  0.0015902378
Image complete: Elapsed time:  106.70834523999929  seconds
Mean squared error:  0.0011378034
Image complete: Elapsed time:  106.70824777899998  seconds
Mean squared error:  0.0020689496
