In [1]:
"""Test HLS vector addition accelerator."""
import numpy as np
import pynq

# length of every vector used in this notebook
N = 100

# load bitstream
BITSTREAM = './vadd_hls.bit'
overlay = pynq.Overlay(BITSTREAM)

# grab the vadd IP from the overlay; leaving register_map as the last
# expression makes the notebook display the IP's register layout
vadd = overlay.vadd_0
vadd.register_map

RegisterMap {
  CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, INTERRUPT=0, RESERVED_3=0),
  GIER = Register(Enable=0, RESERVED=0),
  IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED_0=0),
  IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED_0=0),
  a_1 = Register(a=write-only),
  a_2 = Register(a=write-only),
  b_1 = Register(b=write-only),
  b_2 = Register(b=write-only),
  s_1 = Register(s=write-only),
  s_2 = Register(s=write-only)
}

In [2]:
# allocate arrays in DRAM memory (shape given as a real 1-tuple)
a = pynq.allocate((N,), dtype=np.int32)
b = pynq.allocate((N,), dtype=np.int32)
s = pynq.allocate((N,), dtype=np.int32)

# tell vadd IP the DRAM memory location of allocated vectors.
# The register map exposes each pointer argument as a pair of registers
# (x_1, x_2). Presumably x_1 holds address bits 31:0 and x_2 bits 63:32
# (the usual HLS layout for 64-bit m_axi pointers) -- confirm against the
# generated driver header. Both halves are written so the IP never depends
# on the reset value of x_2 and an address >= 4 GiB is not truncated.
ADDR_LO_MASK = 0xFFFFFFFF
vadd.write(vadd.register_map.a_1.address, a.physical_address & ADDR_LO_MASK)
vadd.write(vadd.register_map.a_2.address, a.physical_address >> 32)
vadd.write(vadd.register_map.b_1.address, b.physical_address & ADDR_LO_MASK)
vadd.write(vadd.register_map.b_2.address, b.physical_address >> 32)
vadd.write(vadd.register_map.s_1.address, s.physical_address & ADDR_LO_MASK)
vadd.write(vadd.register_map.s_2.address, s.physical_address >> 32)

In [3]:
# initialize input vectors a and b in DRAM from PS
# Sized by N (not a literal 100) so the fill always covers exactly the
# allocated buffers; slice assignment writes into the existing buffers
# in place instead of rebinding the names.
a[:] = np.arange(N)          # a[i] = i
b[:] = np.arange(1, N + 1)   # b[i] = i + 1

In [4]:
%%timeit -r 10 -n 1000

# send vadd start signal and wait until done
ctrl = vadd.register_map.CTRL
ctrl.AP_START = 1
while ctrl.AP_DONE != 1:
    pass

115 µs ± 359 ns per loop (mean ± std. dev. of 10 runs, 1,000 loops each)


In [5]:
# check the results
# print the first few sums; bounded by the buffer length so a small N
# cannot index past the end
for i in range(min(10, len(s))):
    print('a:{} + b:{} = s:{}'.format(a[i], b[i], s[i]))

# verify every element, not only the ones printed above
np.testing.assert_array_equal(np.asarray(s), np.asarray(a) + np.asarray(b))

a:0 + b:1 = s:1
a:1 + b:2 = s:3
a:2 + b:3 = s:5
a:3 + b:4 = s:7
a:4 + b:5 = s:9
a:5 + b:6 = s:11
a:6 + b:7 = s:13
a:7 + b:8 = s:15
a:8 + b:9 = s:17
a:9 + b:10 = s:19
