In [1]:
from pynq import Overlay
from pynq import allocate
import numpy as np

# Program the FPGA: load the bitstream file below and keep the resulting
# overlay handle for accessing the design's IP blocks.
overlay = Overlay(
    '/home/xilinx/simple_forward_vae.bit'
)

In [2]:
# Look up the AXI DMA block once and derive both channel handles from it:
# the send channel is used for the MM2S transfer and the receive channel
# for the S2MM transfer further down.
dma = overlay.axi_dma_0
dma_send, dma_recv = dma.sendchannel, dma.recvchannel

In [3]:
# Number of 32-bit words in the buffer used for the AXI DMA MM2S transfer.
in_length = 40

# Physical memory for the MM2S transfer, one uint32 per word.
input_buffer = allocate(dtype=np.uint32, shape=(in_length,))

In [4]:
# Input words, listed by buffer index. Each 32-bit word packs two 16-bit
# fields; the trailing comment names the fields covered by that row.
# Only words 0..37 are written here; words 38 and 39 are left untouched.
packed_words = [
    0x00000100, 0x00000100,  # in1 - in4
    0x00000100, 0x00000100,  # in5 - in8
    0x00040100,              # in9 and wc11
    0xfffbffff, 0xfffb0000,  # wc12 - wc15
    0xffad0003, 0x003f020d,  # wc16 - wc19
    0xfffa000f, 0x0003fffa,  # wd11 - wd14
    0xfffc0006, 0xffeeffe9,  # wd15 - wd18
    0xffffffa8,              # wd19 and wc21
    0xffff0009, 0xfff60009,  # wc22 - wc25
    0x00650009, 0x008affd6,  # wc26 - wc29
    0xfff6000b, 0xfff7000b,  # wd21 - wd24
    0xfff40015, 0x002b00dd,  # wd25 - wd28
    0xff230028,              # wd29 and b21
    0xff20ff16,              # b22 and b23
    0x0024ff60,              # b24 and w11
    0x04fb0044,              # w12 and w21
    0x0023ffe4,              # w22 and w31
    0x04f90043,              # w32 and w41
    0xfb10ffe4,              # w42 and w51
    0x04fa001d,              # w52 and w61
    0x0025ffe4,              # w62 and w71
    0x04fb0047,              # w72 and w81
    0x0022ffe4,              # w82 and w91
    0x04f00041,              # w92 and b31
    0x04efffcc, 0x0032ffcc,  # b32 - b35
    0x04f1ffcc, 0x04edffcc,  # b36 - b39
]

# Write the table into the DMA buffer one word at a time.
for word_index, word in enumerate(packed_words):
    input_buffer[word_index] = word
 

In [5]:
# Echo every word of the input buffer as 8-digit upper-case hex, one per
# line, to verify what was written.
for hex_word in ("0x%08X" % input_buffer[k] for k in range(in_length)):
    print(hex_word)

0x00000100
0x00000100
0x00000100
0x00000100
0x00040100
0xFFFBFFFF
0xFFFB0000
0xFFAD0003
0x003F020D
0xFFFA000F
0x0003FFFA
0xFFFC0006
0xFFEEFFE9
0xFFFFFFA8
0xFFFF0009
0xFFF60009
0x00650009
0x008AFFD6
0xFFF6000B
0xFFF7000B
0xFFF40015
0x002B00DD
0xFF230028
0xFF20FF16
0x0024FF60
0x04FB0044
0x0023FFE4
0x04F90043
0xFB10FFE4
0x04FA001D
0x0025FFE4
0x04FB0047
0x0022FFE4
0x04F00041
0x04EFFFCC
0x0032FFCC
0x04F1FFCC
0x04EDFFCC
0x00000000
0x00000000


In [6]:
# Do AXI DMA MM2S transfer: hand input_buffer to the send channel.
# NOTE(review): no dma_send.wait() follows in this notebook, so completion of
# the send is never checked — confirm this is intended. A wait placed here,
# before the receive channel is started, may block if the design cannot
# accept all input without draining its output (TODO confirm against the IP).
dma_send.transfer(input_buffer)

In [7]:
# Number of 32-bit words in the buffer used for the AXI DMA S2MM transfer.
out_length = 10

# Physical memory for the S2MM transfer, one uint32 per word.
output_buffer = allocate(dtype=np.uint32, shape=(out_length,))

In [8]:
# Show the output buffer before the receive transfer, as 8-digit upper-case
# hex, one word per line.
for hex_word in ("0x%08X" % output_buffer[k] for k in range(out_length)):
    print(hex_word)

0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000
0x00000000


In [9]:
# Do AXI DMA S2MM transfer: hand output_buffer to the receive channel.
dma_recv.transfer(output_buffer)

# transfer() only starts the DMA. Block until the receive channel reports
# completion so the following cells never read output_buffer while the
# hardware is still writing it (matters under "Run All", where cells execute
# back to back).
dma_recv.wait()

In [10]:
# Show the output buffer after the DMA transfer, as 8-digit upper-case hex,
# one word per line.
for hex_word in ("0x%08X" % output_buffer[k] for k in range(out_length)):
    print(hex_word)

0x00000000
0x00000000
0x00800080
0x00800080
0x00800080
0x00800080
0x00000000
0x00FE0000
0x00000000
0x00000000


In [11]:
# Unpack the 3x3 result grid from the received words. Each word carries two
# 16-bit values; dividing by 256.0 scales a raw value to a real number.
#
# Row-major list of (word index, shift) pairs, one per grid cell:
# shift 16 selects the upper half-word, shift 0 the lower half-word.
half_word_map = [
    (3, 16), (3, 0), (2, 16),
    (2, 0), (5, 16), (5, 0),
    (4, 16), (4, 0), (7, 16),
]

out = np.zeros(shape=(3, 3))
for cell, (word_idx, shift) in enumerate(half_word_map):
    row, col = divmod(cell, 3)
    raw_half = (int(output_buffer[word_idx]) >> shift) & 0xFFFF
    out[row][col] = raw_half / 256.0

out

array([[0.5      , 0.5      , 0.5      ],
       [0.5      , 0.5      , 0.5      ],
       [0.5      , 0.5      , 0.9921875]])

In [12]:
# Round every decoded hardware output to the nearest integer for display.
out.round()

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 1.]])

In [20]:
# Reference words laid out like the 10-word output buffer. Only indices
# 2..7 carry values; the remaining entries are zero.
functional_val = np.array(
    [
        0x00000000,  # [0]
        0x00000000,  # [1]
        0x00fd0007,  # [2]
        0x00fd0007,  # [3]
        0x00fe0003,  # [4]
        0x00fc0003,  # [5]
        0x00000000,  # [6]
        0x00fe0000,  # [7]
        0x00000000,  # [8]
        0x00000000,  # [9]
    ],
    dtype=np.uint32,
)

In [21]:
# Unpack the 3x3 grid from the reference words using the same layout as the
# hardware result: words are consumed in the order 3, 2, 5, 4, 7, each giving
# its upper half-word first and then its lower half-word. The lower half of
# word 7 is not part of the grid.
word_order = [3, 2, 5, 4, 7]
selected = functional_val[word_order]

# Interleave (upper, lower) per word, keep the first nine values, and scale
# by 1/256 to obtain real numbers.
halves = np.column_stack((selected >> 16, selected & 0xFFFF)).ravel()[:9]
func_out = halves.reshape(3, 3) / 256.0

func_out

array([[0.98828125, 0.02734375, 0.98828125],
       [0.02734375, 0.984375  , 0.01171875],
       [0.9921875 , 0.01171875, 0.9921875 ]])

In [22]:
# Round every decoded reference value to the nearest integer for display.
func_out.round()

array([[1., 0., 1.],
       [0., 1., 0.],
       [1., 0., 1.]])

In [13]:
# Release the DMA buffers to prevent a memory leak.
# `del` alone only removes the notebook names; the underlying memory would be
# returned only once no other reference to the buffers remains. Calling
# freebuffer() first returns the memory deterministically. Nothing may use
# these buffers after this cell.
input_buffer.freebuffer()
output_buffer.freebuffer()
del input_buffer, output_buffer