# DFX + DMA + self trigger

## import section

In [1]:
# import the library
from pynq import Overlay     # import the overlay
from pynq import allocate    # import for CMA (contiguous memory allocation)
from pynq import DefaultIP   # import the ip connector library for extension
import numpy as np
import os
import subprocess

# Project root and the sub-folder that holds the bitstream files.
PRJ_DIR = '/home/xilinx/jupyter_notebooks/dfx4ml/'
PRJ_HW_DIR = PRJ_DIR + 'hw/'

# Full bitstream, loaded through Overlay().
FULL_BS_NAME = 'system.bin'

# Partial bitstreams (file names relative to PRJ_HW_DIR).
PAR_BS_NAME_0, PAR_BS_NAME_1 = 'add.bin', 'sub.bin'
PAR_BS_NAME_ML1, PAR_BS_NAME_ML2, PAR_BS_NAME_ML3 = 'ml1.bin', 'ml2.bin', 'ml3.bin'
PAR_BS_NAME_ALL = 'mlx.bin'

# Buffer shapes; the leading dimension of every shape is AMT_QUERY.
AMT_QUERY = 5
INPUT_SHAPE = (AMT_QUERY, 6, 8)
ML2IN_SHAPE = (AMT_QUERY, 6, 16)
ML3IN_SHAPE = (AMT_QUERY, 6, 8)
DES_SHAPE = (AMT_QUERY, 5)

In [2]:
#### load overlay: the full bitstream FULL_BS_NAME from PRJ_HW_DIR
overlay  = Overlay(PRJ_HW_DIR + FULL_BS_NAME)

In [3]:
#### get the device handles exposed by the overlay
dmaIp         = overlay.axi_dma_0   # its sendchannel/recvchannel are driven by sendData()
decoupleCtrl  = overlay.axi_gpio_0  # written by decouple() / couple()
resetCtrl     = overlay.axi_gpio_1  # written by HoldReset() / HoldOperate()

In [4]:
def decouple():
    """Write 1 at offset 0 of ``decoupleCtrl``.

    Presumably this asserts the decoupler around the reconfigurable
    region -- confirm against the block design.
    """
    offset, value = 0, 1
    decoupleCtrl.write(offset, value)

def couple():
    """Write 0 at offset 0 of ``decoupleCtrl``.

    Presumably this releases the decoupler so the reconfigurable region
    is connected again -- confirm against the block design.
    """
    offset, value = 0, 0
    decoupleCtrl.write(offset, value)

def HoldReset():
    """Write 0 at offset 0 of ``resetCtrl``.

    Presumably the reset line is active-low, so this keeps the region in
    reset -- confirm against the block design.
    """
    offset, value = 0, 0
    resetCtrl.write(offset, value)

def HoldOperate():
    """Write 1 at offset 0 of ``resetCtrl``.

    Presumably the reset line is active-low, so this lets the region
    run -- confirm against the block design.
    """
    offset, value = 0, 1
    resetCtrl.write(offset, value)

In [5]:
def changeDfx(path):
    """Download the bitstream at ``path`` into ``overlay.par0``.

    The download is bracketed by HoldReset() and decouple() beforehand and
    by couple() and HoldOperate() afterwards, in exactly that order.

    Args:
        path: bitstream file, passed unchanged to ``overlay.par0.download``.

    NOTE(review): there is no try/finally here, so if the download raises,
    couple() and HoldOperate() are never called -- confirm that leaving the
    region in that state is intended.
    """
    HoldReset()
    decouple()
    overlay.par0.download(path)
    couple()
    HoldOperate()

In [6]:
def allocDataUint(allocShape = (16, ), allocType = np.float32, inputX = None):
    """Allocate a buffer with ``allocate`` and optionally fill it from ``inputX``.

    Args:
        allocShape: shape handed to ``allocate`` (int or sequence of ints).
        allocType: dtype handed to ``allocate``.
        inputX: optional array copied into the new buffer. Its shape and
            dtype must match ``allocShape`` and ``allocType``.

    Returns:
        Tuple ``(buffer, buffer.physical_address, buffer.nbytes)``.

    Raises:
        ValueError: if ``inputX`` does not match the requested shape or
            dtype. The check runs before anything is allocated.
    """
    if inputX is not None:
        # Validate up front so a mismatch never allocates a buffer that is
        # thrown away immediately.
        if np.ndim(allocShape) == 0:
            wantShape = (int(allocShape), )
        else:
            wantShape = tuple(int(dim) for dim in allocShape)
        if wantShape != tuple(inputX.shape):
            raise ValueError(
                "inputX shape {} does not match allocShape {}".format(
                    tuple(inputX.shape), wantShape))
        if np.dtype(allocType) != inputX.dtype:
            raise ValueError(
                "inputX dtype {} does not match allocType {}".format(
                    inputX.dtype, np.dtype(allocType)))

    buf0 = allocate(shape=allocShape, dtype=allocType)
    #### copy the data
    if inputX is not None:
        print("start copy from input to allocate buffer")
        np.copyto(buf0, inputX)
        print("copy finish")

    return buf0, buf0.physical_address, buf0.nbytes

In [7]:
def sendData(bufSrc, bufDes):
    """Run one DMA round trip: send ``bufSrc`` and receive into ``bufDes``.

    Both transfers are started (send first, then receive) before either
    one is waited on. A progress line is printed before every step and
    once more when both waits have returned.

    Args:
        bufSrc: buffer handed to ``dmaIp.sendchannel.transfer``.
        bufDes: buffer handed to ``dmaIp.recvchannel.transfer``.
    """
    txChannel = dmaIp.sendchannel
    rxChannel = dmaIp.recvchannel

    # Start both directions before blocking on either of them.
    print("----------> set sending channel")
    txChannel.transfer(bufSrc)
    print("----------> set receiving channel")
    rxChannel.transfer(bufDes)

    print("----------> wait for sending channel")
    txChannel.wait()
    print("----------> wait for receiving channel")
    rxChannel.wait()

    print("----------> dma finish")


In [8]:
# Random float32 input, copied into the first buffer of the pipeline.
inputX = np.random.rand(*INPUT_SHAPE).astype(np.float32)
buf_input, buf_input_phya, buf_input_sz = allocDataUint(
    allocShape=INPUT_SHAPE, allocType=np.float32, inputX=inputX)

# Intermediate buffers between the stages, then the final output buffer.
buf_ml2_in, buf_ml2_in_phya, buf_ml2_in_sz = allocDataUint(
    allocShape=ML2IN_SHAPE, allocType=np.float32)
buf_ml3_in, buf_ml3_in_phya, buf_ml3_in_sz = allocDataUint(
    allocShape=ML3IN_SHAPE, allocType=np.float32)
buf_out, buf_out_phy, buf_out_sz = allocDataUint(
    allocShape=DES_SHAPE, allocType=np.float32)

start copy from input to allocate buffer
copy finish


In [9]:
# One entry per stage: (partial bitstream, DMA source buffer, DMA destination buffer).
# Each stage's destination buffer is the next stage's source buffer.
dfxStages = (
    (PAR_BS_NAME_ML1, buf_input, buf_ml2_in),
    (PAR_BS_NAME_ML2, buf_ml2_in, buf_ml3_in),
    (PAR_BS_NAME_ML3, buf_ml3_in, buf_out),
)
for stageBs, stageSrc, stageDst in dfxStages:
    # Swap in the stage's bitstream, then run its data through the DMA.
    changeDfx(PRJ_HW_DIR + stageBs)
    sendData(stageSrc, stageDst)

----------> set sending channel
----------> set receiving channel
----------> wait for sending channel
----------> wait for receiving channel
----------> dma finish
----------> set sending channel
----------> set receiving channel
----------> wait for sending channel
----------> wait for receiving channel
----------> dma finish
----------> set sending channel
----------> set receiving channel
----------> wait for sending channel
----------> wait for receiving channel
----------> dma finish


In [10]:
# Copy the output of the three-stage DFX run out of buf_out into a float32 array.
np_parRes = np.array(buf_out, dtype=np.float32)

## normal flow

In [None]:
# Separate input/output buffers for the single-bitstream run; the input
# buffer is filled from the same inputX used for the DFX run.
buf_mlx_in, buf_mlx_in_phya, buf_mlx_in_sz = allocDataUint(
    allocShape=INPUT_SHAPE, allocType=np.float32, inputX=inputX)
buf_mlx_out, buf_mlx_out_phya, buf_mlx_out_sz = allocDataUint(
    allocShape=DES_SHAPE, allocType=np.float32)


In [None]:
# Load PAR_BS_NAME_ALL and push the input through in a single DMA round trip.
# NOTE(review): presumably this bitstream contains all three ML stages -- confirm.
changeDfx(PRJ_HW_DIR + PAR_BS_NAME_ALL)
sendData(buf_mlx_in, buf_mlx_out)

In [None]:
# Copy the output of the single-bitstream run out of buf_mlx_out into a float32 array.
np_fullRes = np.array(buf_mlx_out, dtype=np.float32)