In [None]:
# import the library
from pynq import Overlay     # import the overlay
from pynq import allocate    # import for CMA (contiguous memory allocation)
from pynq import DefaultIP   # import the ip connector library for extension
from pynq import Interrupt
import asyncio
import numpy as np
import os
import subprocess
import re
import dfx4ml.magicSeq as magicSeq  # import the magic sequence library
import dfx4ml.dfxCtrl as dfxCtrl  # import the dfx control library
import dfx4ml.cap     as cap
import dfx4ml.memAlloc as dataAlloc  # import the memory allocation library
import time

# Project layout on the board. The hw/ directory is where the bitstreams and the
# DFX metadata file are loaded from; the sw/ directory holds the input test data.
PRJ_DIR    = '/home/xilinx/jupyter_notebooks/tanawin/dfx4ml_magicSkipper/'
PRJ_HW_DIR = PRJ_DIR + 'hw/'
PRJ_TC_DIR = PRJ_DIR + 'sw/'

# File passed to dfxCtrlIp.config().
DFX_CONFIG_FILE = 'dfxCtrlMeta.txt'

# Bitstream file names: one full image and two partial images.
FULL_BS_NAME    = 'system.bin'
PAR_BS_NAME_0   = 'skipper1.bin'  # dma to magic stream 1
PAR_BS_NAME_1   = 'skipper2.bin'  # magic stream 1 to magic stream 2
INPUT_DATA_NAME = "inputX1023.npy"


# Workload geometry: number of queries and the per-query tensor shapes.
AMT_QUERY    = 1023
INPUT_SHAPE  = (AMT_QUERY, 4, 4, 1)  # per query: 4*4*1 float32 = 64 bytes
# intermediate layer = (4*4*8* float32 = 512 bytes)
# intermediate layer = (4*4*8* float32 = 512 bytes)
OUTPUT_SHAPE = (AMT_QUERY, 4, 4, 1)  # per query: 4*4*1 float32 = 64 bytes
# Number of magic sequencer slots programmed by this notebook (slots 0 and 1).
AMT_SLOT = 2

In [None]:
# Select the "pcap" PL configuration mode before the full overlay is loaded.
# NOTE(review): presumably this routes PL configuration through the PS PCAP port;
# the meaning of the second argument (True) is not visible here — confirm in dfx4ml.cap.
cap.changePLconfigMode("pcap", True)

In [None]:
#### load the overlay
# Builds the Overlay from the full bitstream file located in the hw/ directory.
overlay  = Overlay(PRJ_HW_DIR + FULL_BS_NAME)

In [None]:
#### list the interrupt pins of the overlay
# Nothing is created here: the bare expression only displays the mapping as the
# cell output, so the pin name passed to Interrupt() below can be checked.
overlay.interrupt_pins

In [None]:
# Interrupt object for the magic sequencer's hw_intr pin; it is awaited later
# in startExecAndWait4Intr() to detect the end of a run.
my_interrupt = Interrupt('magicSeq/MagicSeqTopIntr_0/hw_intr')  # index 0 from your mapping

In [None]:
#### get the device
# Handles to the three IP blocks used by this notebook, addressed by their
# hierarchy path inside the overlay: AXI DMA, DFX controller and magic sequencer.
dmaIp      = overlay.dataMovement.axi_dma_0
dfxCtrlIp  = overlay.PRcontroller.dfx_controller_0
magicSeqIp = overlay.magicSeq.MagicSeqTopIntr_0

In [None]:
#### configure the dfx controller ip to match the address space
# The driver is configured from the metadata file in the hw/ directory; afterwards
# its BLS_REGID attribute is printed as a quick sanity check.
dfxCtrlIp.config(PRJ_HW_DIR + DFX_CONFIG_FILE)
print("regIdxSize = ", dfxCtrlIp.BLS_REGID)

In [None]:
### change reconfigure mode
# Switch the PL configuration mode from "pcap" (used for the full overlay) to "icap".
# NOTE(review): presumably required so the DFX controller can load partial
# bitstreams through ICAP — confirm against dfx4ml.cap.
cap.changePLconfigMode("icap", True)

In [None]:
# Show the DFX controller status after configuration and the mode switch.
dfxCtrlIp.printStatus()

In [None]:
#### shutdown all system
# Both engines are shut down before any of their registers are programmed below.

magicSeqIp.shutdownEngine()
dfxCtrlIp .shutdownEngine()

In [None]:
# get physical address of dma and dfx controller
# The addresses come from the overlay's ip_dict metadata and are later written
# into the magic sequencer (setDmaAddr / setDfxAddr).
dmaPhyAddr     =  overlay.ip_dict['dataMovement/axi_dma_0']['phys_addr']
dfxCtrlPhyAddr =  overlay.ip_dict['PRcontroller/dfx_controller_0']['phys_addr']

print("dma physical address: ", hex(dmaPhyAddr))
print("dfx  Ctrl physical address: ", hex(dfxCtrlPhyAddr))

In [None]:
##### initialize magic seq
# Programs the magic sequencer: global metadata first, then the input/output
# data buffers, then one slot descriptor per sequencer slot.
print("------ before init magic seq------")
print(magicSeqIp.printDebug())

print("------ init magic sequence METADATA bank 0 -------------------------")
magicSeqIp.setEndCnt(AMT_SLOT-1) ### use the last index
# Physical addresses of the DMA and the DFX controller are handed to the sequencer.
magicSeqIp.setDmaAddr(dmaPhyAddr)
magicSeqIp.setDfxAddr(dfxCtrlPhyAddr)
magicSeqIp.setIntrEna(1)
magicSeqIp.setIntr(1)  # original note: "woc", command 1 sets the interrupt to 0 — TODO confirm register semantics
magicSeqIp.setRoundTrip(0)  # set round trip to 0, no need to wait for the dma to finish

# Load the reference input and refuse to continue when it does not match the
# shape the buffers below are allocated for.
inputX = np.load(PRJ_TC_DIR + INPUT_DATA_NAME)
if inputX.shape != INPUT_SHAPE:
    # Report the offending shape; interpolating the array itself would dump all
    # of its values and still not show the shape.
    raise ValueError(f"inputX shape is {inputX.shape} expect {INPUT_SHAPE}")

#inputX = np.random.rand(*INPUT_SHAPE).astype(np.float32)
print("-------------init all data buffer -------------")
# Each call returns (buffer, physical address, size); only the input buffer is
# pre-filled with data.
buf_input   , buf_input_phya   , buf_input_sz    = dataAlloc.allocDataUint(allocShape= INPUT_SHAPE, allocType= np.float32, inputX = inputX)
buf_out     , buf_out_phy      , buf_out_sz      = dataAlloc.allocDataUint(allocShape= OUTPUT_SHAPE  , allocType= np.float32)
# Flush the input buffer so the data written by the CPU is visible to the hardware.
buf_input.flush()
print("------------- init all bank 1 ------------------")
# Slot 0 only carries a source (the input buffer); slot 1 only carries a
# destination (the output buffer). The remaining fields follow the column header.
######                      srcPhyAddr    ,        srcSz,  dstPhyAddr,      dstSz,st,pr,loadMask, storeMask, intrMask 
magicSeqIp.setWholeSlot(0, [buf_input_phya, buf_input_sz,           0,          0, 0, 0,  0b0001,    0b0110, 0])
magicSeqIp.setWholeSlot(1, [             0,            0, buf_out_phy, buf_out_sz, 0, 0,  0b0110,    0b0001, 0])

print("------------- after init magic seq------")
print(magicSeqIp.printDebug())

In [None]:
##### initialize dfx controller
# One allocateBitStreamCMA() call per partial bitstream file; each call returns a
# (buffer, address, size) triple. The address and size are registered with the
# DFX controller in the next cell.
print("------ allocate bit steram CMA for each trigger ------")

######## set trigger 0
d0_ip_buf, d0_addr, d0_size = \
    dfxCtrlIp.allocateBitStreamCMA(PRJ_HW_DIR + PAR_BS_NAME_0)
######## set trigger 1
d1_ip_buf, d1_addr, d1_size = \
    dfxCtrlIp.allocateBitStreamCMA(PRJ_HW_DIR + PAR_BS_NAME_1)

In [None]:
##### initialize dfx controller2
# Register the address and size of each partial bitstream under its trigger
# index (0 -> PAR_BS_NAME_0, 1 -> PAR_BS_NAME_1).
dfxCtrlIp.setSimpleMetaData(0, d0_addr, d0_size)
dfxCtrlIp.setSimpleMetaData(1, d1_addr, d1_size)

In [None]:
##### check dfx controller3
# Print the controller status and the metadata just written for triggers 0 and 1.
dfxCtrlIp.printStatus()
dfxCtrlIp.printSimpleMetaData(0)
dfxCtrlIp.printSimpleMetaData(1)

In [None]:
# Fire trigger 0 on the DFX controller, then call restartNoStatus().
# NOTE(review): presumably this loads the partial bitstream registered for
# trigger 0 before the sequencer is started — confirm against dfx4ml.dfxCtrl.
dfxCtrlIp.trigger(0)
dfxCtrlIp.restartNoStatus()

In [None]:
# Show the DFX controller status after the trigger/restart above.
dfxCtrlIp.printStatus()

In [None]:
##### coroutine: start the magic sequencer and wait for its interrupt
async def startExecAndWait4Intr():
    """Run the magic sequencer once and report how long it took.

    Calls ``magicSeqIp.clearIntr()`` and ``magicSeqIp.startEngine()``, awaits
    ``my_interrupt.wait()`` a single time, then prints "interrupt" followed by
    the elapsed time. The time is measured with ``time.perf_counter()`` from
    just before ``clearIntr()`` until the wait returns, so it includes the
    driver calls and the asyncio wake-up latency.

    Returns:
        None.
    """
    t_begin = time.perf_counter()  # reference point for the elapsed time
    magicSeqIp.clearIntr()
    magicSeqIp.startEngine()
    # Exactly one interrupt is awaited per run.
    await my_interrupt.wait()
    t_done = time.perf_counter()
    print("interrupt")
    print(f"Elapsed time: {t_done - t_begin:.6f} seconds")

In [None]:
# Event loop used in the next cell to drive startExecAndWait4Intr().
loop2 = asyncio.get_event_loop()

In [None]:
# Schedule the coroutine on the loop and block this cell until it finishes,
# i.e. until the magic sequencer interrupt has been received.
task2 = loop2.create_task(startExecAndWait4Intr())
loop2.run_until_complete(task2)

In [None]:
# Magic sequencer debug state right after the run, before the engine is shut down.
print(magicSeqIp.printDebug())

In [None]:
# Shut the magic sequencer engine down once the run has completed.
magicSeqIp.shutdownEngine()

In [None]:
# Magic sequencer debug state after shutdown, for comparison with the previous dump.
print(magicSeqIp.printDebug())

In [None]:
# Invalidate the output buffer before reading it so the CPU does not see stale
# cached contents, then copy it into a regular float32 NumPy array.
buf_out.invalidate()
np_parRes = np.array(buf_out, dtype=np.float32)
print(np_parRes)

In [None]:
# Print the input buffer so it can be compared by eye with the output above.
print(buf_input)

In [None]:
# Debug registers of stream 1: offset 0 is reported as the store element value
# and offset 8 as the load element value.
stream1 = overlay.dataMovement.streamDbg_1
stream1_store = stream1.read(0)
stream1_load  = stream1.read(8)
print("store element ", stream1_store, " load element ", stream1_load)

In [None]:
# Debug registers of stream 2: offset 0 is reported as the store element value
# and offset 8 as the load element value.
stream2 = overlay.dataMovement.streamDbg_2
stream2_store = stream2.read(0)
stream2_load  = stream2.read(8)
print("store element ", stream2_store, " load element ", stream2_load)

In [None]:
# Shared stream state register at offset 0: bits 3..0 are printed as the
# stream 1 status and bits 7..4 as the stream 2 status. The register is read
# once per field.
streamStatus = overlay.dataMovement.streamDbg_state
stream1_state = streamStatus.read(0) & 0xF
stream2_state = (streamStatus.read(0) >> 4) & 0xF
print("stream 1 status: ", stream1_state, " stream 2 status: ", stream2_state)

In [None]:
# Raw read of the DMA register at offset 0x34, printed as the write status.
# NOTE(review): 0x34 is the S2MM status register (S2MM_DMASR) in the Xilinx AXI
# DMA register map — confirm against the IP configuration used in this design.
print("dma write status: ", dmaIp.read(0x34))

In [None]:
# Persist the result read back from the board. The path is relative, so the
# file lands in the kernel's current working directory.
np.save("zcuOutput1023.npy", np_parRes)