In [None]:
#Setup cell: allows resuming the flow at any step
#Note: copy the tmp folder to retain data
#Before running from any other cell, run this setup cell first
#Necessary imports
import inspect
import netron
from finn.util.basic import make_build_dir
from IPython.display import IFrame
import onnx
import brevitas.onnx as bo
from finn.util.basic import pynq_part_map
from pkgutil import get_data
import onnx.numpy_helper as nph
import matplotlib.pyplot as plt
import numpy as np
from finn.core.onnx_exec import execute_onnx

#API for model tidy up and HLS synthesis
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.streamline import Streamline
from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition
from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
from finn.transformation.fpgadataflow.replace_verilog_relpaths import ReplaceVerilogRelPaths
from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ

#For pretrained network
from pkgutil import get_data
from finn.util.test import get_test_model_trained

#Code inspection for debugging
def showSrc(what):
    """Print the source code of `what` as located by the inspect module."""
    source_text = inspect.getsource(what)
    print(source_text)
    
#Visualization for design recheck
def showInNetron(model_filename, port=8081, host="0.0.0.0"):
    """Serve `model_filename` with Netron and return an IFrame pointing at it.

    Parameters
    ----------
    model_filename : path of the model file handed to ``netron.start``.
    port : port passed to ``netron.start`` and used in the IFrame URL.
    host : address passed to ``netron.start`` and used in the IFrame URL.

    Returns
    -------
    An ``IFrame`` whose source is ``http://<host>:<port>/``.
    """
    netron.start(model_filename, port=port, host=host)
    # Build the URL from the same host/port given to the server so they always agree.
    viewer_url = "http://{}:{}/".format(host, port)
    return IFrame(src=viewer_url, width="100%", height=400)

#Location of all model files written by this notebook
#NOTE(review): the original note says the workspace is mounted to /tmp/, while the path used is /workspace/finn - confirm
build_dir = "/workspace/finn"
model_extension = ".onnx"
model_name = build_dir + "/sfc_w1_a1"

#Board definition
pynq_board = "Ultra96"
target_clk_ns = 5
fpga_part = pynq_part_map[pynq_board]

#Connection settings and remote folder handed to DeployToPYNQ in the deployment cell
ip = "192.168.3.1"
username = "xilinx"
password = "xilinx"
target_dir = "/home/xilinx/dance_dance"


def _stage_file(stage_suffix):
    """Return the model file path for one stage: model_name + suffix + extension."""
    return model_name + stage_suffix + model_extension


#One file per step, so that any step can be resumed from the file saved by the previous one
model_name_original = _stage_file("")
model_name_tidy = _stage_file("_tidy")
model_name_streamlined = _stage_file("_streamlined")
model_name_hls_ready = _stage_file("_hls_ready")
model_name_hls_layers = _stage_file("_hls_layers")
model_name_data_flow = _stage_file("_dataflow_parent")
model_name_set_folding = _stage_file("_set_folding")
model_name_ipgen = _stage_file("_ipgen")
model_name_ipstitch = _stage_file("_ipstitch")
model_name_pynq_proj = _stage_file("_pynq_project")
model_name_post_synthesis = _stage_file("_post_synthesis")
model_name_deploy = _stage_file("_deploy")
model_name_deploy_integrated = _stage_file("_deploy_integrated")

In [None]:
#Load model and weights; can be skipped when the model is already exported as onnx
tfc = get_test_model_trained("TFC", 1, 1)
#Export to model_name_original, the file the tidy-up cell loads
bo.export_finn_onnx(tfc, (1, 1, 28, 28), model_name_original)
#Show the file that was just exported (this previously pointed at tfc_w1_a1.onnx, which is never written here)
showInNetron(model_name_original)

In [None]:
#Model tidy up: apply the clean-up transformations one after another, in the order listed
model = ModelWrapper(model_name_original)
for tidy_step in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
):
    #Each transformation is instantiated right before it is applied
    model = model.transform(tidy_step())

model.save(model_name_tidy)
showInNetron(model_name_tidy)

In [None]:
#Model streamline: reload the tidy model, apply Streamline and save the result
tidy_model = ModelWrapper(model_name_tidy)
model = tidy_model.transform(Streamline())
model.save(model_name_streamlined)
showInNetron(model_name_streamlined)

In [None]:
#Prepare the model for HLS conversion
#Needs to be modified for 8 bit quantization; currently set up for 1 bit bipolar
model = ModelWrapper(model_name_streamlined)
for hls_prep_step in (
    ConvertBipolarMatMulToXnorPopcount,
    absorb.AbsorbAddIntoMultiThreshold,
    absorb.AbsorbMulIntoMultiThreshold,
    RoundAndClipThresholds,
):
    #Applied strictly in the order listed
    model = model.transform(hls_prep_step())

model.save(model_name_hls_ready)
showInNetron(model_name_hls_ready)

In [None]:
#Model HLS layers conversion; the transformation used here needs to change depending on quantization type
hls_ready_model = ModelWrapper(model_name_hls_ready)
model = hls_ready_model.transform(to_hls.InferBinaryStreamingFCLayer())
model.save(model_name_hls_layers)
showInNetron(model_name_hls_layers)

In [None]:
#Model create dataflow partition
model = ModelWrapper(model_name_hls_layers)
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(model_name_data_flow)
showInNetron(model_name_data_flow)

#Show the child dataflow layers that are wrapped together in the parent graph
#Use this to check that the process finished correctly
#The partition node is looked up by op type, so no index has to be adjusted per graph
sdp_candidates = [n for n in parent_model.graph.node if n.op_type == "StreamingDataflowPartition"]
if not sdp_candidates:
    raise RuntimeError("No StreamingDataflowPartition node found in " + model_name_data_flow)
sdp_node = getCustomOp(sdp_candidates[0])
#The "model" attribute holds the file name of the child dataflow model
dataflow_model_filename = sdp_node.get_nodeattr("model")
showInNetron(dataflow_model_filename)

In [None]:
#Stream layers folding
parent_model = ModelWrapper(model_name_data_flow)
#The partition node is looked up by op type, so no index has to be adjusted per graph
sdp_candidates = [n for n in parent_model.graph.node if n.op_type == "StreamingDataflowPartition"]
if not sdp_candidates:
    raise RuntimeError("No StreamingDataflowPartition node found in " + model_name_data_flow)
sdp_node = getCustomOp(sdp_candidates[0])
dataflow_model_filename = sdp_node.get_nodeattr("model")

model = ModelWrapper(dataflow_model_filename)

#The first four nodes are taken as the StreamingFCLayer_Batch layers to fold (the testing model has 4 layers)
fc0, fc1, fc2, fc3 = (model.graph.node[i] for i in range(4))
fc0w, fc1w, fc2w, fc3w = (getCustomOp(fc) for fc in (fc0, fc1, fc2, fc3))

#Folding settings per layer; set depending on the paper's experiment results and the actual network
#Attributes are applied layer by layer, in the order listed
folding_config = (
    (fc0w, (("inFIFODepth", 50), ("SIMD", 16), ("PE", 16), ("outFIFODepth", 4))),
    (fc1w, (("SIMD", 16), ("PE", 16), ("outFIFODepth", 4))),
    (fc2w, (("SIMD", 16), ("PE", 16), ("outFIFODepth", 4))),
    (fc3w, (("SIMD", 16), ("PE", 10), ("outFIFODepth", 50))),
)
for fc_layer, attributes in folding_config:
    for attr_name, attr_value in attributes:
        fc_layer.set_nodeattr(attr_name, attr_value)

model = model.transform(InsertTLastMarker())
model.save(model_name_set_folding)
showInNetron(model_name_set_folding)

In [None]:
#HLS code generation and synthesis
model = ModelWrapper(model_name_set_folding)
#(transformation class, constructor arguments), applied in the order listed
ipgen_steps = (
    (GiveUniqueNodeNames, ()),
    (CodeGen_ipgen, (fpga_part, target_clk_ns)),
    (HLSSynth_IPGen, ()),
)
for step_cls, step_args in ipgen_steps:
    model = model.transform(step_cls(*step_args))

model.save(model_name_ipgen)
showInNetron(model_name_ipgen)

#For debugging
# fc0w = getCustomOp(model.graph.node[0])
# code_gen_dir = fc0w.get_nodeattr("code_gen_dir_ipgen")
# !ls {code_gen_dir}

# shell_script = code_gen_dir + "/ipgen.sh"
# !cat {shell_script}

# tcl_script = code_gen_dir + "/hls_syn_StreamingFCLayer_Batch_0.tcl"
# !cat {tcl_script}

In [None]:
#IP stitching, a.k.a. block design. This creates a Vivado project with a block design that can be viewed and modified
model = ModelWrapper(model_name_ipgen)
#(transformation class, constructor arguments), applied in the order listed
ipstitch_steps = (
    (ReplaceVerilogRelPaths, ()),
    (CodeGen_ipstitch, (fpga_part,)),
)
for step_cls, step_args in ipstitch_steps:
    model = model.transform(step_cls(*step_args))

#model.model.metadata_props
#model.get_metadata_prop("vivado_stitch_proj")

model.save(model_name_ipstitch)

In [None]:
#Create PYNQ overlay: reload the stitched model and build the PYNQ project for the selected board
stitched_model = ModelWrapper(model_name_ipstitch)
model = stitched_model.transform(MakePYNQProject(pynq_board))

#model.model.metadata_props
#! ls {model.get_metadata_prop("vivado_pynq_proj")}

model.save(model_name_pynq_proj)

In [None]:
#Bitstream synthesis; this step takes a very long time
pynq_project_model = ModelWrapper(model_name_pynq_proj)
model = pynq_project_model.transform(SynthPYNQProject())

#model.model.metadata_props

model.save(model_name_post_synthesis)

In [None]:
#Driver generation to load the bitfile to the Ultra96, then deployment to the board
#Load the file saved by the synthesis cell (this previously used a hard-coded tfc_w1_a1 path that is never written)
model = ModelWrapper(model_name_post_synthesis)
model = model.transform(MakePYNQDriver())

# driver_dir = model.get_metadata_prop("pynq_driver_dir")
# ! cat {driver_dir}/driver.py

#Deploy using the connection settings and remote folder from the setup cell
model = model.transform(DeployToPYNQ(ip, username, password, target_dir))
model.save(model_name_deploy)

#model.model.metadata_props
#! sshpass -p {password} ssh {username}@{ip} 'ls -l {target_dir}/*'


In [None]:
#Load pre-processed data here for testing (original note: will use Kynwhye scripts)
raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
#Parse the serialized tensor, then convert it to a numpy array
input_tensor = onnx.load_tensor_from_string(raw_i)
x = nph.to_array(input_tensor)
#Display the sample as a 28x28 grayscale image
plt.imshow(x.reshape(28, 28), cmap="gray")

In [None]:
#Point the dataflow partition node of the parent graph at the deployed model
parent_model = ModelWrapper(model_name_data_flow)
#The partition node is looked up by op type, so no index has to be adjusted per graph
sdp_candidates = [n for n in parent_model.graph.node if n.op_type == "StreamingDataflowPartition"]
if not sdp_candidates:
    raise RuntimeError("No StreamingDataflowPartition node found in " + model_name_data_flow)
sdp_node = sdp_candidates[0]
remote_exec_model = model_name_deploy
getCustomOp(sdp_node).set_nodeattr("model", remote_exec_model)
parent_model.save(model_name_deploy_integrated)

#Execute the parent graph with the test sample reshaped to the graph's input shape
iname = parent_model.graph.input[0].name
oname = parent_model.graph.output[0].name
ishape = parent_model.get_tensor_shape(iname)
input_dict = {iname: x.reshape(ishape)}
#NOTE(review): the third positional argument presumably requests the full execution context - confirm against execute_onnx
ret = execute_onnx(parent_model, input_dict, True)

#Output function for classification task
def softmax(x):
    """Return the softmax of the scores in `x`, normalised over all elements.

    The overall maximum is subtracted before exponentiating, so the largest
    exponent is zero.
    """
    shifted = x - np.max(x)
    exp_scores = np.exp(shifted)
    total = exp_scores.sum()
    return exp_scores / total

#Flatten the network output into a 1-D vector of scores and turn it into probabilities
logits = ret[oname].flatten()
prob = softmax(logits)

#One bar per score; the x positions follow the actual number of scores instead of a fixed 10
plt.bar(np.arange(len(prob)), prob)