In [1]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
import os

# Directory that receives every ONNX file written by this notebook.
build_dir = "Fast-QNN/outputs/txaviour/cnv"
# Create it (including missing parents) so that writing into it does not depend
# on the directory already existing; exist_ok keeps the cell safe to re-run.
os.makedirs(build_dir, exist_ok=True)

import torch
import onnx
from finn.util.test import get_test_model_trained
from brevitas.export import export_qonnx
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

# Target file for the QONNX export of the trained CNV test model.
export_onnx_path = build_dir + "/end2end_cnv_w1a1_export.onnx"
cnv = get_test_model_trained("CNV", 1, 1)
# A random 1x3x32x32 tensor is used as the example input for the export.
example_input = torch.randn(1, 3, 32, 32)
export_qonnx(cnv, example_input, export_onnx_path)
# Clean the exported file in place (the output path equals the input path).
qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
# export_onnx_path is exactly the path that was just written.
showInNetron(export_onnx_path)

Downloading: "https://github.com/Xilinx/brevitas/releases/download/bnn_pynq-r0/cnv_1w1a-758c8fef.pth" to /tmp/home_dir/.cache/torch/hub/checkpoints/cnv_1w1a-758c8fef.pth


  0%|          | 0.00/5.94M [00:00<?, ?B/s]

Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_export.onnx' at http://0.0.0.0:8081


In [2]:
# Load the cleaned export and run the ConvertQONNXtoFINN transformation on it.
model = ModelWrapper(export_onnx_path).transform(ConvertQONNXtoFINN())
# Write the intermediate graph to the shared scratch file and open it in Netron.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [3]:
# Tidy-up passes, instantiated and applied one at a time in this exact order.
for tidy_pass_cls in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    RemoveStaticGraphInputs,
):
    model = model.transform(tidy_pass_cls())

# Persist the tidied graph as a named checkpoint and display it.
tidy_path = build_dir + "/end2end_cnv_w1a1_tidy.onnx"
model.save(tidy_path)
showInNetron(tidy_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_tidy.onnx' at http://0.0.0.0:8081


In [4]:
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType

# File that will hold the exported preprocessing graph.
chkpt_preproc_name = build_dir+"/end2end_cnv_w1a1_preproc.onnx"

# Reload the tidied network and look up the shape of its first graph input.
model = ModelWrapper(build_dir+"/end2end_cnv_w1a1_tidy.onnx")
first_graph_input = model.graph.input[0]
global_inp_name = first_graph_input.name
ishape = model.get_tensor_shape(global_inp_name)

# preprocessing: per the original note, torchvision's ToTensor divides uint8
# inputs by 255; the module is exported with a random tensor of shape ishape
totensor_pyt = ToTensor()
preproc_example_input = torch.randn(ishape)
export_qonnx(totensor_pyt, preproc_example_input, chkpt_preproc_name)
# Clean the exported preprocessing file in place, then display it.
qonnx_cleanup(chkpt_preproc_name, out_file=chkpt_preproc_name)
showInNetron(chkpt_preproc_name)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_preproc.onnx' at http://0.0.0.0:8081


In [5]:
# Load the exported preprocessing graph and run ConvertQONNXtoFINN on it.
pre_model = ModelWrapper(chkpt_preproc_name).transform(ConvertQONNXtoFINN())

# Note: this writes the preprocessing graph to the same scratch file that the
# main model snapshots use.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
pre_model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [6]:
# join preprocessing and core model
merge_pass = MergeONNXModels(pre_model)
model = model.transform(merge_pass)

# add input quantization annotation: UINT8 for all BNN-PYNQ models
# (the input name is read again from the merged graph)
global_inp_name = model.graph.input[0].name
uint8_dtype = DataType["UINT8"]
model.set_tensor_datatype(global_inp_name, uint8_dtype)

# Snapshot the merged graph to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081




In [7]:
from qonnx.transformation.insert_topk import InsertTopK
from qonnx.transformation.infer_datatypes import InferDataTypes

# postprocessing: insert Top-1 node at the end
topk_pass = InsertTopK(k=1)
model = model.transform(topk_pass)
chkpt_name = build_dir+"/end2end_cnv_w1a1_pre_post.onnx"

# tidy-up again: each pass is instantiated and applied in the listed order
for tidy_pass_cls in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
):
    model = model.transform(tidy_pass_cls())

# Persist the pre/post-processed checkpoint; chkpt_name is the path displayed.
model.save(chkpt_name)
showInNetron(chkpt_name)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_pre_post.onnx' at http://0.0.0.0:8081


In [8]:
from finn.transformation.streamline import Streamline
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

# Reload the pre/post-processed checkpoint and apply
# MoveScalarLinearPastInvariants to it.
pre_post_path = build_dir + "/end2end_cnv_w1a1_pre_post.onnx"
model = ModelWrapper(pre_post_path).transform(MoveScalarLinearPastInvariants())
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [9]:
# First Streamline pass; the result is written to the scratch file for viewing.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(Streamline())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [10]:
# Apply LowerConvsToMatMul, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(LowerConvsToMatMul())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [11]:
# Apply MakeMaxPoolNHWC, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(MakeMaxPoolNHWC())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [12]:
# Apply AbsorbTransposeIntoMultiThreshold, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
absorb_pass = absorb.AbsorbTransposeIntoMultiThreshold()
model = model.transform(absorb_pass)
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [13]:
# Apply ConvertBipolarMatMulToXnorPopcount, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(ConvertBipolarMatMulToXnorPopcount())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [14]:
# Second Streamline pass, run after the lowering/absorption steps above.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(Streamline())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [15]:
# absorb final add-mul nodes into TopK
topk_absorb_pass = absorb.AbsorbScalarMulAddIntoTopK()
model = model.transform(topk_absorb_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [16]:
# Apply InferDataLayouts, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(InferDataLayouts())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [17]:
# Apply RemoveUnusedTensors and store the result as the streamlined checkpoint,
# which the hardware-conversion cell reloads.
streamlined_path = build_dir + "/end2end_cnv_w1a1_streamlined.onnx"
model = model.transform(RemoveUnusedTensors())
model.save(streamlined_path)
showInNetron(streamlined_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_streamlined.onnx' at http://0.0.0.0:8081


In [18]:
from finn.util.basic import pynq_part_map
# Target clock period in nanoseconds; passed to ZynqBuild as period_ns later on.
target_clk_ns = 10
# Change this if you have a different PYNQ board; the name is looked up in
# pynq_part_map, so it has to be one of that mapping's keys.
pynq_board = "Pynq-Z2"
fpga_part = pynq_part_map[pynq_board]

import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

# Reload the streamlined checkpoint and apply InferBinaryMatrixVectorActivation.
streamlined_path = build_dir + "/end2end_cnv_w1a1_streamlined.onnx"
model = ModelWrapper(streamlined_path).transform(
    to_hw.InferBinaryMatrixVectorActivation()
)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [19]:
# Apply InferQuantizedMatrixVectorActivation, then snapshot and display.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
quant_mvau_pass = to_hw.InferQuantizedMatrixVectorActivation()
model = model.transform(quant_mvau_pass)
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [20]:
# TopK to LabelSelect
label_select_pass = to_hw.InferLabelSelectLayer()
model = model.transform(label_select_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [21]:
# input quantization (if any) to standalone thresholding
thresholding_pass = to_hw.InferThresholdingLayer()
model = model.transform(thresholding_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [22]:
# Apply InferConvInpGen, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(to_hw.InferConvInpGen())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [23]:
# Apply InferStreamingMaxPool, then snapshot and display the graph.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model = model.transform(to_hw.InferStreamingMaxPool())
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [24]:
# get rid of Reshape(-1, 1) operation between hw nodes
flatten_removal_pass = RemoveCNVtoFCFlatten()
model = model.transform(flatten_removal_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [25]:
# get rid of Transpose -> Transpose identity sequences
transpose_pass = absorb.AbsorbConsecutiveTransposes()
model = model.transform(transpose_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [26]:
# infer tensor data layouts
layout_pass = InferDataLayouts()
model = model.transform(layout_pass)
# Snapshot the result to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [27]:
# Apply CreateDataflowPartition; the returned graph is kept as parent_model
# (model itself is not reassigned here) and saved as its own checkpoint.
parent_model_path = build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx"
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(parent_model_path)
showInNetron(parent_model_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_dataflow_parent.onnx' at http://0.0.0.0:8081


In [28]:
# Take the first StreamingDataflowPartition node of the parent graph and wrap it
# as a custom op so its attributes can be read.
sdp_candidates = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
sdp_node = getCustomOp(sdp_candidates[0])
# The "model" attribute holds the file name of the partition's own ONNX graph.
dataflow_model_filename = sdp_node.get_nodeattr("model")
# Load that partition and write it to the scratch file for inspection; this
# cell leaves the layers unspecialized.
dataflow_model = ModelWrapper(dataflow_model_filename)
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
dataflow_model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [29]:
# Run SpecializeLayers with the configured fpga_part and save the partition
# under a stable name for easier access by the folding step.
dataflow_model_path = build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx"
dataflow_model = dataflow_model.transform(SpecializeLayers(fpga_part))
dataflow_model.save(dataflow_model_path)
showInNetron(dataflow_model_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_dataflow_model.onnx' at http://0.0.0.0:8081


In [30]:
import warnings

# Reload the specialized dataflow partition and collect its MVAU_hls nodes.
model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
fc_layers = model.get_nodes_by_op_type("MVAU_hls")
# each tuple is (PE, SIMD, in_fifo_depth) for a layer; entries are matched to
# fc_layers by position
folding = [
    (16, 3, [128]),
    (32, 32, [128]),
    (16, 32, [128]),
    (16, 32, [128]),
    (4, 32, [81]),
    (1, 32, [2]),
    (1, 4, [2]),
    (1, 8, [128]),
    (5, 1, [3]),
]
# zip() stops at the shorter sequence, so a count mismatch would silently leave
# some nodes unconfigured (or some entries unused). Report it instead of hiding
# it; the folding that can be applied is still applied exactly as before.
if len(fc_layers) != len(folding):
    warnings.warn(
        "Found %d MVAU_hls node(s) but %d folding entries; "
        "only the first %d will be configured."
        % (len(fc_layers), len(folding), min(len(fc_layers), len(folding)))
    )
for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
    fcl_inst = getCustomOp(fcl)
    fcl_inst.set_nodeattr("PE", pe)
    fcl_inst.set_nodeattr("SIMD", simd)
    fcl_inst.set_nodeattr("inFIFODepths", ififodepth)
# Snapshot the folded graph to the scratch file and display it.
model.save(build_dir + "/end2end_cnv_print.onnx")
showInNetron(build_dir + "/end2end_cnv_print.onnx")

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [31]:
# use same SIMD values for the sliding window operators: the i-th
# ConvolutionInputGenerator_rtl node receives the SIMD entry of folding[i]
swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")
for swg_index, swg_node in enumerate(swg_layers):
    getCustomOp(swg_node).set_nodeattr("SIMD", folding[swg_index][1])
# Snapshot the updated graph to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_print.onnx' at http://0.0.0.0:8081


In [32]:
# Apply GiveUniqueNodeNames and store the folded checkpoint, which the
# hardware build cell reloads.
folded_path = build_dir + "/end2end_cnv_w1a1_folded.onnx"
model = model.transform(GiveUniqueNodeNames())
model.save(folded_path)

showInNetron(folded_path)

Stopping http://0.0.0.0:8081
Serving 'Fast-QNN/outputs/txaviour/cnv/end2end_cnv_w1a1_folded.onnx' at http://0.0.0.0:8081


In [None]:

from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
# Reload the folded checkpoint and run ZynqBuild for the configured board
# (pynq_board) and clock period (target_clk_ns).
folded_path = build_dir+"/end2end_cnv_w1a1_folded.onnx"
zynq_build_pass = ZynqBuild(platform = pynq_board, period_ns = target_clk_ns)
model = ModelWrapper(folded_path).transform(zynq_build_pass)
# Snapshot the post-build graph to the scratch file and display it.
snapshot_path = build_dir + "/end2end_cnv_print.onnx"
model.save(snapshot_path)
showInNetron(snapshot_path)

In [None]:
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
# Apply MakePYNQDriver with the "zynq-iodma" argument and store the result as
# the synth checkpoint.
synth_path = build_dir + "/end2end_cnv_w1a1_synth.onnx"
driver_pass = MakePYNQDriver("zynq-iodma")
model = model.transform(driver_pass)
model.save(synth_path)
showInNetron(synth_path)