In [1]:
%%bash

pwd

/tools/FINN2/notebooks/basics


In [2]:
 # Install brevitas first to get the earlier version of numpy
!pip install brevitas
import brevitas.nn as qn

import torch
from torch import nn
from torch.utils.data import DataLoader
import numpy as np

device = 'cuda' if torch.cuda.is_available() else 'cpu'
IMAGE_DEPTH = 64

PROJECT_PATH = "/tools/FINN2/Project/"
MODEL_PATH = PROJECT_PATH + "Model/NonSquareConv2D_AE_2Stride_OutputQuantised_10_3_2025.model"

[0m

In [3]:
# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause

from dependencies import value

from brevitas.core.bit_width import BitWidthImplType
from brevitas.core.quant import QuantType
from brevitas.core.restrict_val import FloatToIntImplType
from brevitas.core.restrict_val import RestrictValueType
from brevitas.core.scaling import ScalingImplType
from brevitas.core.zero_point import ZeroZeroPoint
from brevitas.inject import ExtendedInjector
from brevitas.quant.solver import ActQuantSolver
from brevitas.quant.solver import WeightQuantSolver


class CommonQuant(ExtendedInjector):
    bit_width_impl_type = BitWidthImplType.CONST
    scaling_impl_type = ScalingImplType.CONST
    restrict_scaling_type = RestrictValueType.FP
    zero_point_impl = ZeroZeroPoint
    float_to_int_impl_type = FloatToIntImplType.ROUND
    scaling_per_output_channel = False
    narrow_range = True
    signed = True

    @value
    def quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT


class CommonWeightQuant(CommonQuant, WeightQuantSolver):
    scaling_const = 1.0


class CommonActQuant(CommonQuant, ActQuantSolver):
    min_val = -1.0
    max_val = 1.0

In [4]:
# Use non square conv2d

BIT_WIDTH = 8

class AE(nn.Module):
    def __init__(self):
        super().__init__()
        self.encode = nn.Sequential(
            qn.QuantIdentity(act_quant=CommonActQuant, bit_width=BIT_WIDTH),
            qn.QuantConv2d(11, 32, (3, 1),
                           stride=(1, 1), padding=(1, 0),
                           weight_bit_width=BIT_WIDTH,
                           weight_quant=CommonWeightQuant,
                           bias=False),
            nn.MaxPool2d((2,1), stride=(2,1)),

            qn.QuantIdentity(act_quant=CommonActQuant, bit_width=BIT_WIDTH),
            qn.QuantConv2d(32, 64, (3, 1),
                           stride=(1, 1), padding=(1, 0),
                           weight_bit_width=BIT_WIDTH,
                           weight_quant=CommonWeightQuant,
                           bias=False),
            nn.MaxPool2d((2,1), stride=(2,1)),
        )
        self.decode = nn.Sequential(
            nn.Upsample(scale_factor=(2,1)),
            qn.QuantIdentity(act_quant=CommonActQuant, bit_width=BIT_WIDTH),
            qn.QuantConv2d(64, 32, (3, 1),
                           stride=1, padding=(1, 0),
                           weight_quant=CommonWeightQuant,
                           weight_bit_width=BIT_WIDTH,
                           bias=False),
            nn.Upsample(scale_factor=(2,1)),
            qn.QuantIdentity(act_quant=CommonActQuant, bit_width=BIT_WIDTH),
            qn.QuantConv2d(32, 11, (3, 1),
                           stride=1, padding=(1, 0),
                           weight_quant=CommonWeightQuant,
                           weight_bit_width=BIT_WIDTH,
                           bias=False),
            qn.QuantIdentity(act_quant=CommonActQuant, bit_width=BIT_WIDTH),
        )

    def forward(self, x):
        x = self.encode(x)
        x = self.decode(x)
        return x

In [5]:
AutoEncoder = AE().to(device)
AutoEncoder.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device(device), weights_only=True))

<All keys matched successfully>

In [None]:
AutoEncoder.state_dict()

In [7]:
_inpt = torch.randn((1, 11, 64, 1))
AutoEncoder(_inpt).shape

  return super(Tensor, self).rename(names)


torch.Size([1, 11, 64, 1])

In [6]:
import torch
import onnx
from finn.util.test import get_test_model_trained
from brevitas.export import export_qonnx
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

import onnx
from finn.util.visualization import showSrc, showInNetron

from qonnx.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors


BUILD_PATH = PROJECT_PATH + "Build/"

In [7]:
from brevitas.export import export_qonnx
ONNX_PATH = PROJECT_PATH + "ONNX/"
EXPORT_ONNX_PATH = ONNX_PATH + "AutoEncoder_QONNX.onnx"
INPUT_SHAPE = (1, 11, 64, 1)

export_qonnx(AutoEncoder, torch.randn(INPUT_SHAPE), EXPORT_ONNX_PATH);
qonnx_cleanup(EXPORT_ONNX_PATH, out_file=EXPORT_ONNX_PATH)

In [8]:
showInNetron(EXPORT_ONNX_PATH)

Serving '/tools/FINN2/Project/ONNX/AutoEncoder_QONNX.onnx' at http://0.0.0.0:8081


In [9]:
model = ModelWrapper(EXPORT_ONNX_PATH)

model = model.transform(ConvertQONNXtoFINN())
model = model.transform(InferShapes())

# print(model.check_all_tensor_shapes_specified(fix_missing_init_shape=True))
# print(model.check_all_tensor_shapes_specified())
# print(model.check_compatibility())

model.save(BUILD_PATH + "FINN_RAW.onnx")

In [10]:
from qonnx.transformation.general import RemoveUnusedTensors
# model = model.transform(RemoveUnusedTensors())

# model = model.transform(RemoveUnusedTensors())

model = model.transform(RemoveUnusedTensors())

# model = model.transform(GiveReadableTensorNames())

In [11]:
showInNetron(BUILD_PATH + "FINN_RAW.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build/FINN_RAW.onnx' at http://0.0.0.0:8081


In [12]:
model = ModelWrapper(BUILD_PATH + "FINN_RAW.onnx")

# model = model.transform(Change3DTo4DTensors()) # Pre streamline for the conv1ds - https://github.com/Xilinx/finn/discussions/418
model = model.transform(InferShapes())
model = model.transform(FoldConstants())
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
model = model.transform(RemoveStaticGraphInputs())
model.save(BUILD_PATH + "FINN_1_tidy.onnx")

In [13]:
showInNetron(BUILD_PATH + "FINN_1_tidy.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build/FINN_1_tidy.onnx' at http://0.0.0.0:8081


In [14]:
from finn.transformation.streamline import Streamline
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors


model = ModelWrapper(BUILD_PATH + "FINN_1_tidy.onnx")

# model = model.transform(MoveScalarLinearPastInvariants())
model = model.transform(Streamline())
model = model.transform(LowerConvsToMatMul())
model = model.transform(MakeMaxPoolNHWC())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())

model = model.transform(absorb.AbsorbConsecutiveTransposes())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
model = model.transform(Streamline())

model = model.transform(absorb.AbsorbTransposeIntoResize())
model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
model = model.transform(absorb.AbsorbConsecutiveTransposes())


# absorb final add-mul nodes into TopK
# model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())

model = model.transform(InferDataLayouts())
model = model.transform(RemoveUnusedTensors())

model.save(BUILD_PATH + "FINN_2_Streamlined.onnx")



In [15]:
showInNetron(BUILD_PATH + "FINN_2_Streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build/FINN_2_Streamlined.onnx' at http://0.0.0.0:8081


## *Conversion To Hardware*

In [16]:
from finn.util.basic import pynq_part_map
# Using the Pynq-Z2 board
pynq_board = "Pynq-Z2"
fpga_part = pynq_part_map[pynq_board]
target_clk_ns = 10                           # Changed from 10 to 100

import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts
# from qonnx.transformation.resize_conv_to_deconv import ResizeConvolutionToDeconvolution
# from finn.transformation.streamline.absorb import AbsorbTransposeIntoResize
from finn.transformation.streamline.reorder import MoveTransposePastScalarMul

from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul, CollapseRepeatedOp

model = ModelWrapper(BUILD_PATH + "FINN_2_Streamlined.onnx")

model = model.transform(to_hw.InferQuantizedMatrixVectorActivation())
# TopK to LabelSelect
# model = model.transform(to_hw.InferLabelSelectLayer())
# input quantization (if any) to standalone thresholding
model = model.transform(to_hw.InferThresholdingLayer())

model = model.transform(to_hw.InferConvInpGen())
model = model.transform(to_hw.InferStreamingMaxPool())
model = model.transform(to_hw.InferUpsample())           # <- Non square 2D upsampling not supported
# get rid of Reshape(-1, 1) operation between hw nodes
# model = model.transform(RemoveCNVtoFCFlatten())
# model = model.transform(ResizeConvolutionToDeconvolution())

# get rid of Tranpose -> Tranpose identity seq
model = model.transform(absorb.AbsorbConsecutiveTransposes())

model = model.transform(MoveTransposePastScalarMul())
model = model.transform(absorb.AbsorbMulIntoMultiThreshold())

# model = model.transform(CollapseRepeatedOp())
model = model.transform(Streamline())


# infer tensor data layouts
model = model.transform(InferDataLayouts())


# Look at before partition
# model.save(BUILD_PATH + "temp.onnx")
# showInNetron(BUILD_PATH + "temp.onnx")


parent_model = model.transform(CreateDataflowPartition())
parent_model.save(BUILD_PATH + "FINN_3_ConversionToHardware_dataflow_parent.onnx")

sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
sdp_node = getCustomOp(sdp_node)
dataflow_model_filename = sdp_node.get_nodeattr("model")
# save the dataflow partition with a different name for easier access
# and specialize the layers to HLS variants
dataflow_model = ModelWrapper(dataflow_model_filename)

# node_types = ["Thresholding", "FMPadding", "ConvolutionInputGenerator", "MVAU", "StreamingMaxPool", "UpsampleNearestNeighbour"]
# for node_type in node_types:
#     nodes = dataflow_model.get_nodes_by_op_type(node_type)
#     for node in nodes:
#         node_inst = getCustomOp(node)
#         node_inst.set_nodeattr("preferred_impl_style", "hls")

dataflow_model = dataflow_model.transform(SpecializeLayers(fpga_part))
dataflow_model.save(BUILD_PATH + "/FINN_3_ConversionToHardware_dataflow_model.onnx")


In [17]:
showInNetron(BUILD_PATH + "FINN_3_ConversionToHardware_dataflow_parent.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build/FINN_3_ConversionToHardware_dataflow_parent.onnx' at http://0.0.0.0:8081


In [18]:
showInNetron(BUILD_PATH + "/FINN_3_ConversionToHardware_dataflow_model.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build//FINN_3_ConversionToHardware_dataflow_model.onnx' at http://0.0.0.0:8081


In [19]:
from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation

In [20]:
model = ModelWrapper(BUILD_PATH + "FINN_3_ConversionToHardware_dataflow_model.onnx")

fc_layers = dataflow_model.get_nodes_by_op_type("MVAU_rtl")

# This caused overusage of hardware in PYNQ compilation stage
# PE_SIMD_inFIFO = [
#     (32,11,[128]),
#     (64,32,[128]),
#     (32,64,[128]),
#     (11,32,[128])
# ]

PE_SIMD_inFIFO = [
    (4,1,[8]),
    (8,4,[8]), # We Want the most PE in the largest layer which is this one (64 filters) 
    (4,8,[8]),
    (1,4,[8])
]

for layer, (PE, SIMD, inFIFO) in zip(fc_layers, PE_SIMD_inFIFO):
    fcl_inst = getCustomOp(layer)

    fcl_inst.set_nodeattr("PE", PE)
    fcl_inst.set_nodeattr("SIMD", SIMD)
    fcl_inst.set_nodeattr("inFIFODepths", inFIFO)

    

# Need to set the output (PE) of the prev layer to the first MVAU to that MVAU's SIMD
swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")
for i in range(len(swg_layers)):
    swg_inst = getCustomOp(swg_layers[i])
    SIMD = PE_SIMD_inFIFO[i][1]
    swg_inst.set_nodeattr("SIMD", SIMD)

dataflow_model.save(BUILD_PATH + "/FINN_3_Post_PE_SIMD.onnx")

In [21]:
showInNetron(BUILD_PATH + "/FINN_3_Post_PE_SIMD.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build//FINN_3_Post_PE_SIMD.onnx' at http://0.0.0.0:8081


In [22]:
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
model = ModelWrapper(BUILD_PATH + "/FINN_3_Post_PE_SIMD.onnx")
model = model.transform(ZynqBuild(platform = pynq_board, period_ns = target_clk_ns))

                        be created. This may cause RTL simulation issues.
                        
                        be created. This may cause RTL simulation issues.
                        
                You may experience incorrect stitched-IP rtlsim or hardware
                behavior. It is strongly recommended to insert FIFOs prior to
                calling CreateStitchedIP.


In [23]:
model.save(BUILD_PATH + "/FINN_3_POST_ZYNQ_BUILD.onnx")

In [24]:
showInNetron(BUILD_PATH + "/FINN_3_POST_ZYNQ_BUILD.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build//FINN_3_POST_ZYNQ_BUILD.onnx' at http://0.0.0.0:8081


In [None]:
import pprint
from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
from finn.analysis.fpgadataflow.op_and_param_counts import op_and_param_counts
from finn.analysis.fpgadataflow.post_synth_res import post_synth_res
from finn.analysis.fpgadataflow.res_estimation import res_estimation_complete

pp = pprint.PrettyPrinter(depth=6)
# pp.pprint(mydict)

model = ModelWrapper(getCustomOp(ModelWrapper(BUILD_PATH + "/FINN_3_POST_ZYNQ_BUILD.onnx").graph.node[1]).get_nodeattr("model"))

print("Dataflow Performance: ")
pp.pprint(model.analysis(dataflow_performance))
print("")
# print("HLS Resource Estimation")
# pp.pprint(model.analysis(hls_synth_res_estimation))
print("")
print("OP and Param Counts:")
pp.pprint(model.analysis(op_and_param_counts))
print("")
print("Estimates required resources for model")
pp.pprint(res_estimation_complete(model, pynq_board))



In [25]:
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
model = ModelWrapper(BUILD_PATH + "FINN_3_POST_ZYNQ_BUILD.onnx")
model = model.transform(MakePYNQDriver("zynq-iodma"))

In [26]:
model.save(BUILD_PATH + "/FINN_3_PYNQ_DRIVER.onnx")

In [27]:
showInNetron(BUILD_PATH + "/FINN_3_PYNQ_DRIVER.onnx")

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/Build//FINN_3_PYNQ_DRIVER.onnx' at http://0.0.0.0:8081


In [28]:
model = ModelWrapper(BUILD_PATH + "/FINN_3_PYNQ_DRIVER.onnx")
sdp_node_middle = getCustomOp(model.graph.node[1])
postsynth_layers = sdp_node_middle.get_nodeattr("model")

showInNetron(postsynth_layers)

Stopping http://0.0.0.0:8081
Serving '/tools/FINN2/Project/FINN_Build/dataflow_partition_9eyy7xa5/partition_2.onnx' at http://0.0.0.0:8081


In [95]:
model = ModelWrapper(postsynth_layers)
model.model.metadata_props

[key: "floorplan_json"
value: "/tools/FINN2/Project/FINN_Build/vitis_floorplan_vuclmw5u/floorplan.json"
, key: "vivado_stitch_proj"
value: "/tools/FINN2/Project/FINN_Build/vivado_stitch_proj_mwi_88pp"
, key: "clk_ns"
value: "10"
, key: "wrapper_filename"
value: "/tools/FINN2/Project/FINN_Build/vivado_stitch_proj_mwi_88pp/finn_vivado_stitch_proj.gen/sources_1/bd/StreamingDataflowPartition_1/hdl/StreamingDataflowPartition_1_wrapper.v"
, key: "vivado_stitch_vlnv"
value: "xilinx_finn:finn:StreamingDataflowPartition_1:1.0"
, key: "vivado_stitch_ifnames"
value: "{\"clk\": [\"ap_clk\"], \"rst\": [\"ap_rst_n\"], \"s_axis\": [[\"s_axis_0\", 88]], \"m_axis\": [[\"m_axis_0\", 8]], \"aximm\": [], \"axilite\": []}"
, key: "platform"
value: "zynq-iodma"
]

In [96]:
model = ModelWrapper(BUILD_PATH + "/FINN_3_PYNQ_DRIVER.onnx")
model.model.metadata_props

[key: "floorplan_json"
value: "/tools/FINN2/Project/FINN_Build/vitis_floorplan_vuclmw5u/floorplan.json"
, key: "vivado_pynq_proj"
value: "/tools/FINN2/Project/FINN_Build/vivado_zynq_proj_jm3rm6rv"
, key: "bitfile"
value: "/tools/FINN2/Project/FINN_Build/vivado_zynq_proj_jm3rm6rv/resizer.bit"
, key: "hw_handoff"
value: "/tools/FINN2/Project/FINN_Build/vivado_zynq_proj_jm3rm6rv/resizer.hwh"
, key: "vivado_synth_rpt"
value: "/tools/FINN2/Project/FINN_Build/vivado_zynq_proj_jm3rm6rv/synth_report.xml"
, key: "platform"
value: "zynq-iodma"
, key: "pynq_driver_dir"
value: "/tools/FINN2/Project/FINN_Build/pynq_driver_dhs854le"
]

## PYNQ Deployment

In [29]:
from shutil import copy
from distutils.dir_util import copy_tree
from finn.util.basic import make_build_dir


# create directory for deployment files
deployment_dir = make_build_dir(prefix="pynq_deployment_")
model.set_metadata_prop("pynq_deployment_dir", deployment_dir)

# get and copy necessary files
# .bit and .hwh file
bitfile = model.get_metadata_prop("bitfile")
hwh_file = model.get_metadata_prop("hw_handoff")
deploy_files = [bitfile, hwh_file]

for dfile in deploy_files:
    if dfile is not None:
        copy(dfile, deployment_dir)

# driver.py and python libraries
pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
copy_tree(pynq_driver_dir, deployment_dir)

['/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/driver_base.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/validate.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/qonnx/util/basic.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/qonnx/util/__init__.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/qonnx/core/datatype.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/qonnx/core/__init__.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/driver.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/finn/util/data_packing.py',
 '/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct/finn/util/__init__.py']

In [30]:
import numpy as np

x = np.random.randn(11, 64, 1).transpose(1, 2, 0)


model = ModelWrapper(BUILD_PATH + "/FINN_3_PYNQ_DRIVER.onnx")
iname = model.graph.input[0].name
oname = parent_model.graph.output[0].name
ishape = model.get_tensor_shape(iname)
print("Expected network input shape is " + str(ishape))
np.save(deployment_dir + "/input.npy", x.reshape(ishape).astype('int8'))

Expected network input shape is [1, 64, 1, 11]


In [31]:
! echo {deployment_dir}

/tools/FINN2/Project/FINN_Build/pynq_deployment_189aqoct


In [33]:
! ls {deployment_dir}

driver_base.py	finn	   qonnx	resizer.hwh	 validate.py
driver.py	input.npy  resizer.bit	runtime_weights


In [34]:
from shutil import make_archive
make_archive('deploy-on-pynq-AutoEncoder', 'zip', deployment_dir)

'/tools/FINN2/notebooks/basics/deploy-on-pynq-AutoEncoder.zip'