# CIFAR - 10 Network Synthesis
This code needs to be run within the FINN docker. This code will go through the synthesis of trained networks. 

In [1]:
import torch
## Load the network trainer script to get a reading on the accuracy of the network before the full script is run
from NetworkTrainer import Networktrainer

# --- Configuration -----------------------------------------------------------
trinary = False # Change to the type of network you want to synth
folding_amount = 50000 # Manually set the folding amount
inputVector = (1,3,32,32) # Input shape passed to the ONNX export later in the notebook
bnnTrainTime = 0.02 # will leave hard coded in.

# --- Variant-specific settings -----------------------------------------------
# Only the model class, the output name, the checkpoint path and the value given
# to Networktrainer differ between the two variants; everything else is shared.
if trinary:
    from CIFAR10 import Lenet5v8Tri
    Network = Lenet5v8Tri()
    network_name = "TrinaryLenet" # Allow for easy changing of the network name
    PATH = './Trained Networks/CIFAR-10 10 hours/Lenetv8 Trinary.pth' # Path to the trained network
    trainer_hours = bnnTrainTime
else:
    # the 8 bit one
    from CIFAR10 import Lenet5v9
    quantisation = 9
    Network = Lenet5v9()
    network_name = f"Lenet5v9 v10_folding{folding_amount}"  # Allow for easy changing of the network name
    PATH = './Trained Networks/CIFAR-10 10 hours/Lenetv9 8-bit.pth' # Path to the trained network
    # NOTE(review): this variant has always passed 0.001 rather than bnnTrainTime;
    # kept as-is so the behaviour does not change - confirm whether that is intended.
    trainer_hours = 0.001

# --- Load the trained weights and sanity-check them --------------------------
# map_location='cpu' allows a checkpoint that was saved from a GPU to be loaded
# on a machine without one; load_state_dict copies the values into the network.
Network.load_state_dict(torch.load(PATH, map_location='cpu'))

# Checking to see if the network loaded actually has been trained
Trainer = Networktrainer(trainer_hours)
Trainer.load_dataset("CIFAR10")
with open('dumy.txt','w') as dummyLogFile:
    print(f'Network Accuracy: {Trainer.Test_Accuracy(Network,dummyLogFile)}')

# The export that follows is run with the network on the CPU
Network.to('cpu')

The Amount of time that the BNN will have to train is: 0.001 hours
Training on: cuda:0
Training Class initalised at: 2023-10-07 04:42:34.891804
loading dataset: CIFAR10
Files already downloaded and verified
Files already downloaded and verified
Dataset loaded
Network Accuracy: 48


In [2]:
# Add up the element count of every parameter tensor registered on the network
parameter_sizes = [tensor.numel() for tensor in Network.parameters()]
total_params = sum(parameter_sizes)
print(f"Number of parameters: {total_params}")


Number of parameters: 4324


## Converting model into Qonnx
Will need to export the model into a qonnx version.

In [3]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
import os

import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs
    
# Folder (inside the current working directory) where every intermediate ONNX
# model is written, so that a record of the Netron models is kept.
build_dir = f'{os.getcwd()}/TestSynth'
# Create the folder up front: the export/save calls in later cells write into it
# and would fail on a fresh checkout where it does not exist yet.
os.makedirs(build_dir, exist_ok=True)

In [4]:
# Export the network to ONNX and run the initial clean-up passes - taken from the demo
export_path = f"{build_dir}/{network_name}_export.onnx"
bo.export_finn_onnx(Network, inputVector, export_path)
model = ModelWrapper(export_path)

# Each pass is instantiated and applied in turn; the order is significant
for tidy_pass_cls in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    RemoveStaticGraphInputs,
):
    model = model.transform(tidy_pass_cls())

model.save(f"{build_dir}/{network_name}_tidy.onnx")

                i.e. domain=finn to domain=qonnx.custom_op.<general|fpgadataflow|...>


Display the imported QONNX model. No operations have taken place at this point besides the initial set-up seen in the last block.

In [5]:
# Open the tidied model in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_tidy.onnx")

Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_tidy.onnx' at http://0.0.0.0:8081


## The pre and post processing steps.

In [6]:
# Just loading all the used modules
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType
from qonnx.transformation.insert_topk import InsertTopK
from qonnx.transformation.infer_datatypes import InferDataTypes

In [7]:
# No preprocessing is merged in here, but the model is still reloaded and its
# input annotated so that preprocessing instructions can be inserted at this
# point later on if they are needed.
tidy_model_path = f"{build_dir}/{network_name}_tidy.onnx"
model = ModelWrapper(tidy_model_path)

# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = model.graph.input[0].name
input_datatype = DataType["UINT8"]
model.set_tensor_datatype(global_inp_name, input_datatype)


### Post Processing,
Inserting a topK layer that will allow the classification to pick a label

In [8]:
# postprocessing: append a TopK node with k=1 (Top-1) at the end of the graph
top1_pass = InsertTopK(k=1)
model = model.transform(top1_pass)

### Tidy up the model again

In [9]:
# Second clean-up round; this time datatype inference is part of the sequence
for cleanup_cls in (
    InferShapes,
    FoldConstants,
    GiveUniqueNodeNames,
    GiveReadableTensorNames,
    InferDataTypes,
    RemoveStaticGraphInputs,
):
    model = model.transform(cleanup_cls())

pre_post_path = f"{build_dir}/{network_name}_pre_post.onnx"
model.save(pre_post_path)
# Show the network again
showInNetron(pre_post_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_pre_post.onnx' at http://0.0.0.0:8081


### Streamlining and lowering layers
This process is highly dependent on the topology of the network. As such, it will differ for each type of network.

In [10]:
from finn.transformation.streamline import Streamline
import finn
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
import finn.transformation.streamline.reorder as reorder
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

model = ModelWrapper(f"{build_dir}/{network_name}_pre_post.onnx")

# Streamlining / lowering passes, applied strictly in the order listed.
streamlining_sequence = (
    absorb.AbsorbSignBiasIntoMultiThreshold,
    MoveScalarLinearPastInvariants,
    Streamline,
    LowerConvsToMatMul,
    MakeMaxPoolNHWC,
    absorb.AbsorbTransposeIntoMultiThreshold,
    ConvertBipolarMatMulToXnorPopcount,
    Streamline,
    # absorb final add-mul nodes into TopK
    absorb.AbsorbScalarMulAddIntoTopK,
    InferDataLayouts,
    RemoveUnusedTensors,
    # Gets rid of the repeated Transposes
    absorb.AbsorbConsecutiveTransposes,
    Streamline,
)
for streamlining_cls in streamlining_sequence:
    model = model.transform(streamlining_cls())

model.save(f"{build_dir}/{network_name}_streamlined.onnx")



In [11]:
# Open the streamlined model in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_streamlined.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_streamlined.onnx' at http://0.0.0.0:8081


### Converting the layers into the HW equivalent

This stage will be the hardest. I will need to ensure that each node can be converted to a compatible version.

In [12]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition
)
import finn.builder.build_dataflow 
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

# Memory mode for the MVTU units: either "decoupled" or "const"
mem_mode = "decoupled" # smaller memory foot print. Longer synth times use 'const'

model = ModelWrapper(f"{build_dir}/{network_name}_streamlined.onnx")

# Zero-argument factories so that every pass is created right before it is
# applied; the list order is the order of application.
hls_conversion_steps = [
    lambda: to_hls.InferBinaryMatrixVectorActivation(mem_mode),
    lambda: to_hls.InferQuantizedMatrixVectorActivation(mem_mode),
    # TopK to LabelSelect
    lambda: to_hls.InferLabelSelectLayer(),
    # input quantization (if any) to standalone thresholding
    lambda: to_hls.InferThresholdingLayer(),
    lambda: to_hls.InferConvInpGen(),
    lambda: to_hls.InferStreamingMaxPool(),
    # get rid of Reshape(-1, 1) operation between hlslib nodes
    # (comment out when not using any conv layers)
    lambda: RemoveCNVtoFCFlatten(),
    # Deal with the max pool between the conv and fc layers, and
    # get rid of Transpose -> Transpose identity sequences
    lambda: MakeMaxPoolNHWC(),
    lambda: absorb.AbsorbConsecutiveTransposes(),
    lambda: to_hls.InferStreamingMaxPool(),
    # infer tensor data layouts
    lambda: InferDataLayouts(),
]
for make_step in hls_conversion_steps:
    model = model.transform(make_step())

hls_model_path = f"{build_dir}/{network_name}_hls.onnx"
model.save(hls_model_path)
showInNetron(hls_model_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_hls.onnx' at http://0.0.0.0:8081


In [13]:
# Split the graph into a parent model and a dataflow partition
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(f"{build_dir}/{network_name}_dataflow_parent.onnx")

# Take the first StreamingDataflowPartition node of the parent and wrap it as a
# custom op so its attributes can be read
partition_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
sdp_node = getCustomOp(partition_nodes[0])
dataflow_model_filename = sdp_node.get_nodeattr("model")

# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(f"{build_dir}/{network_name}_dataflow_model.onnx")

In [14]:
# Open the extracted dataflow partition in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_dataflow_model.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_dataflow_model.onnx' at http://0.0.0.0:8081


In [15]:
# Open the parent model (the one holding the partition node) in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_dataflow_parent.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_dataflow_parent.onnx' at http://0.0.0.0:8081


# Folding the network
Uses the C++ synthesis tool to determine the folding settings. This is not optimal, but it keeps the process procedural for this thesis.

In [16]:
# automatic setting of folding
import finn.transformation.fpgadataflow.set_folding as SetFolding
import finn.transformation.fpgadataflow.set_fifo_depths as InsertFIFO
from finn.util.basic import pynq_part_map
# Target board and the matching part string from FINN's board-to-part map
fpga = "Pynq-Z2"
fpgapart = pynq_part_map[fpga]

model = ModelWrapper(f"{build_dir}/{network_name}_dataflow_model.onnx")

# Left disabled on purpose:
#model = model.transform(InsertFIFO.RemoveShallowFIFOs())
# model = model.transform(InsertFIFO.InsertFIFO()) # this seems to generate actual hls layers. I am currently of the opinion that these layers actually don't work with the system?

# Let SetFolding pick the folding for the requested cycles-per-frame target
folding_pass = SetFolding.SetFolding(target_cycles_per_frame=folding_amount)
model = model.transform(folding_pass)
model.save(f"{build_dir}/{network_name}_folded.onnx")

             actual latency!


In [17]:
# Open the folded model in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_folded.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_folded.onnx' at http://0.0.0.0:8081


In [19]:
# Open the folded model (the "_folded.onnx" file) in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_folded.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_folded.onnx' at http://0.0.0.0:8081


In [20]:
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
import finn.transformation.fpgadataflow.prepare_ip as prepare_ip
import finn.transformation.fpgadataflow.insert_iodma as insert_iodma
# Build settings: target board and clock period (in ns, per the variable name)
pynq_board = "Pynq-Z2"
target_clk_ns = 10

model = ModelWrapper(f"{build_dir}/{network_name}_folded.onnx")

# the best effort generator - handles all the synthesis parts
zynq_build_pass = ZynqBuild(platform=pynq_board, period_ns=target_clk_ns)
model = model.transform(zynq_build_pass)
model.save(f"{build_dir}/{network_name}_synthesised.onnx")

                        be created. This may cause RTL simulation issues.
                        
                        be created. This may cause RTL simulation issues.
                        
                You may experience incorrect stitched-IP rtlsim or hardware
                behavior. It is strongly recommended to insert FIFOs prior to
                calling CreateStitchedIP.


In [21]:
# Open the synthesised model in the Netron viewer
showInNetron(f"{build_dir}/{network_name}_synthesised.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_synthesised.onnx' at http://0.0.0.0:8081


In [22]:
# Reload the synthesised model and look inside the node at index 1 of its graph
model = ModelWrapper(f"{build_dir}/{network_name}_synthesised.onnx")
second_graph_node = model.graph.node[1]
sdp_node_middle = getCustomOp(second_graph_node)
# The "model" attribute holds the path of the ONNX file behind that node
postsynth_layers = sdp_node_middle.get_nodeattr("model")

showInNetron(postsynth_layers)

Stopping http://0.0.0.0:8081
Serving '/tmp/finn_dev_julien/dataflow_partition_t7h3emnf/partition_2.onnx' at http://0.0.0.0:8081


### Recover the amount of resources used
This will recover the amount of resources used for each partition after synth is done.

In [23]:
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources

# Reload the synthesised model, annotate it in 'synth' mode and keep the result
# under a separate file name
model = ModelWrapper(f"{build_dir}/{network_name}_synthesised.onnx")
resource_pass = AnnotateResources('synth')
model = model.transform(resource_pass)
model.save(f"{build_dir}/{network_name}_synthesised_resources.onnx")



In [24]:
 # Open the resource-annotated model in the Netron viewer
 showInNetron(f"{build_dir}/{network_name}_synthesised_resources.onnx")

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/Lenet5v9 v10_folding50000_synthesised_resources.onnx' at http://0.0.0.0:8081


## Generate the Pynq driver zip file.

In [25]:
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
# Generate the PYNQ driver for the "zynq-iodma" platform and save the resulting model
driver_pass = MakePYNQDriver("zynq-iodma")
model = model.transform(driver_pass)
model.save(f"{build_dir}/{network_name}_synth.onnx")

In [26]:
from shutil import copy
from distutils.dir_util import copy_tree

from shutil import make_archive

# create directory for deployment files and record it on the model
deployment_dir = make_build_dir(prefix="pynq_deployment_")
model.set_metadata_prop("pynq_deployment_dir", deployment_dir)

# get and copy necessary files
# .bit and .hwh file
bitfile = model.get_metadata_prop("bitfile")
hwh_file = model.get_metadata_prop("hw_handoff")
deploy_files = [bitfile, hwh_file]

for dfile in deploy_files:
    # entries whose metadata property came back as None are skipped
    if dfile is None:
        continue
    copy(dfile, deployment_dir)

# driver.py and python libraries
pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
copy_tree(pynq_driver_dir, deployment_dir)

# Zip the deployment folder; the archive is named after the network
make_archive(network_name, 'zip', deployment_dir)
print(f"done {network_name}")

done Lenet5v9 v10_folding50000


Validating the Accuracy on a PYNQ Board

Ensure that your PYNQ board has a working internet connection for the next steps, since there is some downloading involved.

To validate the accuracy, we first need to install the dataset-loading Python package to the PYNQ board. This will give us a convenient way of downloading and accessing the MNIST dataset.

We can now use the validate.py script that was generated together with the driver to measure top-1 accuracy on the MNIST dataset.

Important to note: override the provided validate.py script with the custom one provided in the root folder.

Command to execute on PYNQ board:

sudo python3 validate.py --dataset mnist --batchsize 1000
