# MNIST
This notebook contains the steps to convert an MNIST network into firmware for the PYNQ-Z2 board. It needs to be run inside the FINN Docker container.

In [1]:
import torch

# Select which trained MNIST network is pushed through the FINN flow:
#   trinary = True  -> QuantTrinaryFCMNIST (2-bit checkpoint)
#   trinary = False -> FC8bit (8-bit checkpoint)
trinary = False # Select the type of network

# Only the model class, the output name and the checkpoint path differ between
# the two variants; everything else is shared below so the branches cannot drift.
if trinary:
    from MNIST import QuantTrinaryFCMNIST
    Network = QuantTrinaryFCMNIST()
    network_name = "QuantTrinaryFCMNIST_fast" # Allow for easy adaptability of the network
    PATH = '/home/julien/finn/notebooks/Thesis/Training_log/MNIST NETWORKS2023-08-20/MNISTv2-2bit.pth' # Ensure correct path
else:
    # the 8 bit one
    from MNIST import FC8bit
    Network = FC8bit()
    network_name = "FC8bitv4_fast" # Allow for easy adaptability of the network
    PATH = '/home/julien/finn/notebooks/Thesis/Training_log/MNIST NETWORKS2023-08-20/FC8bit.pth' # Ensure correct path

# Input shape handed to the ONNX exporter (same for both variants).
inputVector = (1,1,28,28)

# map_location="cpu" lets a checkpoint that was saved from a GPU be restored on
# a machine without CUDA; load_state_dict copies the values into the model's
# own parameters either way.
Network.load_state_dict(torch.load(PATH, map_location="cpu"))

# Check that the network that was loaded has actually been trained by
# measuring its test accuracy with the project's trainer class.
from NetworkTrainer import Networktrainer
bnnTrainTime = 0.02 # will leave hard coded in.

Trainer = Networktrainer(bnnTrainTime)
Trainer.load_dataset("MNIST")
# Test_Accuracy requires a log file handle; a throw-away file is used here.
with open('dumy.txt','w') as dummyLogFile:
    print(f'Network Accuracy: {Trainer.Test_Accuracy(Network,dummyLogFile)}')

# Make sure the model is on the CPU before export.
# NOTE(review): presumably Test_Accuracy moves the network to the GPU - confirm.
# The assignment also stops the notebook from echoing the module repr.
Network = Network.to('cpu')
    

    

The Amount of time that the BNN will have to train is: 0.02 hours
Training on: cuda:0
Training Class initalised at: 2023-10-06 23:21:07.729684
loading dataset: MNIST
Dataset loaded
Network Accuracy: 90


## Find Number of parameters

In [2]:
# Add up the element count of every parameter tensor registered on the network.
total_params = 0
for param_tensor in Network.parameters():
    total_params += param_tensor.numel()
print(f"Number of parameters: {total_params}")


Number of parameters: 25452


## Converting model into Qonnx
The trained model first needs to be exported to the QONNX format.

In [5]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
import os

import onnx
from finn.util.test import get_test_model_trained
import brevitas.onnx as bo
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

# All intermediate ONNX files of this notebook are written below this folder.
build_dir = f'{os.getcwd()}/TestSynth'
# Create the folder up front so the first export does not fail on a fresh checkout.
os.makedirs(build_dir, exist_ok=True)

In [6]:
# Export the Brevitas network to ONNX (flow taken from the FINN demo), then run
# the basic clean-up transformations and store the result as "<name>_tidy.onnx".
export_onnx_path = build_dir + f"/{network_name}_export.onnx"
bo.export_finn_onnx(Network, inputVector, export_onnx_path)

model = ModelWrapper(export_onnx_path)
# The passes are applied strictly in this order.
initial_tidy_passes = (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    RemoveStaticGraphInputs(),
)
for tidy_pass in initial_tidy_passes:
    model = model.transform(tidy_pass)
model.save(build_dir + f"/{network_name}_tidy.onnx")

                i.e. domain=finn to domain=qonnx.custom_op.<general|fpgadataflow|...>


Display the imported QONNX model. No operations have been applied yet apart from the initial set-up shown in the previous cell.

In [7]:
# Open the tidied graph in the Netron viewer.
tidy_view_path = build_dir + f"/{network_name}_tidy.onnx"
showInNetron(tidy_view_path)

Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_tidy.onnx' at http://0.0.0.0:8081


## The pre and post processing steps.
### Preprocessing
No Preprocessing done in this work.

In [8]:
# Just loading all the used modules
from finn.util.pytorch import ToTensor
from qonnx.transformation.merge_onnx_models import MergeONNXModels
from qonnx.core.datatype import DataType
from qonnx.transformation.insert_topk import InsertTopK
from qonnx.transformation.infer_datatypes import InferDataTypes

In [9]:
# No preprocessing is performed in this work, but the model is still reloaded
# here so that preprocessing instructions can simply be inserted at this point.
tidy_model_path = build_dir+f"/{network_name}_tidy.onnx"
model = ModelWrapper(tidy_model_path)

# add input quantization annotation: UINT8 for all BNN-PYNQ models
global_inp_name = model.graph.input[0].name
input_datatype = DataType["UINT8"]
model.set_tensor_datatype(global_inp_name, input_datatype)

### Post Processing
Insert a TopK layer so that the classifier outputs a single predicted label.

In [10]:
# Postprocessing: append a TopK node with k=1 to the end of the graph.
top1_pass = InsertTopK(k=1)
model = model.transform(top1_pass)

### Tidy up the model again

In [11]:
# Second clean-up round after the pre/post-processing edits. Compared with the
# first round this one additionally runs datatype inference.
post_edit_tidy_passes = (
    InferShapes(),
    FoldConstants(),
    GiveUniqueNodeNames(),
    GiveReadableTensorNames(),
    InferDataTypes(),
    RemoveStaticGraphInputs(),
)
for tidy_pass in post_edit_tidy_passes:
    model = model.transform(tidy_pass)

pre_post_path = build_dir+f"/{network_name}_pre_post.onnx"
model.save(pre_post_path)
# Show the network again
showInNetron(pre_post_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_pre_post.onnx' at http://0.0.0.0:8081


### Streamlining and lowering layers
This process is highly dependent on the topology of the network. As such, it will differ for each type of network.

In [12]:
from finn.transformation.streamline import Streamline
import finn
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
import finn.transformation.streamline.absorb as absorb
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants
from qonnx.transformation.infer_data_layouts import InferDataLayouts
from qonnx.transformation.general import RemoveUnusedTensors

# Reload the pre/post-processed graph and run the streamlining / lowering
# transformations. The order of the list is the order of application.
model = ModelWrapper(build_dir + f"/{network_name}_pre_post.onnx")
streamlining_passes = [
    MoveScalarLinearPastInvariants(),
    Streamline(),
    LowerConvsToMatMul(),
    MakeMaxPoolNHWC(),
    absorb.AbsorbTransposeIntoMultiThreshold(),
    ConvertBipolarMatMulToXnorPopcount(),
    Streamline(),
    # absorb final add-mul nodes into TopK
    absorb.AbsorbScalarMulAddIntoTopK(),
    InferDataLayouts(),
    RemoveUnusedTensors(),
]
for streamlining_pass in streamlining_passes:
    model = model.transform(streamlining_pass)
model.save(build_dir+f"/{network_name}_streamlined.onnx")



In [13]:
# Inspect the streamlined graph in Netron.
streamlined_view_path = build_dir+f"/{network_name}_streamlined.onnx"
showInNetron(streamlined_view_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_streamlined.onnx' at http://0.0.0.0:8081


### Converting the layers into the HW equivalent

This stage will be the hardest: every node must be convertible to a compatible hardware version.

In [14]:
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
import finn.builder.build_dataflow 
from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_data_layouts import InferDataLayouts

# choose the memory mode for the MVTU units, decoupled or const
mem_mode = "decoupled" # smaller memory foot print. Longer synth times use 'const'

# Reload the streamlined graph and replace its nodes with HLS layers.
# The passes run in list order.
model = ModelWrapper(build_dir + f"/{network_name}_streamlined.onnx")
hls_conversion_passes = [
    to_hls.InferBinaryMatrixVectorActivation(mem_mode),
    to_hls.InferQuantizedMatrixVectorActivation(mem_mode),
    # TopK to LabelSelect
    to_hls.InferLabelSelectLayer(),
    # input quantization (if any) to standalone thresholding
    to_hls.InferThresholdingLayer(),
    to_hls.InferConvInpGen(),
    to_hls.InferStreamingMaxPool(),
    # get rid of Reshape(-1, 1) operation between hlslib nodes
    #RemoveCNVtoFCFlatten(), # comment out when not using any conv layers
    # get rid of Tranpose -> Tranpose identity seq
    absorb.AbsorbConsecutiveTransposes(),
    # infer tensor data layouts
    InferDataLayouts(),
]
for hls_pass in hls_conversion_passes:
    model = model.transform(hls_pass)

In [15]:
# Partition the network: the transformed graph becomes the parent model and is
# saved under "<name>_dataflow_parent.onnx".
parent_model = model.transform(CreateDataflowPartition())
parent_model.save(build_dir + f"/{network_name}_dataflow_parent.onnx")

# Take the first StreamingDataflowPartition node and wrap it as a custom op so
# that its "model" attribute (a path to the partition's ONNX file) can be read.
partition_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
sdp_node = getCustomOp(partition_nodes[0])
dataflow_model_filename = sdp_node.get_nodeattr("model")

# save the dataflow partition with a different name for easier access
dataflow_model = ModelWrapper(dataflow_model_filename)
dataflow_model.save(build_dir + f"/{network_name}_dataflow_model.onnx")

In [16]:
# Inspect the extracted dataflow partition in Netron.
dataflow_model_view_path = build_dir + f"/{network_name}_dataflow_model.onnx"
showInNetron(dataflow_model_view_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_dataflow_model.onnx' at http://0.0.0.0:8081


In [17]:
# Inspect the parent graph (the one containing the partition node) in Netron.
dataflow_parent_view_path = build_dir + f"/{network_name}_dataflow_parent.onnx"
showInNetron(dataflow_parent_view_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_dataflow_parent.onnx' at http://0.0.0.0:8081


# Folding the network
Uses the C++ synthesis tool to determine the folding settings. This is not optimal, but it is the standard procedure followed in this thesis.

In [18]:
# automatic setting of folding
import finn.transformation.fpgadataflow.set_folding as SetFolding
import finn.transformation.fpgadataflow.set_fifo_depths as InsertFIFO
from finn.util.basic import pynq_part_map
# Target board and the FPGA part it maps to.
fpga = "Pynq-Z2"
fpgapart = pynq_part_map[fpga]

# Reload the dataflow partition and let SetFolding pick the folding factors
# for a target of one cycle per frame, then store the folded graph.
dataflow_graph = ModelWrapper(build_dir + f"/{network_name}_dataflow_model.onnx")
#dataflow_graph = dataflow_graph.transform(InsertFIFO.RemoveShallowFIFOs())
folding_pass = SetFolding.SetFolding(target_cycles_per_frame=1)
model = dataflow_graph.transform(folding_pass)
model.save(build_dir + f"/{network_name}_folded.onnx")

                        be created. This may cause RTL simulation issues.
                        
                        be created. This may cause RTL simulation issues.
                        


In [19]:
# Inspect the folded graph in Netron.
folded_view_path = build_dir + f"/{network_name}_folded.onnx"
showInNetron(folded_view_path)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_folded.onnx' at http://0.0.0.0:8081


In [21]:
# NOTE(review): this cell repeats the previous one (same folded graph); it is
# kept so the notebook's cell sequence is unchanged.
folded_view_path_repeat = build_dir+f"/{network_name}_folded.onnx"
showInNetron(folded_view_path_repeat)

Stopping http://0.0.0.0:8081
Serving '/home/julien/finn/notebooks/Thesis/TestSynth/FC8bitv4_fast_folded.onnx' at http://0.0.0.0:8081


## Synthesise the network

In [22]:
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
import finn.transformation.fpgadataflow.prepare_ip as prepare_ip
import finn.transformation.fpgadataflow.insert_iodma as insert_iodma
# Build settings: target board and clock period in nanoseconds.
pynq_board = "Pynq-Z2"
target_clk_ns = 10

# Reload the folded graph, run the full Zynq build on it and save the result.
folded_graph = ModelWrapper(build_dir+f"/{network_name}_folded.onnx")
zynq_build_pass = ZynqBuild(platform = pynq_board, period_ns = target_clk_ns) # handles all the synthesis parts
model = folded_graph.transform(zynq_build_pass)
model.save(build_dir+f"/{network_name}_synthesised.onnx")

                        be created. This may cause RTL simulation issues.
                        
                        be created. This may cause RTL simulation issues.
                        
                You may experience incorrect stitched-IP rtlsim or hardware
                behavior. It is strongly recommended to insert FIFOs prior to
                calling CreateStitchedIP.
ERROR: [Common 17-69] Command failed: Run 'impl_1' failed. Unable to open


Exception: Synthesis failed, no bitfile found. Check logs under /tmp/finn_dev_julien/vivado_zynq_proj_xbzkimxw

In [None]:
# Inspect the synthesised graph in Netron.
synthesised_view_path = build_dir+f"/{network_name}_synthesised.onnx"
showInNetron(synthesised_view_path)

### Recover the amount of resources used

In [None]:
# Reload the synthesised graph and open the sub-model referenced by its
# second node (index 1) in Netron.
# NOTE(review): index 1 is presumably the partition holding the accelerator
# layers - confirm against the synthesised graph.
model = ModelWrapper(build_dir+f"/{network_name}_synthesised.onnx")
middle_graph_node = model.graph.node[1]
sdp_node_middle = getCustomOp(middle_graph_node)
postsynth_layers = sdp_node_middle.get_nodeattr("model")

showInNetron(postsynth_layers)

In [None]:
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources

# Reload the synthesised graph, annotate it with the 'synth' resource figures
# and save the annotated copy under its own name.
synthesised_graph = ModelWrapper(build_dir+f"/{network_name}_synthesised.onnx")
resource_pass = AnnotateResources('synth')
model = synthesised_graph.transform(resource_pass)
model.save(build_dir+f"/{network_name}_synthesised_resources.onnx")


In [None]:
 showInNetron(build_dir+f"/{network_name}_synthesised_resources.onnx")

## Make the pynq driver zip

In [None]:
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver

# Generate the PYNQ driver for the "zynq-iodma" platform on the current model
# and save the resulting graph.
driver_pass = MakePYNQDriver("zynq-iodma")
model = model.transform(driver_pass)
model.save(build_dir + f"/{network_name}_synth.onnx")

In [None]:
# Collect everything the board needs (bitfile, hardware hand-off file and the
# generated driver folder) in a fresh deployment directory and zip it up.
# shutil.copytree(..., dirs_exist_ok=True) replaces distutils' copy_tree, which
# is deprecated and no longer shipped with Python 3.12+.
from shutil import copy, copytree, make_archive

# create directory for deployment files
deployment_dir = make_build_dir(prefix="pynq_deployment_")
model.set_metadata_prop("pynq_deployment_dir", deployment_dir)

# get and copy necessary files
# .bit and .hwh file
bitfile = model.get_metadata_prop("bitfile")
hwh_file = model.get_metadata_prop("hw_handoff")
deploy_files = [bitfile, hwh_file]

# Entries that are missing from the metadata (None) are skipped, as before.
for dfile in deploy_files:
    if dfile is not None:
        copy(dfile, deployment_dir)

# driver.py and python libraries
pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
if pynq_driver_dir is None:
    # Fail with a readable message instead of an opaque error from the copy call.
    raise RuntimeError(
        "Model has no 'pynq_driver_dir' metadata; run MakePYNQDriver before packaging."
    )
# deployment_dir already exists, so merge the driver tree into it.
copytree(pynq_driver_dir, deployment_dir, dirs_exist_ok=True)

# Writes "<network_name>.zip" relative to the current working directory.
make_archive(f'{network_name}', 'zip', deployment_dir)
print(f"done {network_name}")

Validating the Accuracy on a PYNQ Board

Ensure that your PYNQ board has a working internet connection for the next steps, since some downloading is involved.

We can now use the validate.py script that was generated together with the driver to measure top-1 accuracy on the MNIST dataset.

Important: override the provided FINN validate.py with one provided in root.

Command to execute on PYNQ board:

sudo python3 validate.py --dataset mnist --batchsize 1000

