In [2]:
import torch.nn as nn
import torch.nn.functional as F
import brevitas.nn as qnn
import numpy as np

In [3]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

# ONNX model handed to the FINN builder. Keep this identical to the model_file
# used by the full build cell further down, so that the resource estimates
# describe the same design that is later synthesized.
model_file = "Brevitas_w4a4_export.onnx"

# All estimate reports are written below this directory.
estimates_output_dir = "output_brevitas_estimates"

# Delete previous run results if they exist, so stale reports are never read back.
if os.path.exists(estimates_output_dir):
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")


# Estimate-only flow: runs the steps in estimate_only_dataflow_steps and
# requests only the estimate reports as output.
cfg_estimates = build.DataflowBuildConfig(
    output_dir          = estimates_output_dir,
    mvau_wwidth_max     = 80,
    target_fps          = 10000,
    synth_clk_period_ns = 10.0,   # 10 ns clock period, i.e. 100 MHz
    fpga_part           = "xc7z020clg400-1",
    board               = "Pynq-Z2",
    steps               = build_cfg.estimate_only_dataflow_steps,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
    ]
)

In [4]:
%%time
# Run the estimate-only flow described by cfg_estimates on model_file;
# the cell magic above reports how long the build call takes.
build.build_dataflow_cfg(model_file, cfg_estimates)

Building dataflow accelerator from Brevitas_w4a4_export.onnx
Intermediate outputs will be generated in /home/rdl-ws2/tmp_finn
Final outputs will be generated in output_brevitas_estimates
Build log is at output_brevitas_estimates/build_dataflow.log
Running step: step_qonnx_to_finn [1/10]
Running step: step_tidy_up [2/10]
Running step: step_streamline [3/10]
Running step: step_convert_to_hw [4/10]
Running step: step_create_dataflow_partition [5/10]
Running step: step_specialize_layers [6/10]
Running step: step_target_fps_parallelization [7/10]
Running step: step_apply_folding_config [8/10]
Running step: step_minimize_bit_width [9/10]
Running step: step_generate_estimate_reports [10/10]
Completed successfully
CPU times: user 900 ms, sys: 1.79 ms, total: 902 ms
Wall time: 902 ms


0

In [5]:
import json
def read_json_dict(filename: str) -> dict:
    """Load a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (a dict for the report files read in
        this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale-dependent default text encoding.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

In [6]:
# Display the per-layer resource estimates written by the estimate-only build.
read_json_dict(f"{estimates_output_dir}/report/estimate_layer_resources.json")

{'Thresholding_rtl_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 72.0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 308,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_0': {'BRAM_18K': 1,
  'BRAM_efficiency': 0.017361111111111112,
  'LUT': 367,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_1': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 324,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_1': {'BRAM_18K': 5,
  'BRAM_efficiency': 0.1111111111111111,
  'LUT': 2945,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'StreamingMaxPool_hls_0': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 0,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'ConvolutionInputGenerator_rtl_2': {'BRAM_18K': 0,
  'BRAM_efficiency': 1,
  'LUT': 324,
  'URAM': 0,
  'URAM_efficiency': 1,
  'DSP': 0},
 'MVAU_hls_2': {'BRAM_18K': 1,
  'BRAM_efficienc

In [7]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

# ONNX model handed to the FINN builder for the full hardware build
model_file = "Brevitas_w4a4_export.onnx"

# Everything the full build produces is written below this directory
final_output_dir = "output_brevitas_final"

# Start from a clean slate: drop whatever an earlier run left behind
if os.path.exists(final_output_dir):
    shutil.rmtree(final_output_dir)
    print("Previous run results deleted!")


# Full build for the Pynq-Z2 board via the Vivado Zynq shell flow, requesting
# the bitfile, the PYNQ driver and the deployment package as outputs.
cfg = build.DataflowBuildConfig(
    output_dir=final_output_dir,
    mvau_wwidth_max=80,
    target_fps=10000,
    synth_clk_period_ns=10.0,  # 10 ns clock period, i.e. 100 MHz
    board="Pynq-Z2",
    shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
    generate_outputs=[
        build_cfg.DataflowOutputType.BITFILE,
        build_cfg.DataflowOutputType.PYNQ_DRIVER,
        build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
    ],
)

Previous run results deleted!


In [8]:
%%time
# Run the full build described by cfg on model_file;
# the cell magic above reports how long the build call takes.
build.build_dataflow_cfg(model_file, cfg)

Building dataflow accelerator from Brevitas_w4a4_export.onnx
Intermediate outputs will be generated in /home/rdl-ws2/tmp_finn
Final outputs will be generated in output_brevitas_final
Build log is at output_brevitas_final/build_dataflow.log
Running step: step_qonnx_to_finn [1/19]
Running step: step_tidy_up [2/19]
Running step: step_streamline [3/19]
Running step: step_convert_to_hw [4/19]
Running step: step_create_dataflow_partition [5/19]
Running step: step_specialize_layers [6/19]
Running step: step_target_fps_parallelization [7/19]
Running step: step_apply_folding_config [8/19]
Running step: step_minimize_bit_width [9/19]
Running step: step_generate_estimate_reports [10/19]
Running step: step_hw_codegen [11/19]
Running step: step_hw_ipgen [12/19]
Running step: step_set_fifo_depths [13/19]
Running step: step_create_stitched_ip [14/19]
Running step: step_measure_rtlsim_performance [15/19]
Running step: step_out_of_context_synthesis [16/19]
Running step: step_synthesize_bitfile [17/19]


0

In [10]:
# Display the per-layer HLS resource report written by the full build.
read_json_dict(f"{final_output_dir}/report/estimate_layer_resources_hls.json")

{'MVAU_hls_0': {'BRAM_18K': '0',
  'FF': '3434',
  'LUT': '7588',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '1'},
 'MVAU_hls_1': {'BRAM_18K': '0',
  'FF': '5761',
  'LUT': '10300',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '25'},
 'StreamingMaxPool_hls_0': {'BRAM_18K': '0',
  'FF': '97',
  'LUT': '474',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '0'},
 'MVAU_hls_2': {'BRAM_18K': '0',
  'FF': '4846',
  'LUT': '8209',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '3'},
 'MVAU_hls_3': {'BRAM_18K': '0',
  'FF': '3955',
  'LUT': '7783',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '1'},
 'StreamingMaxPool_hls_1': {'BRAM_18K': '0',
  'FF': '90',
  'LUT': '466',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '0'},
 'MVAU_hls_4': {'BRAM_18K': '0',
  'FF': '2008',
  'LUT': '10651',
  'DSP48E': 0,
  'URAM': '0',
  'DSP': '1'}}