In [18]:
from finn.util.basic import make_build_dir
from finn.util.visualization import showInNetron
import os
    
build_dir = os.environ["FINN_BUILD_DIR"]

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import brevitas.nn as qnn
from brevitas.quant.scaled_int import Int8ActPerTensorFloat

class QuantizedCNN(nn.Module):
    def __init__(self):
        super(QuantizedCNN, self).__init__()
        self.conv1 = qnn.QuantConv2d(4, 16, kernel_size=3, stride=2, padding=1, weight_bit_width=8, input_quant=Int8ActPerTensorFloat, output_quant=Int8ActPerTensorFloat)
        self.batch1 = nn.BatchNorm2d(16)
        self.relu1 = qnn.QuantReLU(return_quant_tensor=True, scaling_stats_permute_dims=(3,2,1,0))   
        
        self.conv2 = qnn.QuantConv2d(16, 8, kernel_size=3, stride=2, padding=1, weight_bit_width=8, input_quant=Int8ActPerTensorFloat, output_quant=Int8ActPerTensorFloat)
        self.batch2 = nn.BatchNorm2d(8)
        self.relu2 = qnn.QuantReLU(return_quant_tensor=True, scaling_stats_permute_dims=(3,2,1,0))
        
        self.conv3 = qnn.QuantConv2d(8, 16, kernel_size=3, stride=2, padding=1, weight_bit_width=8, input_quant=Int8ActPerTensorFloat, output_quant=Int8ActPerTensorFloat)
        self.batch3 = nn.BatchNorm2d(16)
        self.relu3 = qnn.QuantReLU(return_quant_tensor=True, scaling_stats_permute_dims=(3,2,1,0))
        
        self.conv4 = qnn.QuantConv2d(16, 8, kernel_size=3, stride=2, padding=1, weight_bit_width=8, input_quant=Int8ActPerTensorFloat, output_quant=Int8ActPerTensorFloat)
        self.batch4 = nn.BatchNorm2d(8)
        self.relu4 = qnn.QuantReLU(return_quant_tensor=True, scaling_stats_permute_dims=(3,2,1,0))
        
        self.global_pool = qnn.TruncAvgPool2d(kernel_size=2, float_to_int_impl_type="FLOOR")
        self.softmax = nn.Softmax(1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.batch1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.batch2(x)
        x = self.relu2(x)

        x = self.conv3(x)
        x = self.batch3(x)
        x = self.relu3(x)

        x = self.conv4(x)
        x = self.batch4(x)
        x = self.relu4(x)

        x = self.global_pool(x)
        x = self.softmax(x) 
        
        x = x.view(x.size(0), -1)
        return x

In [21]:
import torch
import onnx
from finn.util.test import get_test_model_trained
from brevitas.export import export_qonnx
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from qonnx.core.modelwrapper import ModelWrapper
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.fold_constants import FoldConstants
from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs

cnv = QuantizedCNN()
cnv.load_state_dict(torch.load("cnn-sat6-w8.pt"))

print(cnv)
export_onnx_path = build_dir + "/end2end_cnv_w1a1_export.onnx"
export_qonnx(cnv, torch.randn(1, 3, 32, 32), export_onnx_path)
qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
model = ModelWrapper(export_onnx_path)
model = model.transform(ConvertQONNXtoFINN())
model.save("sat6-cnn-8bits.onnx")

QuantizedCNN(
  (conv1): QuantConv2d(
    4, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)
    (input_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
      (fused_activation_quant_proxy): FusedActivationQuantProxy(
        (activation_impl): Identity()
        (tensor_quant): RescalingIntQuant(
          (int_quant): IntQuant(
            (float_to_int_impl): RoundSte()
            (tensor_clamp_impl): TensorClamp()
            (delay_wrapper): DelayWrapper(
              (delay_impl): _NoDelay()
            )
          )
          (scaling_impl): ParameterFromRuntimeStatsScaling(
            (stats_input_view_shape_impl): OverTensorView()
            (stats): _Stats(
              (stats_impl): AbsPercentile()
            )
            (restrict_scaling): _RestrictValue(
              (restrict_value_impl): FloatRestrictValue()
            )
            (clamp_scaling): _ClampValue(
              (clamp_min_ste): ScalarClampMinSte()
            

RuntimeError: Given groups=1, weight of size [16, 4, 3, 3], expected input[1, 3, 32, 32] to have 4 channels, but got 3 channels instead

In [22]:
import finn.builder.build_dataflow as build
import finn.builder.build_dataflow_config as build_cfg
import os
import shutil

model_file = "sat6-cnn-8bits.onnx"

estimates_output_dir = "output_estimates_only"

#Delete previous run results if exist
if os.path.exists(estimates_output_dir):
    shutil.rmtree(estimates_output_dir)
    print("Previous run results deleted!")


cfg_estimates = build.DataflowBuildConfig(
    output_dir          = estimates_output_dir,
    mvau_wwidth_max     = 80,
    target_fps          = 1000000,
    synth_clk_period_ns = 10.0,
    fpga_part           = "xc7a100tcsg324-1",
    steps               = build_cfg.estimate_only_dataflow_steps,    
    rtlsim_batch_size  = 1000,
    default_mem_mode = build_cfg.ComputeEngineMemMode.CONST,
    generate_outputs=[
        build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
        build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE
    ]
)

Previous run results deleted!


In [23]:
%%time
build.build_dataflow_cfg(model_file, cfg_estimates)

Building dataflow accelerator from sat6-cnn-8bits.onnx
Intermediate outputs will be generated in /tmp/finn_dev_artti
Final outputs will be generated in output_estimates_only
Build log is at output_estimates_only/build_dataflow.log
Running step: step_qonnx_to_finn [1/9]
Running step: step_tidy_up [2/9]
Running step: step_streamline [3/9]
Running step: step_convert_to_hls [4/9]
Running step: step_create_dataflow_partition [5/9]
Running step: step_target_fps_parallelization [6/9]
Running step: step_apply_folding_config [7/9]
Running step: step_minimize_bit_width [8/9]
Running step: step_generate_estimate_reports [9/9]
Completed successfully
CPU times: user 2.68 s, sys: 3.21 ms, total: 2.69 s
Wall time: 2.69 s


0