In [1]:
import torch

from yolov3_pytorch.yolov3_tiny import Yolov3Tiny
from finn.util.visualization import showInNetron

In [2]:
# Build the 80-class YOLOv3-tiny network and restore its pretrained weights.
# map_location="cpu" keeps the load working on CPU-only machines even when the
# checkpoint was written from CUDA tensors (the dummy input below is on CPU).
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
model = Yolov3Tiny(80)
model.load_state_dict(torch.load("yolov3_tiny_coco_01.h5", map_location="cpu"))

# A freshly constructed nn.Module is in training mode. Switch to inference mode
# before the sanity forward pass so that layers with training-time behaviour
# (e.g. normalisation layers updating their running statistics) are not
# modified by a pass over random noise prior to export and quantization.
model.eval()

# Random input with a spatial size that is a multiple of 32, as the model's
# shape assertion requires.
dummy_input = torch.randn(1, 3, 416, 416)
with torch.no_grad():
    # Sanity check that the restored network runs end to end.
    result = model(dummy_input)
torch.onnx.export(model, dummy_input, "model.onnx")

  assert shape[1] == 3 and shape[2] % 32 == 0 and shape[3] % 32 == 0, f"Tensor shape should be [bs, 3, x*32, y*32], was {shape}"
  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(


In [3]:
# Open the exported floating-point ONNX graph in the Netron viewer for inspection.
showInNetron("model.onnx")

Serving 'model.onnx' at http://0.0.0.0:8081


In [4]:
import brevitas.nn as qnn
from brevitas.core.quant import QuantType
from brevitas.export import export_qonnx

def convert_to_quantized_layer(layer, weight_bit_width=8):
    """Return a Brevitas quantized counterpart of ``layer`` when one is defined.

    Supported conversions:

    * ``torch.nn.Linear``    -> ``qnn.QuantLinear``
    * ``torch.nn.Conv2d``    -> ``qnn.QuantConv2d``
    * ``torch.nn.LeakyReLU`` -> ``qnn.QuantReLU``
    * ``torch.nn.Upsample``  -> ``qnn.QuantUpsample``

    Any other module (containers, batch normalisation, pooling, ...) is
    returned unchanged, i.e. the very same object is handed back.

    Args:
        layer: The ``torch.nn.Module`` to convert.
        weight_bit_width: Bit width used for the quantized weights. The same
            value is also used as the activation bit width of ``QuantReLU``.

    Returns:
        A new quantized module carrying over the trained ``weight`` and
        ``bias`` of ``layer`` (when both sides have them), or ``layer`` itself
        if no conversion is defined for its type.
    """
    # This function should be expanded to handle different types of layers and configurations
    if isinstance(layer, torch.nn.Linear):
        quant_layer = qnn.QuantLinear(
            in_features=layer.in_features,
            out_features=layer.out_features,
            bias=layer.bias is not None,
            weight_bit_width=weight_bit_width)
    elif isinstance(layer, torch.nn.Conv2d):
        quant_layer = qnn.QuantConv2d(
            in_channels=layer.in_channels,
            out_channels=layer.out_channels,
            kernel_size=layer.kernel_size,
            stride=layer.stride,
            padding=layer.padding,
            dilation=layer.dilation,
            groups=layer.groups,
            bias=layer.bias is not None,
            weight_quant_type=QuantType.INT,  # Weight quantization type
            weight_bit_width=weight_bit_width  # Bit width for the weights
        )
    elif isinstance(layer, torch.nn.LeakyReLU):
        # NOTE(review): the negative slope of the LeakyReLU is dropped here, so
        # the converted network is no longer numerically equivalent to the
        # pretrained one -- confirm that retraining/fine-tuning is intended.
        quant_layer = qnn.QuantReLU(
            bit_width=weight_bit_width,  # Bit width for the activation
            quant_type=QuantType.INT  # Activation quantization type
        )
    elif isinstance(layer, torch.nn.Upsample):
        # Referenced through the `qnn` alias: only `brevitas.nn as qnn` is
        # imported, a bare `QuantUpsample` name would raise NameError.
        quant_layer = qnn.QuantUpsample(
            size=layer.size,
            scale_factor=layer.scale_factor,
            mode=layer.mode,
            align_corners=layer.align_corners
        )
    # Add other layer conversions here
    # Note: Batch normalization layers might not need to be quantized in many cases.
    # elif isinstance(layer, torch.nn.BatchNorm2d):
    else:
        # No conversion defined: hand back the original module untouched.
        return layer

    # Carry the trained parameters over to the replacement layer. Both weight
    # and bias are transferred; a parameter that is absent or None on either
    # side (e.g. bias=False, or activation layers) is simply skipped.
    for param_name in ("weight", "bias"):
        source = getattr(layer, param_name, None)
        target = getattr(quant_layer, param_name, None)
        if source is not None and target is not None:
            target.data = source.data
    return quant_layer

def quantize_model(model, weight_bit_width=8):
    """Replace the supported layers of ``model`` by quantized ones, in place.

    Every direct child is passed through ``convert_to_quantized_layer``. A
    child that comes back unchanged (e.g. a container) is descended into so
    that nested layers are converted as well. A child that was replaced is NOT
    descended into: the layer types that get replaced have no convertible
    children, and walking the internals of the new quantized layer is neither
    needed nor safe.

    Args:
        model: The ``torch.nn.Module`` to rewrite. It is modified in place.
        weight_bit_width: Bit width forwarded to ``convert_to_quantized_layer``.

    Returns:
        The same ``model`` object, for call chaining.
    """
    for name, module in model.named_children():
        converted = convert_to_quantized_layer(module, weight_bit_width)
        model._modules[name] = converted
        if converted is module:
            # Untouched module: recurse to convert its own submodules.
            quantize_model(converted, weight_bit_width)
    return model

# Example usage
# quantize_model rewrites the network in place and returns the object it was
# given, so `quantized_model` and `model` refer to the same module afterwards.
quantized_model = quantize_model(model)


In [None]:
# Export the quantized network to "quant_model.onnx", reusing the shape of the
# dummy input created earlier as the export input shape.
export_qonnx(quantized_model, export_path="quant_model.onnx", input_shape=dummy_input.shape)

In [6]:
# Open the exported quantized graph in the Netron viewer for inspection.
showInNetron("quant_model.onnx")

Stopping http://0.0.0.0:8081
Serving 'quant_model.onnx' at http://0.0.0.0:8081


In [None]:
import torch
from sklearn.metrics import accuracy_score

def test(model, test_loader):
    """Compute the accuracy of a binary classifier over ``test_loader``.

    The model output is passed through a sigmoid and thresholded at 0.5 to
    obtain 0/1 predictions, which are compared against the targets with
    ``accuracy_score``.

    Args:
        model: Module to evaluate. It is switched to eval mode.
        test_loader: Iterable yielding ``(inputs, target)`` tensor pairs.

    Returns:
        The accuracy returned by ``accuracy_score`` over all samples.
    """
    # ensure model is in eval mode
    model.eval()
    # Run on whatever device the model lives on instead of relying on a
    # global `device` variable; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, target in test_loader:
            output_orig = model(inputs.to(device).float())
            # run the output through sigmoid
            output = torch.sigmoid(output_orig)
            # compare against a threshold of 0.5 to generate 0/1
            pred = (output.cpu().numpy() > 0.5) * 1
            # Flatten both sides so a (batch, 1) target lines up one-to-one
            # with the flattened predictions.
            y_true.extend(target.cpu().float().reshape(-1).tolist())
            y_pred.extend(pred.reshape(-1).tolist())

    return accuracy_score(y_true, y_pred)

In [7]:
import json
from tqdm import tqdm
import cv2
import numpy as np

# Locations of the COCO val2017 annotation file and image directory, given
# relative to the notebook's working directory.
anno_json = '../coco/annotations/instances_val2017.json'
img_path = '../coco/images/val2017'
# Parse the annotation JSON once; data['images'] is consumed further below.
with open(anno_json) as f:
    data = json.load(f)

In [8]:
def img_fname(idx):
    """Build the path of the image file for the integer image id ``idx``.

    The id is zero-padded to 12 digits, given a ``.jpg`` suffix and placed
    under the module-level ``img_path`` directory.
    """
    padded_id = format(idx, "012d")
    return "{}/{}.jpg".format(img_path, padded_id)

In [9]:
def predict_all(data_imgs, sz=416, conf_thresh=.2, nms_thresh=.4):
    """Run the quantized detector over a list of COCO image records.

    For every record the image is read from disk, optionally resized to
    ``sz`` x ``sz``, passed to ``quantized_model.predict_img`` and the
    resulting boxes are filtered with ``nms``. Surviving boxes are converted
    to COCO-style result dictionaries in absolute pixel coordinates.

    Args:
        data_imgs: Iterable of dicts with at least the keys ``'id'``,
            ``'width'`` and ``'height'`` (e.g. ``data['images']`` from a COCO
            annotation file).
        sz: Side length the image is resized to; a falsy value keeps the
            original resolution.
        conf_thresh: Confidence threshold forwarded to ``predict_img``.
        nms_thresh: Threshold forwarded to ``nms``.

    Returns:
        ``(results, img_ids)``: the list of result dicts with keys
        ``image_id``, ``category_id``, ``bbox`` (``[x, y, w, h]``) and
        ``score``, and the list of processed image ids.

    Raises:
        FileNotFoundError: If an image file cannot be read.
    """
    results = []
    img_ids = []
    for dat in tqdm(data_imgs):
        f_id = dat['id']
        img_ids.append(f_id)
        path = img_fname(f_id)
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail with a clear message instead of an obscure resize error.
            raise FileNotFoundError(f"Could not read image: {path}")

        if sz:
            img = cv2.resize(img, (sz, sz))
        # HWC -> CHW plus a leading batch axis. Taking the shape from the
        # array itself (rather than reshaping to sz) also works when sz is
        # falsy and the image keeps its native resolution.
        img = img.transpose((2, 0, 1))[np.newaxis]
        img = np.ascontiguousarray(img, dtype=np.uint8)
        # NOTE(review): cv2.imread yields BGR channel order and the tensor is
        # passed as raw uint8 -- confirm that predict_img expects this.

        # img_torch = torch.from_numpy(img).cuda()
        img_torch = torch.from_numpy(img)

        all_boxes = quantized_model.predict_img(img_torch, conf_thresh=conf_thresh)[0]
        boxes = nms(all_boxes, nms_thresh=nms_thresh)

        width = dat['width']
        height = dat['height']

        for pred in boxes:
            # Presumably pred[:4] is (cx, cy, w, h) relative to the image
            # size -- TODO confirm. Shift the centre to the top-left corner,
            # then scale to pixels.
            box = np.array(pred[:4])
            box[:2] -= box[2:4]/2
            x, w = box[0]*width, box[2]*width
            y, h = box[1]*height, box[3]*height
            cat = class_conversion[int(pred[-1])]
            # Plain Python floats keep the result list JSON-serializable.
            res = {"image_id":f_id, "category_id":cat,
                    "bbox":[float(x), float(y), float(w), float(h)],
                    "score": float(pred[-2])}

            results.append(res)

    print(f"Results total {len(results)}. N of files {len(img_ids)}")
    return results, img_ids

In [None]:
# Run detection over every image listed in the annotation file and collect the
# result dicts together with the processed image ids.
results, img_ids = predict_all(data['images'], conf_thresh=.2, nms_thresh=.4)