In [5]:
import onnxruntime as rt
import torch
from coco_eval import CocoEvaluator
from tqdm.notebook import tqdm

import numpy as np
import onnx
from onnxruntime import quantization
from onnxruntime.quantization import quantize_dynamic, QuantType

In [6]:
yolon_path = "yolon_best.onnx"
nanodet_path = "nanodet.onnx"

In [7]:
from roboflow import Roboflow
rf = Roboflow(api_key="xrjb7ahPJgT610pkOEf2")
project = rf.workspace("mooncrater").project("mooncrater")
dataset = project.version(3).download("coco")

loading Roboflow workspace...
loading Roboflow project...


In [8]:
import os
import torchvision

# settings
ANNOTATION_FILE_NAME = "_annotations.coco.json"
TRAIN_DIRECTORY = os.path.join(dataset.location, "train")
VAL_DIRECTORY = os.path.join(dataset.location, "valid")
TEST_DIRECTORY = os.path.join(dataset.location, "test")

class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(
        self,
        image_directory_path: str,
        # onnx_model_path: str,
        train: bool = True
    ):
        annotation_file_path = os.path.join(image_directory_path, ANNOTATION_FILE_NAME)
        super(CocoDetection, self).__init__(image_directory_path, annotation_file_path)

    def __getitem__(self, idx):
        images, annotations = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        annotations = {'image_id': image_id, 'annotations': annotations}
        
        return images, annotations


TRAIN_DATASET = CocoDetection(
    image_directory_path=TRAIN_DIRECTORY,
    train=True)
VAL_DATASET = CocoDetection(
    image_directory_path=VAL_DIRECTORY,
    train=False)
TEST_DATASET = CocoDetection(
    image_directory_path=TEST_DIRECTORY,
    train=False)

print("Number of training examples:", len(TRAIN_DATASET))
print("Number of validation examples:", len(VAL_DATASET))
print("Number of test examples:", len(TEST_DATASET))

loading annotations into memory...
Done (t=0.11s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Number of training examples: 1479
Number of validation examples: 80
Number of test examples: 20


In [23]:
class QuantizationDataReader(quantization.CalibrationDataReader):
    def __init__(self, test_ds, batch_size, input_name):

        self.test_ds = test_ds

        self.input_name = input_name
        self.datasize = len(self.test_ds)

        self.enum_data = iter(self.test_ds)

    def to_numpy(self, image):
        image = np.array(image)
        image = image.astype(np.float32) / 255.0
        # print(image)
        image = np.transpose(image, (2, 0, 1))  # Change data layout from HWC to CHW
        image = np.expand_dims(image, axis=0) 

        return image

    def get_next(self):
        batch = next(self.enum_data, None)
        if batch is not None:
          return {self.input_name: self.to_numpy(batch[0])}
        else:
          return None

    def rewind(self):
        self.enum_data = iter(self.torch_dl)



In [16]:
TEST_DATASET[2]

(<PIL.Image.Image image mode=RGB size=640x640>,
 {'image_id': 2,
  'annotations': [{'id': 7,
    'image_id': 2,
    'category_id': 3,
    'bbox': [74, 238, 35, 43.5],
    'area': 1522.5,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 8,
    'image_id': 2,
    'category_id': 3,
    'bbox': [489, 261, 33.5, 24.5],
    'area': 820.75,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 9,
    'image_id': 2,
    'category_id': 3,
    'bbox': [3, 304, 49, 61.5],
    'area': 3013.5,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 10,
    'image_id': 2,
    'category_id': 3,
    'bbox': [397, 324, 68.5, 50],
    'area': 3425,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 11,
    'image_id': 2,
    'category_id': 3,
    'bbox': [114, 344, 48.5, 30.5],
    'area': 1479.25,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 12,
    'image_id': 2,
    'category_id': 3,
    'bbox': [43, 346, 56.5, 52],
    'area': 2938,
    'segmentation': [],
    'iscrowd': 0},
   {'id': 13,


In [81]:

onnx_yolon = rt.InferenceSession(yolon_path)


In [25]:
from PIL import ImageDraw

In [32]:
import time
def inf_on_image(input_pil_image, onnx_model):
    
    image = np.array(input_pil_image)

    # Preprocess the images as required by your ONNX model
    # This might involve resizing, normalization, adding a batch dimension, etc.
    # The exact preprocessing steps will depend on your specific model

    # Convert the images to a format that can be used as input to the ONNX model
    # print(image)
    image = image.astype(np.float32) / 255.0
    # print(image)
    image = np.transpose(image, (2, 0, 1))  # Change data layout from HWC to CHW
    image = np.expand_dims(image, axis=0)  # Add batch dimension

    # Run the ONNX model
    input_name = onnx_model.get_inputs()[0].name

    start = time.perf_counter()
    predictions = onnx_model.run(None, {input_name: image})
    end = (time.perf_counter() - start) * 1000

    return predictions, end

def draw_preds(input_pil_image, predictions, conf=0.70):
    draw = ImageDraw.Draw(input_pil_image)

    # Define a color for each label
    colors = {0: "green", 1: "blue", 2: "red"}

    for prediction in predictions[0][0]:

        x1, y1, x2, y2, score, label = prediction
        
        #print(score, label)

        if score > conf:
            # Draw the bounding box on the image
            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
            draw.rectangle([x1, y1, x2, y2], outline=colors[int(label)], width=2)

    # Display the image
    input_pil_image.show()
    

In [33]:
test_img, _ = TEST_DATASET[4]

preds = inf_on_image(test_img)

print(preds[0].shape)

draw_preds(test_img, preds, 0.25)

TypeError: inf_on_image() missing 1 required positional argument: 'onnx_model'

In [34]:
TEST_DATASET.coco.dataset['annotations']

[{'id': 0,
  'image_id': 0,
  'category_id': 2,
  'bbox': [100, 402, 110.5, 38.5],
  'area': 4254.25,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 1,
  'image_id': 1,
  'category_id': 2,
  'bbox': [41, 303, 100.5, 29.5],
  'area': 2964.75,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 2,
  'image_id': 1,
  'category_id': 2,
  'bbox': [510, 330, 123.5, 46],
  'area': 5681,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 3,
  'image_id': 1,
  'category_id': 2,
  'bbox': [342, 339, 65, 33.5],
  'area': 2177.5,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 4,
  'image_id': 1,
  'category_id': 2,
  'bbox': [55, 361, 91.5, 34.5],
  'area': 3156.75,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 5,
  'image_id': 1,
  'category_id': 2,
  'bbox': [291, 396, 262.5, 79],
  'area': 20737.5,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 6,
  'image_id': 1,
  'category_id': 2,
  'bbox': [4, 427, 218.5, 50.5],
  'area': 11034.25,
  'segmentation': [],
  'iscrowd': 0},
 {'id': 7,
  'image_id': 2,


In [42]:
import time

def calculate_metrics(inference_func, session):
    evaluator = CocoEvaluator(coco_gt=TEST_DATASET.coco, iou_types=["bbox"])

    print("Running evaluation...")

    total_model_time = 0.0

    for idx, (input_pil_image, annotation) in enumerate(TEST_DATASET):
        
        image_id = annotation['image_id']

        predictions, elapsed_time = inference_func(input_pil_image, session)
        total_model_time += elapsed_time
        coco_results = []
        for prediction in predictions[0][0]:

            # Extract the bounding box coordinates, confidence score, and class label
            x1, y1, x2, y2, score, label = prediction
            
            # Convert the bounding box from (x1, y1, x2, y2) to (x, y, width, height)
            box = [x1, y1, x2 - x1, y2 - y1]

            print(score,label)

            if score > 0.25:
                # Append the result
                coco_results.append({
                    "image_id": image_id,
                    "category_id": int(label)+1,
                    "bbox": box,
                    "score": float(score),
                })
        
        if len(coco_results) > 0:
            evaluator.update(coco_results)
    


    print(f"Elapsed time: {total_model_time} seconds")
    

    evaluator.synchronize_between_processes()
    evaluator.accumulate()
    evaluator.summarize()

In [2]:
import json
resfile = "results_nanodet.json"

with open(resfile, 'r') as f:
    nanodet_data = json.load(f)

In [10]:
nanodet_data[:10]

[{'image_id': 1,
  'category_id': 1,
  'bbox': [215.38153076171875,
   395.5082092285156,
   363.05535888671875,
   244.49179077148438],
  'score': 0.06185806542634964},
 {'image_id': 1,
  'category_id': 1,
  'bbox': [0.0, 249.67079162597656, 557.7630004882812, 390.32920837402344],
  'score': 0.061044514179229736},
 {'image_id': 1,
  'category_id': 1,
  'bbox': [201.85061645507812,
   254.96240234375,
   342.8085632324219,
   359.4034423828125],
  'score': 0.0595320463180542},
 {'image_id': 1,
  'category_id': 2,
  'bbox': [561.5934448242188,
   435.10064697265625,
   74.22698974609375,
   36.68804931640625],
  'score': 0.31729182600975037},
 {'image_id': 1,
  'category_id': 2,
  'bbox': [445.81597900390625,
   200.00335693359375,
   194.18402099609375,
   103.88772583007812],
  'score': 0.2133171409368515},
 {'image_id': 1,
  'category_id': 2,
  'bbox': [583.5692138671875,
   434.7855529785156,
   56.42706298828125,
   33.037811279296875],
  'score': 0.1807582825422287},
 {'image_id':

In [27]:
for item in nanodet_data:
    item['image_id'] -= 2

In [29]:
nanodet_data

[{'image_id': 0,
  'category_id': 1,
  'bbox': [215.38153076171875,
   395.5082092285156,
   363.05535888671875,
   244.49179077148438],
  'score': 0.06185806542634964,
  'segmentation': [[215.38153076171875,
    395.5082092285156,
    215.38153076171875,
    640.0,
    578.4368896484375,
    640.0,
    578.4368896484375,
    395.5082092285156]],
  'area': 88764.05484339781,
  'id': 1,
  'iscrowd': 0},
 {'image_id': 0,
  'category_id': 1,
  'bbox': [0.0, 249.67079162597656, 557.7630004882812, 390.32920837402344],
  'score': 0.061044514179229736,
  'segmentation': [[0.0,
    249.67079162597656,
    0.0,
    640.0,
    557.7630004882812,
    640.0,
    557.7630004882812,
    249.67079162597656]],
  'area': 217711.19044091087,
  'id': 2,
  'iscrowd': 0},
 {'image_id': 0,
  'category_id': 1,
  'bbox': [201.85061645507812,
   254.96240234375,
   342.8085632324219,
   359.4034423828125],
  'score': 0.0595320463180542,
  'segmentation': [[201.85061645507812,
    254.96240234375,
    201.85061

In [30]:
evaluator = CocoEvaluator(coco_gt=TEST_DATASET.coco, iou_types=["bbox"])

evaluator.update(nanodet_data)

evaluator.synchronize_between_processes()
evaluator.accumulate()
evaluator.summarize()


Accumulating evaluation results...
DONE (t=0.02s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.002
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.005
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= la

In [82]:
calculate_metrics(inf_on_image, onnx_yolon)


Running evaluation...
0.80453384 1.0
0.43019015 1.0
0.15540627 2.0
0.09108779 1.0
0.05720362 1.0
0.055244952 1.0
0.054953188 1.0
0.04214111 1.0
0.03604743 1.0
0.02029711 1.0
0.019265175 1.0
0.018290311 1.0
0.018205285 1.0
0.015255034 1.0
0.014018834 1.0
0.013505399 1.0
0.013485581 1.0
0.013370097 2.0
0.010465056 1.0
0.010286629 1.0
0.010030866 1.0
0.007409185 1.0
0.0069520175 1.0
0.0067702234 1.0
0.0059640408 1.0
0.0047789514 1.0
0.00477013 1.0
0.0046601593 1.0
0.004436791 1.0
0.0042008758 2.0
0.004177153 2.0
0.003952205 2.0
0.0037287176 2.0
0.0035175383 0.0
0.0032535791 1.0
0.0029518008 1.0
0.0025835633 2.0
0.0025376081 1.0
0.0025102496 1.0
0.0024926066 1.0
0.0024467707 1.0
0.0024126768 2.0
0.0022917986 2.0
0.002221167 1.0
0.0020869374 1.0
0.0020593405 1.0
0.0020321608 1.0
0.001981467 1.0
0.001969397 1.0
0.001916647 1.0
0.0018888712 1.0
0.0018032491 1.0
0.0017979741 1.0
0.0017972589 1.0
0.0017921329 1.0
0.001513511 1.0
0.0015050769 2.0
0.0015002191 1.0
0.0014992952 1.0
0.0014522374 1.

In [58]:
test_img, _ = TEST_DATASET[2]

In [17]:
sess_options = rt.SessionOptions()
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL

# Specify DNNL as the execution provider
# session = rt.InferenceSession(yolon_path, sess_options, providers=['DnnlExecutionProvider'])
session = rt.InferenceSession(yolon_path, sess_options, providers=['CPUExecutionProvider'])

In [60]:
onnx_model = rt.InferenceSession(yolon_path)


In [61]:
onnx_model.get_inputs()[0].name

'images'

In [30]:
def benchmark_model_time(session, num=1000):
    total_time = 0.0
    num_runs = 30
    for i in range(0,num_runs):
        _, elp_time = inf_on_image(test_img, session)
        total_time += elp_time
    avg_time_ms = total_time / num_runs
    return avg_time_ms

In [68]:
avg_time_ms

61.02335736650275

In [62]:
qdr = QuantizationDataReader(TEST_DATASET, batch_size=1, input_name=session.get_inputs()[0].name)

In [46]:
qdr.get_next()

{'images': array([[[[0.01176471, 0.01176471, 0.01176471, ..., 0.02745098,
           0.02745098, 0.02745098],
          [0.01176471, 0.01176471, 0.01176471, ..., 0.02745098,
           0.02745098, 0.02745098],
          [0.01176471, 0.01176471, 0.01176471, ..., 0.02745098,
           0.02745098, 0.02745098],
          ...,
          [0.2627451 , 0.23921569, 0.21176471, ..., 0.12156863,
           0.10588235, 0.08627451],
          [0.24313726, 0.23137255, 0.21568628, ..., 0.12156863,
           0.10196079, 0.08627451],
          [0.21176471, 0.21176471, 0.21176471, ..., 0.12156863,
           0.10196079, 0.08627451]],
 
         [[0.03137255, 0.03137255, 0.03137255, ..., 0.04705882,
           0.04705882, 0.04705882],
          [0.03137255, 0.03137255, 0.03137255, ..., 0.04705882,
           0.04705882, 0.04705882],
          [0.03137255, 0.03137255, 0.03137255, ..., 0.04705882,
           0.04705882, 0.04705882],
          ...,
          [0.3137255 , 0.2901961 , 0.2627451 , ..., 0.133

In [48]:
yolon_path_prep = "yolon_prep.onnx"
quantization.shape_inference.quant_pre_process(yolon_path, yolon_path_prep, skip_symbolic_shape=False)

In [63]:
q_static_opts = {"ActivationSymmetric":False,
                "WeightSymmetric":True}
if torch.cuda.is_available():
    q_static_opts = {"ActivationSymmetric":True,
                    "WeightSymmetric":True}

model_int8_path = 'yolon_int8.onnx'
quantized_model = quantization.quantize_static(model_input=yolon_path_prep,
                                               model_output=model_int8_path,
                                               calibration_data_reader=qdr)

In [64]:
int8_model = rt.InferenceSession(model_int8_path)

In [51]:
benchmark_model_time(int8_model, num=100)

134.7144072663165

In [68]:
inf_on_image(test_img,int8_model)[0][0][0]

array([[  0.       ,   0.       ,  17.800064 ,  14.833385 ,   0.       ,
          0.       ],
       [  0.       ,   0.       ,  17.800064 ,  14.833385 ,   0.       ,
          0.       ],
       [  0.       ,   0.       ,  17.800064 ,  14.833385 ,   0.       ,
          2.9666772],
       ...,
       [124.60044  ,  -2.9666772, 178.00063  ,  17.800064 ,   0.       ,
          0.       ],
       [124.60044  ,  -2.9666772, 178.00063  ,  17.800064 ,   0.       ,
          0.       ],
       [124.60044  ,  -2.9666772, 178.00063  ,  17.800064 ,   0.       ,
          2.9666772]], dtype=float32)

In [83]:
quantized_yolon_path = "yolon_int8_dyn.onnx"

In [84]:
quantize_dynamic(yolon_path_prep,
                     quantized_yolon_path,
                     weight_type=QuantType.QUInt8)

In [85]:
int8_model_dyn = rt.InferenceSession(quantized_yolon_path)
inf_on_image(test_img,int8_model_dyn)

([array([[[2.6920978e+02, 3.3017899e+02, 4.4634418e+02, 4.4187018e+02,
           9.3902755e-01, 2.0000000e+00],
          [2.6303589e+02, 4.4852527e+02, 4.5375281e+02, 5.7739105e+02,
           8.3488607e-01, 2.0000000e+00],
          [4.9034830e+02, 2.6118268e+02, 5.2054419e+02, 2.8753394e+02,
           7.7511281e-01, 2.0000000e+00],
          ...,
          [9.1386261e+00, 4.0846109e+02, 1.5792926e+02, 4.7968527e+02,
           2.4369359e-04, 2.0000000e+00],
          [3.3691507e+02, 3.0839490e+02, 3.6043195e+02, 3.2784262e+02,
           2.4256110e-04, 2.0000000e+00],
          [9.4576111e+00, 1.5277985e+02, 6.3358124e+02, 3.2317908e+02,
           2.3898482e-04, 1.0000000e+00]]], dtype=float32)],
 108.63460900145583)

In [86]:
benchmark_model_time(int8_model_dyn, num=100)

105.29064699876471

In [87]:
calculate_metrics(inf_on_image, int8_model_dyn)

Running evaluation...
0.8142687 1.0
0.24232233 1.0
0.23083985 1.0
0.17726478 2.0
0.06315377 1.0
0.03091243 1.0
0.028286546 1.0
0.027230263 1.0
0.026967406 1.0
0.023378491 1.0
0.022829652 1.0
0.022193551 2.0
0.021635234 1.0
0.020728022 1.0
0.016282916 1.0
0.015736312 1.0
0.0137319565 1.0
0.012794435 1.0
0.011069417 1.0
0.010166794 2.0
0.008723438 1.0
0.008408636 2.0
0.0065554082 1.0
0.0064972043 1.0
0.0064517856 2.0
0.006331265 1.0
0.006180495 1.0
0.0056768656 1.0
0.0055386424 2.0
0.005222976 2.0
0.0047337413 1.0
0.0047006905 1.0
0.004636407 1.0
0.004629582 2.0
0.004597336 1.0
0.0042921305 1.0
0.0042354167 2.0
0.0041570067 2.0
0.0038556159 1.0
0.0037367642 1.0
0.0034506917 1.0
0.0034206212 1.0
0.003295511 1.0
0.0032145083 1.0
0.003126949 1.0
0.0029935539 2.0
0.002899766 1.0
0.0026951432 1.0
0.0026673973 1.0
0.0025495589 1.0
0.0023975074 1.0
0.0022848547 1.0
0.002276063 1.0
0.0022456944 1.0
0.002234131 1.0
0.0022078454 2.0
0.0021749139 0.0
0.00204885 1.0
0.0020117462 1.0
0.0019361675 2.0