# Mobilenet v2 ONNX Runtime

In [6]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.15.2-cp310-cp310-macosx_11_0_arm64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m439.4 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting torch==2.0.1 (from torchvision)
  Downloading torch-2.0.1-cp310-none-macosx_11_0_arm64.whl (55.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.8/55.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
Collecting filelock (from torch==2.0.1->torchvision)
  Obtaining dependency information for filelock from https://files.pythonhosted.org/packages/52/90/45223db4e1df30ff14e8aebf9a1bf0222da2e7b49e53692c968f36817812/filelock-3.12.3-py3-none-any.whl.metadata
  Using cached filelock-3.12.3-py3-none-any.whl.metadata (2.7 kB)
Collecting networkx (from torch==2.0.1->torchvision)
  Using cached networkx-3.1-py3-none-any.whl (2.1 MB)
Collecting jinja2 (from torch==2.0.1->torchvision)
  Using cached Jinja2-3.1.2-py3-n

In [7]:
from torchvision import models, datasets, transforms as T
mobilenet_v2 = models.mobilenet_v2(pretrained=True)



拿 pretrained 模型：mobilenet_v2_float.onnx

In [9]:
import torch
image_height = 224
image_width = 224
x = torch.randn(1, 3, image_height, image_width, requires_grad=True)
torch_out = mobilenet_v2(x)

# Export the model
torch.onnx.export(mobilenet_v2,              # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "mobilenet_v2_float.onnx", # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output']) # the model's output names

verbose: False, log level: Level.ERROR



In [11]:
from PIL import Image
import numpy as np
import onnxruntime
import torch

def preprocess_image(image_path, height, width, channels=3):
    image = Image.open(image_path)
    image = image.resize((width, height), Image.NEAREST)
    image_data = np.asarray(image).astype(np.float32)
    image_data = image_data.transpose([2, 0, 1]) # transpose to CHW
    mean = np.array([0.079, 0.05, 0]) + 0.406
    std = np.array([0.005, 0, 0.001]) + 0.224
    for channel in range(image_data.shape[0]):
        image_data[channel, :, :] = (image_data[channel, :, :] / 255 - mean[channel]) / std[channel]
    image_data = np.expand_dims(image_data, 0)
    return image_data

In [13]:
# Download ImageNet labels
!curl -o imagenet_classes.txt https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

# Read the categories
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 10472  100 10472    0     0  17396      0 --:--:-- --:--:-- --:--:-- 17511


開始推論

In [15]:
session_fp32 = onnxruntime.InferenceSession("mobilenet_v2_float.onnx")

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def run_sample(session, image_file, categories):
    output = session.run([], {'input':preprocess_image(image_file, image_height, image_width)})[0]
    output = output.flatten()
    output = softmax(output) # this is optional
    top5_catid = np.argsort(-output)[:5]
    for catid in top5_catid:
        print(categories[catid], output[catid])

run_sample(session_fp32, 'test_data/beer.JPG', categories)
# run_sample(session_fp32, 'test.JPG', categories)

combination lock 0.094247974
padlock 0.09198424
coffeepot 0.07444616
coffee mug 0.059318908
espresso maker 0.054574847


# 量化 Quantization 

In [2]:
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType
import os

def preprocess_func(images_folder, height, width, size_limit=0):
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
    unconcatenated_batch_data = []

    for image_name in batch_filenames:
        image_filepath = images_folder + '/' + image_name
        image_data = preprocess_image(image_filepath, height, width)
        unconcatenated_batch_data.append(image_data)
    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    return batch_data


class MobilenetDataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder):
        self.image_folder = calibration_image_folder
        self.preprocess_flag = True
        self.enum_data_dicts = []
        self.datasize = 0

    def get_next(self):
        if self.preprocess_flag:
            self.preprocess_flag = False
            nhwc_data_list = preprocess_func(self.image_folder, image_height, image_width, size_limit=0)
            self.datasize = len(nhwc_data_list)
            self.enum_data_dicts = iter([{'input': nhwc_data} for nhwc_data in nhwc_data_list])
        return next(self.enum_data_dicts, None)

preprocess:
- Symbolic Shape Inference. It works best with transformer models.
- ONNX Runtime Model Optimization.
- ONNX Shape Inference.

In [3]:
!python -m onnxruntime.quantization.preprocess --input mobilenet_v2_float.onnx --output mobilenet_v2_float_infer.onnx

量化模型 mobilenet_v2_float.onnx --> mobilenet_v2_uint8.onnx

In [4]:
# change it to your real calibration data set
calibration_data_folder = "calibration_imagenet"
dr = MobilenetDataReader(calibration_data_folder)

quantize_static('mobilenet_v2_float_infer.onnx',
                'mobilenet_v2_uint8.onnx',
                dr)

print('ONNX full precision model size (MB):', os.path.getsize("mobilenet_v2_float.onnx")/(1024*1024))
print('ONNX quantized model size (MB):', os.path.getsize("mobilenet_v2_uint8.onnx")/(1024*1024))

NameError: name 'image_height' is not defined

開始推論

In [29]:
session_quant = onnxruntime.InferenceSession("mobilenet_v2_uint8.onnx")
run_sample(session_quant, 'beer.JPG', categories)

beer glass 0.21646532
padlock 0.14559749
coffee mug 0.07273514
beer bottle 0.07273514
combination lock 0.059652314
