# Mobilenet v2 ONNX Runtime on CPU

In [2]:
from torchvision import models, datasets, transforms as T
mobilenet_v2 = models.mobilenet_v2(pretrained=True).to("mps")



拿 pretrained 模型：mobilenet_v2_float.onnx

In [3]:
import torch
image_height = 224
image_width = 224
x = torch.randn(1, 3, image_height, image_width, requires_grad=True)
torch_out = mobilenet_v2(x)

# Export the model
torch.onnx.export(mobilenet_v2,              # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "mobilenet_v2_float.onnx", # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output']) # the model's output names

verbose: False, log level: Level.ERROR



In [33]:

print(torch.backends.mps.is_available()) #the MacOS is higher than 12.3+
print(torch.backends.mps.is_built()) #MPS is activated

True
True


In [4]:
from PIL import Image
import numpy as np
import onnxruntime
import torch

def preprocess_image(image_path, height, width, channels=3):
    image = Image.open(image_path)
    image = image.resize((width, height), Image.NEAREST)
    image_data = np.asarray(image).astype(np.float32)
    image_data = image_data.transpose([2, 0, 1]) # transpose to CHW
    mean = np.array([0.079, 0.05, 0]) + 0.406
    std = np.array([0.005, 0, 0.001]) + 0.224
    for channel in range(image_data.shape[0]):
        image_data[channel, :, :] = (image_data[channel, :, :] / 255 - mean[channel]) / std[channel]
    image_data = np.expand_dims(image_data, 0)
    return image_data

In [5]:
# Download ImageNet labels
!curl -o imagenet_classes.txt https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

# Read the categories
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 10472  100 10472    0     0  16646      0 --:--:-- --:--:-- --:--:-- 16701


開始推論

In [28]:
session_fp32 = onnxruntime.InferenceSession("mobilenet_v2_float.onnx")

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def run_sample(session, image_file, categories):
    output = session.run([], {'input':preprocess_image(image_file, image_height, image_width)})[0]
    output = output.flatten()
    output = softmax(output) # this is optional
    top5_catid = np.argsort(-output)[:5]
    for catid in top5_catid:
        print(categories[catid], output[catid])

run_sample(session_fp32, 'beer.JPG', categories)
# run_sample(session_fp32, 'test.JPG', categories)

combination lock 0.13506676
padlock 0.11689277
coffeepot 0.065020375
oil filter 0.041768294
strainer 0.04128042


# 量化 Quantization 

In [22]:
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType
import os

def preprocess_func(images_folder, height, width, size_limit=0):
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
    unconcatenated_batch_data = []

    for image_name in batch_filenames:
        image_filepath = images_folder + '/' + image_name
        image_data = preprocess_image(image_filepath, height, width)
        unconcatenated_batch_data.append(image_data)
    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    return batch_data


class MobilenetDataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder):
        self.image_folder = calibration_image_folder
        self.preprocess_flag = True
        self.enum_data_dicts = []
        self.datasize = 0

    def get_next(self):
        if self.preprocess_flag:
            self.preprocess_flag = False
            nhwc_data_list = preprocess_func(self.image_folder, image_height, image_width, size_limit=0)
            self.datasize = len(nhwc_data_list)
            self.enum_data_dicts = iter([{'input': nhwc_data} for nhwc_data in nhwc_data_list])
        return next(self.enum_data_dicts, None)

壓縮模型 mobilenet_v2_float.onnx --> mobilenet_v2_uint8.onnx

In [25]:
# change it to your real calibration data set
calibration_data_folder = "calibration_imagenet"
dr = MobilenetDataReader(calibration_data_folder)

quantize_static('mobilenet_v2_float.onnx',
                'mobilenet_v2_uint8.onnx',
                dr)

print('ONNX full precision model size (MB):', os.path.getsize("mobilenet_v2_float.onnx")/(1024*1024))
print('ONNX quantized model size (MB):', os.path.getsize("mobilenet_v2_uint8.onnx")/(1024*1024))



ONNX full precision model size (MB): 13.344059944152832
ONNX quantized model size (MB): 3.483980178833008


開始推論

In [32]:
session_quant = onnxruntime.InferenceSession("mobilenet_v2_uint8.onnx")
run_sample(session_quant, 'test.JPG', categories)

cliff 0.18701226
wreck 0.15261108
seashore 0.101629
volcano 0.09180697
breakwater 0.04071354
