# PyTorch to ONNX
将 PyTorch 模型转成 ONNX 模型，测试直接使用 PyTorch 推理的耗时，并用 TVM 加载导出的 ONNX 模型以验证转换结果。


### 1. 导入库包

In [1]:
import torch
import time
from MobileNetV2 import mobilenet_v2

### 2. 测试pytorch运行模型时间（4000次）

In [2]:
# Load the pretrained PyTorch model and build a random input batch
# (1 image, 3 channels, 224x224) used for timing and, later, for export.
model = mobilenet_v2(pretrained=True)
example = torch.rand(1, 3, 224, 224)

# Switch to inference mode before the timed region so only forward passes
# are measured.
model.eval()

# Time 4000 forward passes without autograd bookkeeping. perf_counter() is a
# monotonic high-resolution clock, so the measurement is not affected by
# system clock adjustments the way time.time() can be.
with torch.no_grad():
    since = time.perf_counter()
    for i in range(4000):
        model(example)
    time_elapsed = time.perf_counter() - since

# Round to whole seconds *before* splitting into minutes and seconds;
# formatting `time_elapsed % 60` with {:.0f} could otherwise print "1m 60s".
minutes, seconds = divmod(int(round(time_elapsed)), 60)
print('Time elapsed is {:.0f}m {:.0f}s'.
      format(minutes, seconds))

Time elapsed is 1m 30s


### 3. pytorch模型转换成ONNX模型

In [3]:
# Export the PyTorch model to ONNX, tracing it with the `example` input.
# The graph input is named explicitly: code that loads this file looks the
# input up by the name "input.1", and the auto-generated default name is not
# stable across PyTorch versions.
torch_out = torch.onnx.export(model,
                              example,
                              "../models/mobilenetv2.onnx",
                              verbose=True,              # print the exported graph
                              export_params=True,        # store the trained weights in the file
                              input_names=["input.1"]    # pin the graph input name
                              )

graph(%input.1 : Float(1:150528, 3:50176, 224:224, 224:1, requires_grad=0, device=cpu),
      %classifier.weight : Float(1000:1280, 1280:1, requires_grad=1, device=cpu),
      %classifier.bias : Float(1000:1, requires_grad=1, device=cpu),
      %468 : Float(32:27, 3:9, 3:3, 3:1, requires_grad=0, device=cpu),
      %469 : Float(32:1, requires_grad=0, device=cpu),
      %471 : Float(32:9, 1:9, 3:3, 3:1, requires_grad=0, device=cpu),
      %472 : Float(32:1, requires_grad=0, device=cpu),
      %474 : Float(16:32, 32:1, 1:1, 1:1, requires_grad=0, device=cpu),
      %475 : Float(16:1, requires_grad=0, device=cpu),
      %477 : Float(96:16, 16:1, 1:1, 1:1, requires_grad=0, device=cpu),
      %478 : Float(96:1, requires_grad=0, device=cpu),
      %480 : Float(96:9, 1:9, 3:3, 3:1, requires_grad=0, device=cpu),
      %481 : Float(96:1, requires_grad=0, device=cpu),
      %483 : Float(24:96, 96:1, 1:1, 1:1, requires_grad=0, device=cpu),
      %484 : Float(24:1, requires_grad=0, device=cpu),
    

### 4. ONNX 转成 TVM，并测试是否成功

In [5]:
# onnx to tvm , or test onnx model
import onnx
import time
import tvm
import numpy as np
import tvm.relay as relay
from PIL import Image

onnx_model = onnx.load('../models/mobilenetv2.onnx')  # load the ONNX model from disk

# Per-channel mean and std of the ImageNet training set. The values are on the
# 0-255 pixel scale (e.g. 58.395 = 0.229 * 255), so they apply directly to
# 8-bit pixel values without first rescaling to [0, 1].
mean = [123., 117., 104.]
std = [58.395, 57.12, 57.375]


def transform_image(image):
    """Normalize a channels-last image and return it as a 1 x 3 x H x W batch.

    Parameters
    ----------
    image : array-like
        Anything ``np.asarray`` can convert to an (H, W, C) array, such as a
        PIL image or a NumPy array. C must be 3; a single channel (C == 1) is
        also accepted and is broadcast across the three normalized channels.

    Returns
    -------
    numpy.ndarray
        C-contiguous float32 array of shape (1, 3, H, W) holding
        ``(pixel - mean) / std`` per channel.

    Raises
    ------
    ValueError
        If the input is not 3-dimensional or its last axis is not 1 or 3
        (for example an RGBA image with four channels).
    """
    # Convert explicitly instead of relying on PIL-image-minus-ndarray
    # arithmetic; float64 keeps the intermediate precision of the math below.
    pixels = np.asarray(image, dtype='float64')
    if pixels.ndim != 3 or pixels.shape[2] not in (1, 3):
        raise ValueError(
            'expected an H x W x 3 image, got array of shape {}'.format(pixels.shape))

    normalized = (pixels - np.array(mean)) / np.array(std)
    # HWC -> CHW, then add the leading batch axis.
    batched = normalized.transpose((2, 0, 1))[np.newaxis, :]
    # A plain astype() would keep the transposed memory layout; make the
    # result C-contiguous so consumers reading the raw buffer see CHW order.
    return np.ascontiguousarray(batched, dtype='float32')

# Load the demo image as 3-channel RGB and resize it to the 224x224 input size.
# convert('RGB') keeps PNGs that carry an alpha channel or a palette from
# breaking the 3-channel normalization in transform_image; the context manager
# closes the image file once the pixels have been read.
with Image.open('../imgs/plane.png') as src_img:
    img = src_img.convert('RGB').resize((224, 224))
x = transform_image(img)
assert x.shape == (1, 3, 224, 224), x.shape

# Save the preprocessed demo input to a binary file.
x.astype("float32").tofile("./plane.bin")

target = 'llvm'

# Must match the input name of the exported ONNX graph
# ("%input.1" in the graph printed by the export step).
input_name = "input.1"
shape_dict = {input_name: x.shape}
# Import the exported ONNX model through Relay's ONNX frontend.
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# create_executor() only sets the executor up; the model is compiled when
# evaluate() is called. evaluate() therefore has to run inside the
# PassContext for opt_level=3 to take effect.
with tvm.transform.PassContext(opt_level=3):
    intrp = relay.build_module.create_executor('graph', sym, tvm.cpu(0), target)
    func = intrp.evaluate()

dtype = 'float32'

# Run one inference and print the index of the highest-scoring class.
output = func(tvm.nd.array(x.astype(dtype)), **params).asnumpy()
print(output.argmax())




Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('dense_nopack.x86', ('TENSOR', (1, 1280), 'float32'), ('TENSOR', (1000, 1280), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 320, 7, 7), 'float32'), ('TENSOR', (1280, 320, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 960, 7, 7), 'float32'), ('TENSOR', (320, 960, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('depthwise_conv2d_NCHWc.x86', ('TENSOR', (1, 960, 

404
