# deploy with python

### 1.使用python部署*.so文件

In [None]:
# 使用python部署*.so文件
import onnx
import time
import tvm
import numpy as np
import tvm.relay as relay
from PIL import Image
from tvm.contrib import graph_runtime

# Locations of the three artifacts exported for the PC build:
# the shared library, the graph JSON and the serialized parameters.
libpath = "../models/mobilenet_pc.so"
graph_json_path = "../models/mobilenet_pc.json"
param_path = "../models/mobilenet_pc.params"

# Load the exported model back in to check that it works.
# Context managers make sure both file handles are closed right after reading
# instead of being left open until garbage collection.
with open(graph_json_path) as graph_file:
    loaded_json = graph_file.read()
loaded_lib = tvm.runtime.load_module(libpath)
with open(param_path, "rb") as param_file:
    loaded_params = bytearray(param_file.read())

# The image handling below exists only to test the deployed model.
# Per-channel mean and std of the ImageNet training set, used for normalization.
mean = [123., 117., 104.]
std = [58.395, 57.12, 57.375]


def transform_image(image):
    """Normalize an image and lay it out as a batched, channels-first array.

    The per-channel ``mean`` is subtracted and the result divided by the
    per-channel ``std``; the axes are then reordered with ``(2, 0, 1)`` and a
    leading axis of length one is added.

    Args:
        image: A PIL image or array-like whose last axis holds the three
            channels matched by ``mean`` and ``std``.

    Returns:
        A ``float32`` numpy array with one more axis than the input.
    """
    normalized = (image - np.array(mean)) / np.array(std)
    channels_first = np.array(normalized).transpose((2, 0, 1))
    return channels_first[np.newaxis, :].astype('float32')

# Load the test picture and resize it to 224x224.
# PNG files are often RGBA, palette or grayscale; forcing RGB guarantees the
# three channels that the per-channel mean/std normalization relies on.
img = Image.open('../imgs/plane.png').convert('RGB').resize((224, 224))
x = transform_image(img)


# The execution platform here is the CPU
ctx = tvm.cpu()
# ctx = tvm.gpu(0)
dtype = 'float32'

# Build a graph runtime module from the loaded graph JSON and library,
# then feed it the parameters and the preprocessed image.
module = graph_runtime.create(loaded_json, loaded_lib, ctx) # load the model
module.load_params(loaded_params)
module.set_input("input.1", x)
module.run()
# Fetch the first output as a numpy array and print the index of its largest value
out_deploy = module.get_output(0).asnumpy()
print(type(out_deploy))
print(out_deploy.argmax())
# print(out_deploy)

# Print the time TVM takes for 4000 consecutive runs.
# perf_counter() is monotonic and high-resolution, so the measurement cannot be
# skewed by system clock adjustments the way time.time() can.
since = time.perf_counter()
for _ in range(4000):
    module.run()
time_elapsed = time.perf_counter() - since
# Round to whole seconds before splitting into minutes and seconds; rounding the
# remainder on its own could print "60s" instead of rolling over to the next minute.
minutes, seconds = divmod(round(time_elapsed), 60)
print('Time elapsed is {:.0f}m {:.0f}s'.format(minutes, seconds))

### 2.使用python部署so （GPU）

In [1]:
# 导入onnx，转换成*.so动态库
import onnx
import time
import tvm
import numpy as np
import tvm.relay as relay
from PIL import Image
from tvm.contrib import graph_runtime

# As before, start by reading the .onnx model
onnx_model = onnx.load('../models/mobilenetv2.onnx')  # load the model

# -------The image handling below exists only to test the model-----
# Per-channel mean and std of the ImageNet training set, used for normalization.
mean = [123., 117., 104.]
std = [58.395, 57.12, 57.375]


def transform_image(image):
    """Normalize an image and lay it out as a batched, channels-first array.

    The per-channel ``mean`` is subtracted and the result divided by the
    per-channel ``std``; the axes are then reordered with ``(2, 0, 1)`` and a
    leading axis of length one is added.

    Args:
        image: A PIL image or array-like whose last axis holds the three
            channels matched by ``mean`` and ``std``.

    Returns:
        A ``float32`` numpy array with one more axis than the input.
    """
    normalized = (image - np.array(mean)) / np.array(std)
    channels_first = np.array(normalized).transpose((2, 0, 1))
    return channels_first[np.newaxis, :].astype('float32')

# Load the test picture and resize it to 224x224.
# PNG files are often RGBA, palette or grayscale; forcing RGB guarantees the
# three channels that the per-channel mean/std normalization relies on.
img = Image.open('../imgs/plane.png').convert('RGB').resize((224, 224))
x = transform_image(img)
# -------------end---------------


#  ----------------import the ONNX model into TVM---------------
input_name = "input.1"  # must match the input name of the previously exported ONNX model
# Map the input name to the shape of the preprocessed test image
shape_dict = {input_name: x.shape}
# Read the exported ONNX model with Relay's ONNX frontend
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)
# -----------------end-----------




# ------Test on the PC's GPU first, so build for the CUDA target----------
# Set the target hardware to the GPU and generate the TVM model.
## ----------------------------
# relay.build yields three artifacts:
# graph: execution graph in json format
# lib: tvm module library of compiled functions for the graph on the target hardware
# params: parameter blobs
## ---------------------------
target = 'cuda'
# Alternative targets:
# target = tvm.target.create('llvm') # x86
# target = tvm.target.arm_cpu("rasp3b") # raspi
# target = 'llvm'

# Build the optimized model with TVM.
# tvm.transform.PassContext is the supported way to set the optimization level;
# relay.build_config is a deprecated wrapper around it. The level stays at 2.
with tvm.transform.PassContext(opt_level=2):
    graph, lib, params = relay.build(sym, target, params=params)
# ------------

# ----------------Inference with GPU (PYTHON)------
# libpath = "../models/mobilenet_cuda.so"
# graph_json_path = "../models/mobilenet_cuda.json"
# param_path = "../models/mobilenet_cuda.params"

# Run on GPU device 0, using the freshly built graph and library directly
ctx = tvm.gpu(0)
dtype = 'float32'
module = graph_runtime.create(graph, lib, ctx)   # load the model
# module.load_params(params)
# module.set_input("input.1", x)
## set input data
module.set_input('input.1', tvm.nd.array(x.astype(dtype)))
## set input params
module.set_input(**params)
module.run()
# Fetch the first output as a numpy array and print the index of its largest value
out_deploy = module.get_output(0).asnumpy()
print(type(out_deploy))
print(out_deploy.argmax())
# print(out_deploy)

# ## 加载模型
# m = graph_runtime.create(graph, lib, ctx)
# ## set input data
# m.set_input('data', tvm.nd.array(x.astype(dtype)))
# ## set input params
# m.set_input(**params)
# m.run()
# # get output
# outputs = m.get_output(0)
# top1 = np.argmax(outputs.asnumpy()[0])

# -----------------Export the artifacts--------
# Destinations of the three exported files; the paths can be chosen freely.
libpath = "../models/mobilenet_cuda.so"
graph_json_path = "../models/mobilenet_cuda.json"
param_path = "../models/mobilenet_cuda.params"

print("Output model files")

# Export the dynamic library we need.
lib.export_library(libpath)

# Save the structure of the neural network as a JSON file.
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

# Save the weight parameters of the neural network model.
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
# -------------The model export stage ends here--------
    
    


Cannot find config for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32, workload=('dense_small_batch.cuda', ('TENSOR', (1, 1280), 'float32'), ('TENSOR', (1000, 1280), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.


Output model files


<class 'numpy.ndarray'>
404
