In [9]:
import torch
import cv2
import numpy as np
import sys
sys.path.append("../../deep_sort_lite")  # 添加上一级目录到模块搜索路径

# export mobileNetV2_bottle as onnx model
from embedder.embedder_pytorch import MobileNetv2_Embedder as Embedder

onnx_path = "../../model/mobilenetv2.onnx"  # 输出文件的路径
img = cv2.imread("./dog.jpg")

# load pytorch model
embedder = Embedder()
feats = embedder.predict([img])
print(feats)

[array([0.23643717, 1.2312804 , 1.8156879 , ..., 0.10831741, 0.16737019,
       0.70336944], dtype=float32)]


In [10]:
# export onnx model
model  = embedder.model
model.eval()
print(model)
dummy_input = torch.randn(1,3,224,224)  # 假设输入是这个形状
torch.onnx.export(model, dummy_input, onnx_path, export_params=True, opset_version=11)

MobileNetV2_bottle(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)


In [11]:
# load onnx model
import onnx
import onnxruntime as ort

onnx_model = onnx.load(onnx_path)

# 打印输入和输出
print("Inputs: ", onnx_model.graph.input)
print("Outputs: ", onnx_model.graph.output)

onnx.checker.check_model(onnx_model)
# 打印模型的层
print(onnx.helper.printable_graph(onnx_model.graph))

# 创建一个运行时会话
sess = ort.InferenceSession(onnx_path)

# 输入是一个形状为 (1, 3, 224, 224) 的张量
img_tensor = embedder.preprocess(img)[0]
img_array = np.expand_dims(img_tensor, axis=0)
input_name = sess.get_inputs()[0].name

# 运行模型
result = sess.run(None, {input_name: img_array})

# 打印结果
print(result)


Inputs:  [name: "input.1"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 3
      }
      dim {
        dim_value: 224
      }
      dim {
        dim_value: 224
      }
    }
  }
}
]
Outputs:  [name: "533"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 1
      }
      dim {
        dim_value: 1280
      }
    }
  }
}
]
graph torch_jit (
  %input.1[FLOAT, 1x3x224x224]
) initializers (
  %onnx::Conv_535[FLOAT, 32x3x3x3]
  %onnx::Conv_536[FLOAT, 32]
  %onnx::Conv_538[FLOAT, 32x1x3x3]
  %onnx::Conv_539[FLOAT, 32]
  %onnx::Conv_541[FLOAT, 16x32x1x1]
  %onnx::Conv_542[FLOAT, 16]
  %onnx::Conv_544[FLOAT, 96x16x1x1]
  %onnx::Conv_545[FLOAT, 96]
  %onnx::Conv_547[FLOAT, 96x1x3x3]
  %onnx::Conv_548[FLOAT, 96]
  %onnx::Conv_550[FLOAT, 24x96x1x1]
  %onnx::Conv_551[FLOAT, 24]
  %onnx::Conv_553[FLOAT, 144x24x1x1]
  %onnx::Conv_554[FLOAT, 144]
  %onnx::Conv_556[FLOAT, 144x1x3x3]
  %onnx::C

2024-04-03 17:14:46.302631537 [E:onnxruntime:Default, env.cc:251 ThreadMain] pthread_setaffinity_np failed for thread: 3537669, index: 2, mask: {3, }, error code: 22 error msg: Invalid argument. Specify the number of threads explicitly so the affinity is not set.


[array([[0.23643705, 1.2312772 , 1.8156841 , ..., 0.10831792, 0.16736968,
        0.70337003]], dtype=float32)]


In [12]:
# use atc convert onnx to om
!atc --model=../../model/mobilenetv2.onnx --framework=5 --output=../../model/mobilenetv2 --input_format=NCHW  --soc_version=Ascend310B4 

ATC start working now, please wait for a moment.
^C


In [13]:
from ais_bench.infer.interface import InferSession
om_path = "../../model/mobilenetv2.om"  # om模型文件的路径

In [15]:
import acl

device_id = 0
NPY_FLOAT32 = 11
ACL_MEMCPY_HOST_TO_HOST = 0
ACL_MEMCPY_HOST_TO_DEVICE = 1
ACL_MEMCPY_DEVICE_TO_HOST = 2
ACL_MEMCPY_DEVICE_TO_DEVICE = 3
ACL_MEM_MALLOC_HUGE_FIRST = 0
ACL_DEVICE, ACL_HOST = 0, 1
ACL_SUCCESS = 0

# ======运行管理资源申请======

# init acl resource
ret = acl.init()
if ret != ACL_SUCCESS:
    print('acl init failed, errorCode is', ret)

# 1.指定运算的Device。
ret = acl.rt.set_device(device_id)
print("set device ret:", ret, "device_id:", device_id)

# 2.显式创建一个Context，用于管理Stream对象。
context, ret = acl.rt.create_context(device_id)
print("create context ret:", ret)

# 3.显式创建一个Stream。
#用于维护一些异步操作的执行顺序，确保按照应用程序中的代码调用顺序执行任务。
stream, ret = acl.rt.create_stream()
print("create stream ret:", ret)

# load model from file
model_id, ret = acl.mdl.load_from_file(om_path)
if ret != ACL_SUCCESS:
    print('load model failed, errorCode is', ret)

# create description of model
model_desc = acl.mdl.create_desc()
ret = acl.mdl.get_desc(model_desc, model_id)
if ret != ACL_SUCCESS:
    print('get desc failed, errorCode is', ret)

# 2.准备模型推理的输入数据集。
# 创建aclmdlDataset类型的数据，描述模型推理的输入。
load_input_dataset = acl.mdl.create_dataset()
# 获取模型输入的数量。
input_size = acl.mdl.get_num_inputs(model_desc)
input_data = []
# 循环为每个输入申请内存，并将每个输入添加到aclmdlDataset类型的数据中。
for i in range(input_size):
    buffer_size = acl.mdl.get_input_size_by_index(model_desc, i)
    print("input buffer[", i, "] size:", buffer_size)
    # 申请输入内存。
    buffer, ret = acl.rt.malloc(buffer_size, ACL_MEM_MALLOC_HUGE_FIRST)
    data = acl.create_data_buffer(buffer, buffer_size)
    _, ret = acl.mdl.add_dataset_buffer(load_input_dataset, data)
    print("add input dataset ret:", ret)
    input_data.append({"buffer": buffer, "size": buffer_size})

# 3.准备模型推理的输出数据集。
# 创建aclmdlDataset类型的数据，描述模型推理的输出。
load_output_dataset = acl.mdl.create_dataset()
# 获取模型输出的数量。
output_size = acl.mdl.get_num_outputs(model_desc)
output_data = []
# 循环为每个输出申请内存，并将每个输出添加到aclmdlDataset类型的数据中。
for i in range(output_size):
    buffer_size = acl.mdl.get_output_size_by_index(model_desc, i)
    print("output buffer[", i, "] size:", buffer_size)
    # 申请输出内存。
    buffer, ret = acl.rt.malloc(buffer_size, ACL_MEM_MALLOC_HUGE_FIRST)
    data = acl.create_data_buffer(buffer, buffer_size)
    _, ret = acl.mdl.add_dataset_buffer(load_output_dataset, data)
    print("add output dataset ret:", ret)
    output_data.append({"buffer": buffer, "size": buffer_size})


set device ret: 0 device_id: 0
create context ret: 0
create stream ret: 0
input buffer[ 0 ] size: 602112
add input dataset ret: 0
output buffer[ 0 ] size: 5120
add output dataset ret: 0
