# PyTorch to TVM (CPU)

### 1.导包

In [1]:
import tvm
from tvm import relay

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# PyTorch imports
import torch
import torchvision

import os
import sys
sys.path.append("../../")
from nets.yolo4 import YoloBody

### 2.Load a pretrained PyTorch model

In [2]:
# ------------- load the model
# Files holding the pretrained weights, the anchor boxes and the class labels.
model_path = '../../model_data/yolo4_weights.pth'
anchors_path = '../../model_data/yolo_anchors.txt'
classes_path = '../../model_data/coco_classes.txt'

# Class labels: one per line, with surrounding whitespace stripped.
classes_path = os.path.expanduser(classes_path)
with open(classes_path) as f:
    class_names = [line.strip() for line in f.readlines()]

# Anchors: comma-separated floats read from the first line of the file,
# regrouped to shape (-1, 3, 2) and reversed along the first axis.
anchors_path = os.path.expanduser(anchors_path)
with open(anchors_path) as f:
    anchors = f.readline()
anchors = np.array([float(value) for value in anchors.split(',')])
anchors = anchors.reshape([-1, 3, 2])[::-1, :, :]

# Build the network in eval mode, sized by the length of the first anchor
# group and the number of class labels.
net = YoloBody(len(anchors[0]), len(class_names)).eval()
print('Loading weights into state dict...')

# Map the checkpoint onto CUDA when it is available, otherwise onto the CPU.
if torch.cuda.is_available():
    is_cuda = 'cuda'
else:
    is_cuda = 'cpu'
device = torch.device(is_cuda)
state_dict = torch.load(model_path, map_location=device)
net.load_state_dict(state_dict)

# Optional GPU placement, left disabled:
# if is_cuda == "cuda":
#     os.environ["CUDA_VISIBLE_DEVICES"] = '0'
#     net = net.cuda()

print("model set!")


Loading weights into state dict...
model set!


In [3]:
# Obtain a TorchScript module by tracing the network once with a random
# dummy input of shape [1, 3, 416, 416], then switch it to eval mode.
input_shape = [1, 3, 416, 416]
input_data = torch.randn(input_shape)
scripted_model = torch.jit.trace(net, input_data)
scripted_model = scripted_model.eval()

### 3.Load a test image

In [4]:
# ------------ load image
# Preprocess the test image into a float32 array of shape (1, 3, 416, 416)
# with values scaled to [0, 1].
# convert("RGB") forces exactly three channels, so grayscale, palette or
# RGBA files no longer break the HWC -> CHW transpose or produce a channel
# count that differs from the traced model input.
img = Image.open("street.jpg").convert("RGB")
img = img.resize((416, 416))
img = np.array(img, dtype=np.float32)
img /= 255.0                       # scale 8-bit pixel values to [0, 1]
img = img.transpose((2, 0, 1))     # HWC -> CHW
img = np.expand_dims(img, axis=0)  # add the leading batch dimension

## 4~6是一个测试步骤，和7执行一个就行
### 4.Import the graph to Relay

Convert PyTorch graph to Relay graph. The input name can be arbitrary.

In [6]:
# Register the model input under `input_name` with the shape of the
# preprocessed image, then convert the TorchScript module to Relay.
input_name = "input_0"
shape_list = [
    (input_name, img.shape),
]
mod, params = relay.frontend.from_pytorch(
    scripted_model,
    shape_list,
)

### 5.Relay Build

Compile the graph to llvm target with given input specification.




In [7]:
# Compile the Relay module with LLVM as both target and host, at
# optimization level 3; `ctx` is the CPU device used to run the result.
target = target_host = "llvm"
ctx = tvm.cpu(0)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(
        mod,
        target=target,
        target_host=target_host,
        params=params,
    )

Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 13, 13, 4), 'float32'), ('TENSOR', (51, 256, 1, 1, 4, 5), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW4c', 'NCHW5c', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 13, 13, 8), 'float32'), ('TENSOR', (128, 64, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 13, 13, 8), 'float32'), ('TENSOR', (64, 128, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=ll

### 6.Execute the portable graph on TVM

Now we can try deploying the compiled model on target.

In [8]:
from tvm.contrib import graph_runtime

dtype = "float32"

# Instantiate the compiled module's "default" entry on the chosen device.
m = graph_runtime.GraphModule(lib["default"](ctx))

# Feed the preprocessed image under the registered input name.
m.set_input(
    input_name,
    tvm.nd.array(img.astype(dtype)),
)

# Run inference and fetch the first output.
m.run()
tvm_output = m.get_output(0)

In [9]:
# Print the Python type of the first output, then display the output itself
# as the cell's result (the bare trailing expression is what gets rendered).
print(type(tvm_output))
tvm_output

<class 'tvm.runtime.ndarray.NDArray'>


<tvm.nd.NDArray shape=(1, 255, 13, 13), cpu(0)>
array([[[[ 1.58987308e+00,  3.62838149e-01,  1.57810807e-01, ...,
           1.97981685e-01, -5.12180746e-01, -1.56389832e+00],
         [ 1.19889498e+00,  4.58476335e-01, -2.45795161e-01, ...,
           1.08986273e-01, -1.44760519e-01, -1.21538436e+00],
         [ 1.01471198e+00,  2.55160570e-01,  1.79515574e-02, ...,
           1.37320906e-03, -1.23322256e-01, -1.07856035e+00],
         ...,
         [ 6.41318083e-01, -1.04248476e+00, -4.72584516e-02, ...,
          -4.59737927e-01,  8.08360428e-02, -1.12444615e+00],
         [ 7.29421794e-01, -1.29384160e+00, -6.04170084e-01, ...,
          -5.29084563e-01,  2.25030541e-01, -1.40401137e+00],
         [ 1.61844206e+00,  8.76458824e-01,  3.72441232e-01, ...,
          -1.08174050e+00, -5.71890950e-01, -1.38488781e+00]],

        [[ 1.38944399e+00,  1.25849831e+00,  1.11148238e+00, ...,
           5.82395911e-01,  1.07248533e+00,  1.79491758e+00],
         [ 4.71859902e-01,  3.65642279e-

## 7是导出的步骤
### 7. export *.so

In [5]:
# Export the traced model as three deployable artifacts: a shared library
# (.so), the graph definition (.json) and the serialized weights (.params).

# Name under which the model input is registered in the Relay graph, paired
# with the shape of the preprocessed image.
input_name = "input_0"
shape_list = [(input_name, img.shape)]


# Test on the PC's CPU first, so export with the LLVM target.
target = tvm.target.create('llvm') # x86
# target = tvm.target.arm_cpu("rasp3b") # raspi
# target = 'llvm'


# Import the TorchScript model through Relay's PyTorch frontend.
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
# sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Build the optimized module. tvm.transform.PassContext is used instead of
# the deprecated relay.build_config wrapper; the optimization level is unchanged.
with tvm.transform.PassContext(opt_level=2):
    graph, lib, params = relay.build_module.build(mod, target, params=params)


# Kept at notebook scope: neither name is used in this cell.
dtype = 'float32'
from tvm.contrib import graph_runtime

# Export the shared library; the destination path can be changed freely.
print("Output model files")
libpath = "../models/yolov4_pc.so"
# Create the output directory up front so the exports below do not fail on a
# fresh checkout where ../models does not exist yet.
os.makedirs(os.path.dirname(libpath), exist_ok=True)
lib.export_library(libpath)

# Save the network structure as a JSON file.
graph_json_path = "../models/yolov4_pc.json"
with open(graph_json_path, 'w') as fo:
    fo.write(graph)

# Save the model weights.
param_path = "../models/yolov4_pc.params"
with open(param_path, 'wb') as fo:
    fo.write(relay.save_param_dict(params))
# ------------- end of the model export stage --------

Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 13, 13), 'float32'), ('TENSOR', (255, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 13, 13), 'float32'), ('TENSOR', (1024, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 13, 13), 'float32'), ('TENSOR', (512, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=llvm -keys=cpu -link-params=0, w

Output model files
