# Export model and infer

In [None]:
# Absolute locations used by every later cell, in order: the training/export
# config, the best checkpoint produced by training, and the directory that
# holds the exported inference model.
control_yml, best_model_path, inference_model = (
    r'e:\PaddlePaddle\PaddleDetection\configs\ppyoloe\ppyoloe_plus_crn_l_custom.yml',
    r'e:\PaddlePaddle\PaddleDetection\output\ppyoloe_plus_crn_l_custom\best_model.pdparams',
    r"e:\PaddlePaddle\PaddleDetection\output_inference\ppyoloe_plus_crn_l_custom",
)

In [None]:
# Export the best checkpoint as an inference model to be converted with paddle2onnx.
# The input shape is pinned to 3x480x480. exclude_nms=True presumably leaves the
# NMS stage out of the exported graph (the ONNX section of this notebook appends
# an EfficientNMS_TRT node itself) -- confirm against tools/export_model.py.
!python tools/export_model.py -c $control_yml -o weights=$best_model_path trt=True exclude_nms=True TestReader.inputs_def.image_shape=[3,480,480]

In [None]:
# Export the same checkpoint again, this time without exclude_nms, for the
# Paddle Inference run further down.
# NOTE(review): no output directory is given on either export command, so this
# run presumably overwrites the files written by the previous cell -- convert
# the NMS-free export to ONNX before running this one, or confirm the paths.
!python tools/export_model.py -c $control_yml -o weights=$best_model_path trt=True TestReader.inputs_def.image_shape=[3,480,480]

## Generate images for inference

In [None]:
import numpy as np
import os
import shutil
import glob

# Copy a random sample of dataset images into a fresh directory that the
# inference cells below read from.
image_path = r'dataset\road_disease_voc\images'
test_path = r'dataset\road_disease_voc\test_images'
NUM_TEST_IMAGES = 10

# Start from an empty test directory so images from earlier runs do not linger.
if os.path.exists(test_path):
    shutil.rmtree(test_path)
os.makedirs(test_path, exist_ok=True)

imgs = glob.glob(os.path.join(image_path, '**', '*.jpg'), recursive=True)
if not imgs:
    raise FileNotFoundError(f"no .jpg images found under {image_path!r}")

# replace=False: sample distinct files (the default samples with replacement,
# which can pick the same image several times); the size is capped so a small
# dataset does not make the draw fail.
infer_imgs = np.random.choice(imgs, min(NUM_TEST_IMAGES, len(imgs)), replace=False)
for img in infer_imgs:
    img_name = os.path.basename(img)
    src_path = img
    dst_path = os.path.join(test_path, img_name)
    shutil.copy(src_path, dst_path)

In [None]:
# Run tools/infer.py with the trained checkpoint on the sampled test images;
# --output_dir sends the results to ./results.
!python tools/infer.py -c $control_yml -o weights=$best_model_path --infer_dir=$test_path --output_dir=results

# TRT

## Paddle Inference

In [None]:
# Run the exported inference model through deploy/python/infer.py on the GPU
# with run_mode=trt_fp16; results are written to ./results_trt.
!python deploy/python/infer.py --model_dir=$inference_model --image_dir=$test_path --device=GPU --run_mode=trt_fp16 --output_dir=results_trt

# ONNX

In [None]:
import onnx
import onnxsim
import numpy as np
import onnx_graphsurgeon as gs
from onnx import shape_inference
from collections import OrderedDict

# NOTE: adjust the settings below to match your own model
########################################################
INPUT_PATH = './onnx_model/ppyoloe_plus_crn_l_custom_std.onnx'
WEIGHTS_TYPE = "l"
SAVE_PATH = './onnx_nms_model/ppyoloe_plus_crn_l_custom_std_nms.onnx'
CLASS_NUM = 4
SCORE_THRESHOLD = 0.25
IOU_THRESHOLD = 0.45
########################################################

# Mul node name recorded for each supported model size.  An unlisted
# WEIGHTS_TYPE leaves Mul_name unset.
# NOTE(review): Mul_name is not read anywhere else in the visible notebook;
# the NMS cell looks the node up by the literal name 'p2o.Mul.244'.
_mul_name_by_weights = {
    "s": 'Mul_78',
    "m": 'Mul_100',
    "l": 'Mul_244',
    "x": 'Mul_144',
}
if WEIGHTS_TYPE in _mul_name_by_weights:
    Mul_name = _mul_name_by_weights[WEIGHTS_TYPE]

# Load the ONNX model into a graphsurgeon graph, fold constants, then drop
# dangling nodes and restore topological order.
onnx_model = onnx.load(INPUT_PATH)
gs_graph = gs.import_onnx(onnx_model)
gs_graph.fold_constants()
gs_graph.cleanup().toposort()


## modify NMS

In [None]:
import os

# Re-wire the graph so that it outputs raw boxes and per-class scores, then
# append a TensorRT EfficientNMS_TRT plugin node and save the result.

# Upper bound on detections kept by the NMS plugin; also fixes the plugin's
# output shapes below.
MAX_OUTPUT_BOXES = 100
# Number of candidate boxes in the 'scores' tensor.
# NOTE(review): 4725 = 60*60 + 30*30 + 15*15, which presumably corresponds to
# the 480x480 export with strides 8/16/32 -- update it if the input size changes.
NUM_CANDIDATES = 4725


def _find_node(graph, node_name):
    """Return the first node of *graph* called *node_name*, or raise ValueError."""
    for node in graph.nodes:
        if node.name == node_name:
            return node
    raise ValueError(
        f"node {node_name!r} not found in {INPUT_PATH}; "
        "check the node names produced by your paddle2onnx version")


Mul = _find_node(gs_graph, 'p2o.Mul.244')
Concat_14 = _find_node(gs_graph, 'p2o.Concat.28')

# Swap the last two axes of the Concat output to produce the 'scores' tensor
# of shape [1, NUM_CANDIDATES, CLASS_NUM].
scores = gs.Variable(name='scores',shape=[1,NUM_CANDIDATES,CLASS_NUM],dtype=np.float32)
Transpose = gs.Node(name='lastTranspose',op='Transpose',
                   inputs=[Concat_14.outputs[0]],
                   outputs=[scores],
                   attrs=OrderedDict(perm=[0,2,1]))
gs_graph.nodes.append(Transpose)

# Expose only the first graph input and the two tensors the NMS node consumes.
Mul.outputs[0].name = 'boxes'
gs_graph.inputs = [gs_graph.inputs[0]]
gs_graph.outputs = [Mul.outputs[0],scores]
gs_graph.outputs[0].dtype=np.float32
gs_graph.outputs[1].dtype=np.float32

gs_graph.cleanup().toposort()
onnx_graph = shape_inference.infer_shapes(gs.export_onnx(gs_graph))
onnx_graph, check = onnxsim.simplify(onnx_graph)
if not check:
    # simplify() reports whether the simplified model could be validated;
    # do not silently continue with a model that failed that check.
    raise RuntimeError("onnxsim could not validate the simplified ONNX model")

gs_graph = gs.import_onnx(onnx_graph)
op_inputs = gs_graph.outputs
op = "EfficientNMS_TRT"
attrs = {
    "plugin_version": "1",
    "background_class": -1,
    "max_output_boxes": MAX_OUTPUT_BOXES,
    "score_threshold": SCORE_THRESHOLD,
    "iou_threshold": IOU_THRESHOLD,
    "score_activation": False,
    "box_coding": 0,
}

output_num_detections = gs.Variable(
    name="num_dets",
    dtype=np.int32,
    shape=[1, 1],
)
output_boxes = gs.Variable(
    name="det_boxes",
    dtype=np.float32,
    shape=[1, MAX_OUTPUT_BOXES, 4],
)
output_scores = gs.Variable(
    name="det_scores",
    dtype=np.float32,
    shape=[1, MAX_OUTPUT_BOXES],
)
output_labels = gs.Variable(
    name="det_classes",
    dtype=np.int32,
    shape=[1, MAX_OUTPUT_BOXES],
)
op_outputs = [
    output_num_detections, output_boxes, output_scores, output_labels
]

TRT = gs.Node(op=op,name="batched_nms",inputs=op_inputs,outputs=op_outputs,attrs=attrs)
gs_graph.nodes.append(TRT)
gs_graph.outputs = op_outputs
gs_graph.cleanup().toposort()

# onnx.save does not create missing directories.
os.makedirs(os.path.dirname(SAVE_PATH) or ".", exist_ok=True)
onnx.save(gs.export_onnx(gs_graph),SAVE_PATH)
print("finished")

# Load the TensorRT engine and run inference

In [None]:
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import cv2
import time

class BaseEngine(object):
    """Run a serialized TensorRT detection engine on single BGR images.

    The outputs are read positionally as (detection count, boxes, scores,
    class ids), i.e. in the order the engine enumerates its output bindings.
    """

    def __init__(self, engine_path, imgsz=(480,480)):
        """Deserialize the engine and allocate one host/device buffer pair per binding.

        Args:
            engine_path: path of the serialized TensorRT engine file.
            imgsz: network input size as (height, width), or a single int.

        Raises:
            RuntimeError: if TensorRT cannot deserialize the engine file.
        """
        self.imgsz = imgsz
        logger = trt.Logger(trt.Logger.WARNING)
        runtime = trt.Runtime(logger)
        # Register the bundled plugins before deserializing the engine.
        trt.init_libnvinfer_plugins(logger, namespace="")
        with open(engine_path, "rb") as f:
            serialized_engine = f.read()
        engine = runtime.deserialize_cuda_engine(serialized_engine)
        if engine is None:
            raise RuntimeError(f"failed to deserialize TensorRT engine: {engine_path}")
        # Keep the runtime and engine referenced for as long as the context is
        # in use. NOTE(review): presumably required by TensorRT object
        # lifetime rules -- harmless either way.
        self.runtime = runtime
        self.engine = engine
        self.context = engine.create_execution_context()
        self.inputs, self.outputs, self.bindings = [], [], []
        self.stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(device_mem))
            if engine.binding_is_input(binding):
                self.inputs.append({'host': host_mem, 'device': device_mem})
            else:
                self.outputs.append({'host': host_mem, 'device': device_mem})

    def predict(self, img,threshold):
        """Detect objects in one BGR image.

        Args:
            img: image array as returned by ``cv2.imread``.
            threshold: minimum score a detection needs to be returned.

        Returns:
            List of ``[class, score, xmin, ymin, xmax, ymax]`` in the
            coordinates of the original image.

        Raises:
            ValueError: if *img* is None or does not fit the engine input.
        """
        if img is None:
            raise ValueError("img is None (cv2.imread returns None for unreadable files)")
        self.img = self.preprocess(img)
        flat = np.ravel(self.img)
        host_in = self.inputs[0]['host']
        if flat.size != host_in.size:
            raise ValueError(
                f"preprocessed image has {flat.size} values but the engine input holds {host_in.size}")
        # Copy INTO the page-locked buffer allocated in __init__ instead of
        # rebinding the dict entry to a pageable array: the async copy below
        # keeps using pinned memory and the buffer size stays tied to the engine.
        np.copyto(host_in, flat)
        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()

        data = [out['host'] for out in self.outputs]
        results = self.postprocess(data,threshold)
        return results

    def letterbox(self,im,color=(114, 114, 114), auto=False, scaleup=True, stride=32):
        """Resize *im* keeping its aspect ratio and pad it to ``self.imgsz``.

        Args:
            im: image array of shape (height, width, channels).
            color: border colour used for the padding.
            auto: if True, pad only up to the next multiple of *stride*.
            scaleup: if False, never enlarge the image.
            stride: stride used when *auto* is True.

        Returns:
            ``(image, ratio, dw, dh)`` where *ratio* is the applied scale and
            *dw*/*dh* are the horizontal/vertical padding per side. The same
            values are stored on ``self`` for use by ``postprocess``.
        """
        shape = im.shape[:2]  # current shape [height, width]
        new_shape = self.imgsz
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        # Scale ratio (new / old)
        self.r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better val mAP)
            self.r = min(self.r, 1.0)
        # Compute padding
        new_unpad = int(round(shape[1] * self.r)), int(round(shape[0] * self.r))
        self.dw, self.dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if auto:  # minimum rectangle
            self.dw, self.dh = np.mod(self.dw, stride), np.mod(self.dh, stride)  # wh padding
        self.dw /= 2  # divide padding into 2 sides
        self.dh /= 2
        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The +/-0.1 sends an odd padding total to floor on one side and ceil
        # on the other, so both sides still add up to the full padding.
        top, bottom = int(round(self.dh - 0.1)), int(round(self.dh + 0.1))
        left, right = int(round(self.dw - 0.1)), int(round(self.dw + 0.1))
        self.img = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return self.img,self.r,self.dw,self.dh

    def preprocess(self,image):
        """Turn a BGR image into a normalized float32 array of shape (1, 3, H, W).

        Steps: letterbox, BGR->RGB, scale to [0, 1], subtract the per-channel
        mean and divide by the per-channel std, HWC->CHW, add a batch axis.
        """
        self.img,self.r,self.dw,self.dh = self.letterbox(image)
        self.img = cv2.cvtColor(self.img,cv2.COLOR_BGR2RGB)
        self.img = self.img.astype(np.float32)
        self.img = self.img / 255.
        self.img -= np.array([0.485, 0.456, 0.406])[None, None, :]
        self.img /= np.array([0.229, 0.224, 0.225])[None, None, :]
        self.img = self.img.transpose((2, 0, 1))
        self.img = np.expand_dims(self.img,0)
        return self.img

    def postprocess(self,pred,threshold):
        """Filter raw engine outputs and map boxes back to the original image.

        Args:
            pred: sequence ``(num_dets, boxes, scores, classes)`` of flat arrays.
            threshold: detections scoring below this value are dropped.

        Returns:
            List of ``[class, score, xmin, ymin, xmax, ymax]``; the letterbox
            padding (``self.dw``/``self.dh``) is subtracted and the result is
            divided by the scale ``self.r``.
        """
        new_bboxes = []
        bboxes = pred[1].reshape(-1,4)
        scores = pred[2]
        clas = pred[3]
        # Never read past the output buffers, whatever count the engine reports.
        num = min(int(pred[0][0]), len(bboxes), len(scores), len(clas))
        for i in range(num):
            if(scores[i] < threshold):
                continue
            xmin = (bboxes[i][0] - self.dw)/self.r
            ymin = (bboxes[i][1] - self.dh)/self.r
            xmax = (bboxes[i][2] - self.dw)/self.r
            ymax = (bboxes[i][3] - self.dh)/self.r
            new_bboxes.append([clas[i],scores[i],xmin,ymin,xmax,ymax])
        return new_bboxes


def visualize(img,bbox_array):
    """Draw each detection of *bbox_array* on *img* and return the image.

    Every entry is indexed as ``[class, score, xmin, ymin, xmax, ymax]``.
    A rectangle is drawn around the box, and a ``class:<name> <score>`` label
    (score rounded to two decimals) is written 16 pixels below its top-left
    corner.
    """
    # class id -> label text
    class_names = {0:"LineCrack",1:"AligatorCrack",2:"Repair",3:"Pothole"}
    colour = (105, 237, 249)

    for det in bbox_array:
        left, top = int(det[2]), int(det[3])
        right, bottom = int(det[4]), int(det[5])
        label = class_names[int(det[0])]
        confidence = det[1]

        cv2.rectangle(img, (left, top), (right, bottom), colour, 2)
        caption = f"class:{label!s} {round(confidence, 2)!s}"
        img = cv2.putText(img, caption, (left, top + 16),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 1)
    return img

# Run the engine on one sample image and save the annotated result.
import os

trt_engine = BaseEngine("onnx_nms_model/ppyoloe_plus_crn_l_custom_std_nms.engine")
# NOTE(review): this path is not under test_path (dataset\road_disease_voc\...)
# -- confirm it is the intended sample image.
sample_image = r"dataset\rdd\test_images\China_Drone_000621.jpg"
img1 = cv2.imread(sample_image)
if img1 is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"could not read image: {sample_image}")
results = trt_engine.predict(img1,threshold=0.5)
img = visualize(img1,results)

# cv2.imwrite returns False rather than raising when the target directory is
# missing, so create it first and check the result.
os.makedirs("results_trt", exist_ok=True)
if not cv2.imwrite("results_trt/China_Drone_000621_pycuda.jpg",img):
    raise OSError("could not write results_trt/China_Drone_000621_pycuda.jpg")

## Speed Test

In [None]:
# Time complete predict() calls on the image loaded above; each call covers
# preprocessing, the host<->device copies, engine execution and postprocessing.
%timeit trt_engine.predict(img1,threshold=0.5)

## Batch inference on the test images

In [None]:
# Run the engine on every sampled test image and write the annotated copies
# into a freshly created results_trt directory.
if os.path.exists("results_trt"):
    shutil.rmtree("results_trt")
os.makedirs("results_trt", exist_ok=True)
trt_engine = BaseEngine("onnx_nms_model/ppyoloe_plus_crn_l_custom_std_nms.engine")
for img in glob.glob(os.path.join(test_path, "*.jpg")):
    img1 = cv2.imread(img)
    if img1 is None:
        # cv2.imread returns None for unreadable files; skip them instead of
        # aborting the whole batch.
        print(f"skipping unreadable image: {img}")
        continue
    results = trt_engine.predict(img1,threshold=0.5)
    img1 = visualize(img1,results)
    out_path = os.path.join("results_trt", os.path.basename(img))
    if not cv2.imwrite(out_path, img1):
        print(f"failed to write: {out_path}")