# ONNX inference

In [4]:
import time
import cv2
import numpy as np
import onnxruntime

import numpy as np
import cv2

# Class labels; a detection's class id is used as an index into this list
class_names = ['LineCrack', 'AligatorCrack', 'Repair', 'Pothole']

# One color per class: a (len(class_names), 3) array of floats drawn uniformly
# between 0 and 255. The generator is seeded so the palette is the same every run.
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(class_names), 3))


def nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is not below ``iou_threshold``.

    Args:
        boxes: 2-D array with one (x1, y1, x2, y2) box per row.
        scores: 1-D array holding one score per box.
        iou_threshold: a candidate survives only if its IoU with the kept
            box is strictly below this value.

    Returns:
        List of row indices into ``boxes`` for the kept boxes, in the order
        they were selected (highest score first).
    """
    # Candidate indices ordered from highest to lowest score
    order = np.argsort(scores)[::-1]

    selected = []
    while order.size > 0:
        # The head of the queue is the best remaining box
        best, rest = order[0], order[1:]
        selected.append(best)

        # Overlap of the chosen box with every other candidate
        overlaps = compute_iou(boxes[best, :], boxes[rest, :])

        # Only candidates overlapping less than the threshold stay in the queue
        order = rest[np.where(overlaps < iou_threshold)[0]]

    return selected


def compute_iou(box, boxes):
    """Intersection-over-union of one box against a set of boxes.

    Args:
        box: sequence of four values (x1, y1, x2, y2).
        boxes: 2-D array with one (x1, y1, x2, y2) box per row.

    Returns:
        1-D array with the IoU of ``box`` against each row of ``boxes``.
    """
    # Corners of the intersection rectangle
    left = np.maximum(box[0], boxes[:, 0])
    top = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[2], boxes[:, 2])
    bottom = np.minimum(box[3], boxes[:, 3])

    # Negative extents mean the boxes do not overlap, so clamp them to zero
    overlap_w = np.maximum(0, right - left)
    overlap_h = np.maximum(0, bottom - top)
    intersection = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice
    single_area = (box[2] - box[0]) * (box[3] - box[1])
    many_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union = single_area + many_areas - intersection

    return intersection / union


def xywh2xyxy(x):
    """Convert center-format boxes (cx, cy, w, h) to corner format (x1, y1, x2, y2).

    The last axis of ``x`` holds the four box values. The input is left
    untouched; the result is a copy with the same shape and dtype.
    """
    out = np.copy(x)

    # Views into the input, so writes to ``out`` cannot affect them
    center_x, center_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2

    out[..., 0] = center_x - half_w
    out[..., 1] = center_y - half_h
    out[..., 2] = center_x + half_w
    out[..., 3] = center_y + half_h
    return out


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Render detections onto a copy of ``image``.

    Two copies of the image are annotated: one gets box outlines, the other
    gets filled boxes. Both get a colored caption background and a white
    "<label> <score>%" caption. The two copies are then blended, with the
    filled copy weighted by ``mask_alpha``.

    Args:
        image: image array; ``image.shape[:2]`` is read as (height, width).
        boxes: iterable of arrays (x1, y1, x2, y2); cast to int for drawing.
        scores: iterable of confidences, shown as integer percentages.
        class_ids: iterable of indices into ``class_names`` / ``colors``.
        mask_alpha: blend weight of the filled-rectangle layer.

    Returns:
        The blended image; ``image`` itself is not modified.
    """
    height, width = image.shape[:2]
    short_side = min([height, width])
    # Font scale and stroke thickness follow the shorter image side
    font_scale = short_side * 0.0006
    stroke = int(short_side * 0.001)
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)

    overlay = image.copy()    # filled boxes, blended in with mask_alpha
    annotated = image.copy()  # box outlines

    # (canvas, rectangle thickness): 2 draws an outline, -1 fills the box
    layers = ((annotated, 2), (overlay, -1))

    for box, score, class_id in zip(boxes, scores, class_ids):
        color = colors[class_id]
        x1, y1, x2, y2 = box.astype(int)

        caption = f'{class_names[class_id]} {int(score * 100)}%'
        (tw, th), _ = cv2.getTextSize(text=caption, fontFace=font,
                                      fontScale=font_scale, thickness=stroke)
        th = int(th * 1.2)
        caption_corner = (x1 + tw, y1 - th)

        for canvas, box_thickness in layers:
            # Detection box
            cv2.rectangle(canvas, (x1, y1), (x2, y2), color, box_thickness)
            # Solid background behind the caption, sitting above the box
            cv2.rectangle(canvas, (x1, y1), caption_corner, color, -1)
            cv2.putText(canvas, caption, (x1, y1),
                        font, font_scale, white, stroke, cv2.LINE_AA)

    return cv2.addWeighted(overlay, mask_alpha, annotated, 1 - mask_alpha, 0)


def draw_comparison(img1, img2, name1, name2, fontsize=2.6, text_thickness=3):
    """Title two images and join them side by side.

    A colored banner with white text is drawn directly onto ``img1`` and
    ``img2`` (both are modified in place), then the images are concatenated
    horizontally. If the result is wider than 3840 pixels it is resized to
    exactly 3840x2160.

    Args:
        img1: left image.
        img2: right image.
        name1: title drawn on ``img1``.
        name2: title drawn on ``img2``.
        fontsize: font scale passed to OpenCV.
        text_thickness: stroke thickness of the title text.

    Returns:
        The combined image.
    """
    font = cv2.FONT_HERSHEY_DUPLEX

    def _stamp_title(canvas, title, banner_color):
        # Draw a filled banner at one third of the width, then the title on top
        (tw, th), _ = cv2.getTextSize(text=title, fontFace=font,
                                      fontScale=fontsize, thickness=text_thickness)
        x = canvas.shape[1] // 3
        y = th
        pad = th // 5
        cv2.rectangle(canvas, (x - pad * 2, y + pad),
                      (x + tw + pad * 2, y - th - pad), banner_color, -1)
        cv2.putText(canvas, title, (x, y), font, fontsize,
                    (255, 255, 255), text_thickness)

    _stamp_title(img1, name1, (0, 115, 255))
    _stamp_title(img2, name2, (94, 23, 235))

    combined = cv2.hconcat([img1, img2])
    if combined.shape[1] > 3840:
        combined = cv2.resize(combined, (3840, 2160))

    return combined

class YOLOv8:
    """YOLOv8 object detector backed by an ONNX Runtime session.

    Calling the instance on a BGR image runs preprocessing, inference,
    confidence filtering and NMS. The latest result is also stored on
    ``self.boxes``, ``self.scores`` and ``self.class_ids`` so that
    ``draw_detections`` can render it afterwards.
    """

    def __init__(self, path, conf_thres=0.7, iou_thres=0.5):
        """Load the model at ``path`` and store the filtering thresholds.

        Args:
            path: path to the ONNX model file.
            conf_thres: detections scoring at or below this are discarded.
            iou_thres: IoU threshold handed to NMS.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres

        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        """Shorthand for ``detect_objects(image)``."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache its input/output metadata."""
        self.session = onnxruntime.InferenceSession(
            path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        """Run the full pipeline on one image.

        Returns:
            ``(boxes, scores, class_ids)`` as produced by ``process_output``.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        self.boxes, self.scores, self.class_ids = self.process_output(outputs)

        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Convert a BGR image into the float32 tensor fed to the model.

        Records the original image size (needed later to rescale boxes),
        converts BGR to RGB, resizes to the model input size, scales pixel
        values by 1/255 and reorders to a (1, C, H, W) array.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` (fed to the first model input)."""
        return self.session.run(self.output_names,
                                {self.input_names[0]: input_tensor})

    def process_output(self, output):
        """Turn raw model output into final detections.

        ``output[0]`` is squeezed and transposed so that each row is one
        candidate: columns 0-3 are the box, columns 4+ are per-class scores.

        Returns:
            ``(boxes, scores, class_ids)`` arrays after confidence filtering
            and NMS, or three empty lists when nothing passes the threshold.
        """
        predictions = np.squeeze(output[0]).T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        confident = scores > self.conf_threshold
        predictions = predictions[confident, :]
        scores = scores[confident]

        if len(scores) == 0:
            return [], [], []

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = nms(boxes, scores, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions):
        """Return the boxes of ``predictions`` as (x1, y1, x2, y2) in original-image pixels."""
        # Extract boxes from predictions
        boxes = predictions[:, :4]

        # Scale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        # Convert boxes to xyxy format
        boxes = xywh2xyxy(boxes)

        return boxes

    def rescale_boxes(self, boxes):
        """Map boxes from model-input coordinates to original-image coordinates.

        Each of the four columns is divided by the matching model input
        dimension and multiplied by the matching original image dimension.
        Returns a new float32 array.
        """
        input_shape = np.array([self.input_width, self.input_height,
                                self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height,
                           self.img_width, self.img_height])
        return boxes

    def draw_detections(self, image, draw_scores=True, mask_alpha=0.4):
        """Draw the most recent detections onto a copy of ``image``.

        ``draw_scores`` is accepted for interface compatibility but is not
        used; scores are always drawn by the module-level helper.
        """
        return draw_detections(image, self.boxes, self.scores,
                               self.class_ids, mask_alpha)

    def get_input_details(self):
        """Cache the model's input names and the first input's shape."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        self.input_shape = model_inputs[0].shape
        # Dimensions 2 and 3 are read as height and width
        # NOTE(review): assumes an NCHW input with static spatial dims -- confirm for the model used
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the model's output names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]


# Location of the exported YOLOv8 ONNX model (machine-specific absolute path)
model_path = r"E:\PaddlePaddle\PaddleDetection\onnx_model\yolov8_std.onnx"

# Build the detector with a 0.3 confidence threshold and a 0.5 NMS IoU threshold
detector = YOLOv8(model_path, conf_thres=0.3, iou_thres=0.5)

# NOTE(review): the value returned by imread is used without being checked -- confirm the path exists
img = cv2.imread(r"dataset\rdd\test_images\China_Drone_000621.jpg")

# Detect Objects
detector(img)

# Draw detections
combined_img = detector.draw_detections(img)
# NOTE(review): nothing in this cell creates the "results" directory -- presumably it must already exist
cv2.imwrite("results/result_yolo.jpg",combined_img)

True

In [7]:
import numpy as np
import os
import shutil
import glob

image_path = r'dataset\disease\images'
test_path = r'dataset\disease\test_images'

# Rebuild the test folder from scratch so stale images from earlier runs are dropped
if os.path.exists(test_path):
    shutil.rmtree(test_path)
os.makedirs(test_path, exist_ok=True)

# Collect every .jpg below image_path, including nested sub-folders
imgs = glob.glob(os.path.join(image_path, '**', '*.jpg'), recursive=True)

# Draw up to 10 distinct images: sampling without replacement avoids picking
# the same file twice, and clamping the size keeps this working when fewer
# than 10 images (or none) are found.
infer_imgs = np.random.choice(imgs, size=min(10, len(imgs)), replace=False)
for img in infer_imgs:
    img_name = os.path.basename(img)
    src_path = img
    # Files are copied flat by base name, so same-named images from different
    # sub-folders would overwrite each other in test_path.
    dst_path = os.path.join(test_path, img_name)
    shutil.copy(src_path, dst_path)

In [7]:
import os
import shutil
# Output folder for the ONNX results; wiped and recreated on every run
path = "results_yolo"
if os.path.exists(path):
    shutil.rmtree(path)
os.makedirs(path, exist_ok=True)

model_path = r"E:\PaddlePaddle\PaddleDetection\onnx_model\yolov8_std.onnx"

detector = YOLOv8(model_path, conf_thres=0.3, iou_thres=0.5)

# Run the detector on every sampled test image and save the annotated copy
# under the same file name in the output folder
for img in glob.glob(f"{test_path}/*.jpg"):
    img1 = cv2.imread(img)
    detector(img1)
    combined_img = detector.draw_detections(img1)
    cv2.imwrite(f"{path}/{os.path.basename(img)}", combined_img)

# TRT
```bash
trtexec.exe --onnx=E:\PaddlePaddle\PaddleDetection\onnx_model\yolov8_std_trt.onnx --saveEngine=e:\PaddlePaddle\PaddleDetection\onnx_nms_model\yolov8_std_trt_nms.engine --fp16 
```

## Formal Inference

In [5]:
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import cv2
import time

class BaseEngine(object):
    """Runs a serialized TensorRT engine on single images.

    The constructor deserializes the engine and allocates one page-locked
    host buffer plus one device buffer per binding. ``predict`` letterboxes
    the image, runs the engine and maps the returned boxes back to the
    coordinates of the original image.
    """

    def __init__(self, engine_path, imgsz=(480,480)):
        """Load the engine file and allocate I/O buffers.

        Args:
            engine_path: path to the serialized TensorRT engine.
            imgsz: (height, width) the image is letterboxed to; an int means
                a square. It has to match the engine's input size, which
                ``predict`` verifies through the input buffer length.
        """
        self.imgsz = imgsz
        logger = trt.Logger(trt.Logger.WARNING)
        runtime = trt.Runtime(logger)
        trt.init_libnvinfer_plugins(logger, namespace="")
        with open(engine_path, "rb") as f:
            serialized_engine = f.read()
        engine = runtime.deserialize_cuda_engine(serialized_engine)
        self.context = engine.create_execution_context()
        self.inputs, self.outputs, self.bindings = [], [], []
        self.stream = cuda.Stream()
        # NOTE(review): the recorded notebook output echoes the three binding
        # calls below, which looks like deprecation warnings -- confirm
        # against the installed TensorRT version.
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(device_mem))
            buffers = {'host': host_mem, 'device': device_mem}
            if engine.binding_is_input(binding):
                self.inputs.append(buffers)
            else:
                self.outputs.append(buffers)

    def predict(self, img,threshold):
        """Run the engine on one BGR image.

        Args:
            img: image array as returned by ``cv2.imread``.
            threshold: detections scoring below this are dropped.

        Returns:
            List of ``[class, score, xmin, ymin, xmax, ymax]`` entries in
            original-image coordinates.

        Raises:
            ValueError: if the preprocessed image does not have as many
                elements as the engine's first input buffer.
        """
        self.img = self.preprocess(img)

        # Copy into the buffer allocated in __init__ instead of rebinding the
        # dict entry: this keeps the page-locked memory in use for the async
        # transfer and guarantees the upload matches the device allocation.
        host_in = self.inputs[0]['host']
        flat = np.ravel(self.img)
        if flat.size != host_in.size:
            raise ValueError(
                f"preprocessed input has {flat.size} elements but the engine "
                f"input expects {host_in.size}; check imgsz={self.imgsz}")
        np.copyto(host_in, flat)

        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()

        data = [out['host'] for out in self.outputs]
        results = self.postprocess(data,threshold)
        return results

    def letterbox(self,im,color=(114, 114, 114), auto=False, scaleup=True, stride=32):
        """Resize ``im`` keeping its aspect ratio and pad it to ``self.imgsz``.

        Args:
            im: image array; ``im.shape[:2]`` is read as (height, width).
            color: border color used for the padding.
            auto: if true, pad only up to the next multiple of ``stride``.
            scaleup: if false, the image is only ever shrunk, never enlarged.
            stride: stride used when ``auto`` is set.

        Returns:
            ``(padded_image, ratio, dw, dh)`` where ``dw`` / ``dh`` are the
            per-side paddings. The same values are stored on ``self``.
        """
        shape = im.shape[:2]  # current shape [height, width]
        new_shape = self.imgsz
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        # Scale ratio (new / old)
        self.r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better val mAP)
            self.r = min(self.r, 1.0)
        # Compute padding
        new_unpad = int(round(shape[1] * self.r)), int(round(shape[0] * self.r))
        self.dw, self.dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if auto:  # minimum rectangle
            self.dw, self.dh = np.mod(self.dw, stride), np.mod(self.dh, stride)  # wh padding
        self.dw /= 2  # divide padding into 2 sides
        self.dh /= 2
        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets decide which side gets the extra pixel when
        # the total padding is odd
        top, bottom = int(round(self.dh - 0.1)), int(round(self.dh + 0.1))
        left, right = int(round(self.dw - 0.1)), int(round(self.dw + 0.1))
        self.img = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return self.img,self.r,self.dw,self.dh

    def preprocess(self,image):
        """Letterbox a BGR image and convert it to a (1, C, H, W) float32 RGB array scaled by 1/255."""
        self.img,self.r,self.dw,self.dh = self.letterbox(image)
        self.img = cv2.cvtColor(self.img,cv2.COLOR_BGR2RGB)
        self.img = self.img.astype(np.float32)
        self.img = self.img / 255.
        self.img = self.img.transpose((2, 0, 1))
        self.img = np.expand_dims(self.img,0)
        return self.img

    def postprocess(self,pred,threshold):
        """Filter raw engine outputs and undo the letterbox transform.

        Args:
            pred: sequence of output arrays read as: ``pred[0][0]`` number of
                detections, ``pred[1]`` flat box coordinates (four per
                detection), ``pred[2]`` scores, ``pred[3]`` class ids.
            threshold: detections scoring below this are skipped.

        Returns:
            List of ``[class, score, xmin, ymin, xmax, ymax]`` with the
            padding offset removed and the scale ratio divided out.
        """
        num = int(pred[0][0])
        bboxes = pred[1].reshape(-1, 4)
        scores = pred[2]
        clas = pred[3]

        new_bboxes = []
        for i in range(num):
            if scores[i] < threshold:
                continue
            xmin = (bboxes[i][0] - self.dw) / self.r
            ymin = (bboxes[i][1] - self.dh) / self.r
            xmax = (bboxes[i][2] - self.dw) / self.r
            ymax = (bboxes[i][3] - self.dh) / self.r
            new_bboxes.append([clas[i], scores[i], xmin, ymin, xmax, ymax])
        return new_bboxes


def visualize(img,bbox_array):
    """Draw labelled detection boxes onto ``img``.

    Args:
        img: image array, drawn on in place.
        bbox_array: iterable of entries indexed as
            ``[class, score, xmin, ymin, xmax, ymax]``.

    Returns:
        The annotated image.

    A fresh palette is generated on every call: the first channel is fixed
    at 255 and the other two are random, so colors differ between calls.
    """
    # Class id -> display name: LineCrack, AligatorCrack, Repair, Pothole
    labels = ("LineCrack", "AligatorCrack", "Repair", "Pothole")
    palette = {
        class_id: (label, (255, np.random.randint(0, 255), np.random.randint(0, 255)))
        for class_id, label in enumerate(labels)
    }

    for det in bbox_array:
        xmin, ymin, xmax, ymax = int(det[2]), int(det[3]), int(det[4]), int(det[5])
        class_id = int(det[0])
        score = det[1]

        label, color = palette[class_id]
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)

        # str() is applied explicitly so the rounded score is rendered the
        # same way regardless of its numeric type
        caption = "class:" + str(label) + " " + str(round(score, 2))
        img = cv2.putText(img, caption, (xmin, ymin + 20),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return img

# Serialized TensorRT engine file (relative path)
trt_path = "onnx_nms_model/yolov8_std_trt_nms.engine"
# imgsz is the letterbox target size used during preprocessing
trt_engine = BaseEngine(trt_path, imgsz=(640, 640))
# NOTE(review): the value returned by imread is used without being checked -- confirm the path exists
img1 = cv2.imread(r"dataset\rdd\test_images\China_Drone_000621.jpg")
# Keep detections scoring at least 0.3
results = trt_engine.predict(img1,threshold=0.3)
# visualize draws on img1 in place and returns the annotated image
img = visualize(img1,results)

# NOTE(review): nothing in this cell creates the "results_trt" directory -- presumably it must already exist
cv2.imwrite("results_trt/yolov8_trt.jpg",img)

  size = trt.volume(engine.get_binding_shape(binding))
  dtype = trt.nptype(engine.get_binding_dtype(binding))
  if engine.binding_is_input(binding):


True

In [4]:
%timeit trt_engine.predict(img1,threshold=0.5)

10.3 ms ± 108 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# test inference

In [10]:

# Output folder for the TensorRT results; wiped and recreated on every run
path = "results_trt"
if os.path.exists(path):
    shutil.rmtree(path)
os.makedirs(path, exist_ok=True)
# A new engine wrapper is built here, replacing the earlier trt_engine
trt_engine = BaseEngine(trt_path, imgsz=(640,640))
# Run the engine on every .jpg in test_path and save the annotated copy
# under the same file name in the output folder
for img in glob.glob(f"{test_path}/*.jpg"):
    img1 = cv2.imread(img)
    # A low threshold (0.1) keeps weak detections in the saved images
    results = trt_engine.predict(img1,threshold=0.1)
    img1 = visualize(img1,results)
    cv2.imwrite(f"{path}/"+os.path.basename(img),img1)

  size = trt.volume(engine.get_binding_shape(binding))
  dtype = trt.nptype(engine.get_binding_dtype(binding))
  if engine.binding_is_input(binding):
