In [1]:
import os
import torch
import tensorrt as trt
import onnx
import onnxruntime

# ONNX to TRT

In [11]:
def export_engine(onnx, im, file, half, dynamic, workspace=4, verbose=False, prefix='Tensorrt'):
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE

    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30
    # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx)):
        raise RuntimeError(f'failed to load ONNX file: {onnx}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    for inp in inputs:
        print(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
    for out in outputs:
        print(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')
    
    if dynamic:
        profile = builder.create_optimization_profile()
        for inp in inputs:
            profile.set_shape(inp.name, (-1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
        config.add_optimization_profile(profile)

    print(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine')
    if builder.platform_has_fast_fp16 and half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(file, 'wb') as t:
        t.write(engine.serialize())
    return True

In [12]:
ONNX_PATH = "./deepc.onnx"
TRT_PATH = ONNX_PATH.replace('.onnx', '.engine')
C, H, W = (1, 240, 320)

device = 'cuda'
inputs = torch.randn(1, C, H, W).to(device)

In [8]:
ONNX_PATH = "./refinenet.onnx"
TRT_PATH = ONNX_PATH.replace('.onnx', '.engine')
C, H, W = (1, 24, 24)

device = 'cuda'
inputs = torch.randn(1, C, H, W).to(device)

In [13]:
export_engine(ONNX_PATH, inputs, TRT_PATH, False, True, verbose=False)

  config.max_workspace_size = workspace * 1 << 30


[05/05/2023-12:30:52] [TRT] [I] The logger passed into createInferBuilder differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

[05/05/2023-12:30:52] [TRT] [I] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 1960, GPU 1049 (MiB)
[05/05/2023-12:30:52] [TRT] [I] ----------------------------------------------------------------
[05/05/2023-12:30:52] [TRT] [I] Input filename:   ./deepc.onnx
[05/05/2023-12:30:52] [TRT] [I] ONNX IR version:  0.0.7
[05/05/2023-12:30:52] [TRT] [I] Opset version:    14
[05/05/2023-12:30:52] [TRT] [I] Producer name:    pytorch
[05/05/2023-12:30:52] [TRT] [I] Producer version: 1.13.1
[05/05/2023-12:30:52] [TRT] [I] Domain:           
[05/05/2023-12:30:52] [TRT] [I] Model version:    0
[05/05/2023-12:30:52] [TRT] [I] Doc string:       
[05/05/2023-12:30:52] [TRT] [I] ----------------------------------------------------------------
Tensorrt 

RuntimeError: Shape provided for min is inconsistent with other shapes.

# Inference with TRT model

In [24]:
In [1]: %load_ext autoreload

In [2]: %autoreload 2

In [44]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

import pycuda.driver as cuda
import pycuda.autoinit
import utils_engine as engine_utils # TRT Engine creation/save/load utils

from time import time
from PIL import Image
from torchvision.transforms import transforms

TRT_PATH = "./deepc.engine"

# SETUP TRT
logger = trt.Logger(trt.Logger.ERROR)
trt_runtime = trt.Runtime(logger)

print("Loading cached TensorRT engine from {}".format(TRT_PATH))
trt_engine = engine_utils.load_engine(trt_runtime, TRT_PATH)

# This allocates memory for network inputs/outputs on both CPU and GPU
inputs, outputs, bindings, stream = engine_utils.allocate_buffers(trt_engine)

# Execution context is needed for inference
context = trt_engine.create_execution_context()

Loading cached TensorRT engine from ./deepc.engine
input
output
109


In [43]:
# Prepare input data
IMG_PATH = "/home/adryw/Documents/deepcharuco/data_demo/test_frames/output_0005.png"
img = cv2.cvtColor(cv2.imread(IMG_PATH), cv2.COLOR_BGR2GRAY)
img = (img.astype(np.float32) - 128) / 255  # Well we started with this one so...
org_h, org_w = img.shape[:2]
img_input = cv2.resize(img, (320, 240), interpolation=cv2.INTER_LINEAR)

# Copy it into appropriate place into memory
# (inputs was returned earlier by allocate_buffers())
np.copyto(inputs[0].host, img_input.ravel())

# Feed to model
tic = time()
bs = 1
# Fetch output from the model
b_loc, b_ids = engine_utils.do_inference(
    context, bindings=bindings, inputs=inputs,
    outputs=outputs, stream=stream)
print(b_loc.shape, b_ids.shape)

(78000,) (20400,)
