In [1]:
import copy

import torch
import torch_tensorrt

# print(tensorrt.IBuilderConfig.max_workspace_size)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Deserialize the TorchScript checkpoint first, then move the module to the GPU.
# (Alternative checkpoint tried earlier: "./models/yolopv2.pt".)
load_model = torch.jit.load("./models/yolov5s.torchscript")
load_model = load_model.to("cuda")

In [3]:
# Trace the loaded module with a single random 1x3x384x640 example input on the GPU.
trace_example = torch.randn((1, 3, 384, 640)).to("cuda")
traced_model = torch.jit.trace(load_model, [trace_example])



In [4]:
# Module.half() converts parameters and buffers in place and returns the same
# object, so `load_model = load_model.half()` also turned the FP32 baseline
# (compiled into `trt_model` and timed by `benchmark(load_model)`) into FP16.
# Keep `load_model` in FP32 and build the FP16 variant on an independent copy.
model_half = copy.deepcopy(load_model).half()

In [None]:
# Compile the FP32 TensorRT engine for a fixed 1x3x384x640 float32 input.
trt_model = torch_tensorrt.compile(
    load_model,
    inputs = [torch_tensorrt.Input((1, 3, 384, 640), dtype=torch.float32)],
    enabled_precisions = {torch.float32},
    truncate_long_and_double = True,
    device = torch_tensorrt.Device("cuda:0"),
    # Use the same 4 MiB (1 << 22 bytes) workspace as the FP16 engine. The
    # previous value of 300000 bytes gave this build a much smaller budget,
    # which makes the FP32-vs-FP16 benchmark comparison uneven.
    workspace_size=4194304
)

In [5]:
# Compile the FP16 TensorRT engine; the input is described by an example
# half-precision tensor rather than an explicit torch_tensorrt.Input spec.
half_example = torch.randn((1, 3, 384, 640), dtype=torch.half)
trt_model_half = torch_tensorrt.compile(
    load_model,
    inputs=[half_example],
    enabled_precisions={torch.half},
    truncate_long_and_double=True,
    device=torch_tensorrt.Device("cuda:0"),
    workspace_size=1 << 22,  # 4194304 bytes
)



In [7]:
import time
import numpy as np

# Enable cuDNN's benchmark mode (auto-selection of convolution algorithms).
# NOTE(review): this is a process-wide switch, so it affects every model timed
# below, not just the ones passed to benchmark().
import torch.backends.cudnn as cudnn
cudnn.benchmark = True

def benchmark(model, device="cuda:0", input_shape=(1, 3, 384, 640), dtype='fp32', nwarmup=50, nruns=100):
    """Time forward passes of ``model`` on a random input and print the latency.

    Args:
        model: Callable invoked as ``model(input_data)``.
        device: Device the random input is moved to before timing.
        input_shape: Shape of the random input tensor.
        dtype: ``'fp16'`` builds a half-precision input; any other value
            builds an input with ``torch.randn``'s default dtype.
        nwarmup: Number of untimed forward passes executed first.
        nruns: Number of timed forward passes.

    Prints a running average every 10 timed iterations, then the input shape
    and the overall average batch time in milliseconds. Returns ``None``.
    """
    target = torch.device(device)

    def _sync():
        # CUDA kernels are launched asynchronously, so the clock must only be
        # read after the target device has finished. Skipped for non-CUDA
        # devices, where torch.cuda.synchronize() would raise on hosts
        # without CUDA.
        if target.type == "cuda":
            torch.cuda.synchronize(target)

    sample_dtype = torch.half if dtype == 'fp16' else None
    input_data = torch.randn(input_shape, dtype=sample_dtype).to(target)

    print("Warm up ...")
    with torch.no_grad():
        for _ in range(nwarmup):
            model(input_data)
    _sync()

    print("Start timing ...")
    timings = []
    with torch.no_grad():
        for i in range(1, nruns + 1):
            # perf_counter() is monotonic and high-resolution, unlike time.time().
            start_time = time.perf_counter()
            model(input_data)
            _sync()
            timings.append(time.perf_counter() - start_time)
            if i % 10 == 0:
                print('Iteration %d/%d, ave batch time %.2f ms'%(i, nruns, np.mean(timings)*1000))

    print("Input shape:", input_data.size())
    print('Average batch time: %.2f ms'%(np.mean(timings)*1000))

In [49]:
import copy
# Half-precision TorchScript baseline: convert an independent copy of the
# loaded model, then time it with a half-precision input.
fp16_source = copy.deepcopy(load_model)
model_half = fp16_source.half()
benchmark(model_half, dtype='fp16')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 10.96 ms
Iteration 20/100, ave batch time 10.30 ms
Iteration 30/100, ave batch time 10.15 ms
Iteration 40/100, ave batch time 10.03 ms
Iteration 50/100, ave batch time 10.10 ms
Iteration 60/100, ave batch time 10.09 ms
Iteration 70/100, ave batch time 10.05 ms
Iteration 80/100, ave batch time 10.04 ms
Iteration 90/100, ave batch time 10.01 ms
Iteration 100/100, ave batch time 9.97 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 9.97 ms


In [46]:
# Time the loaded TorchScript model with the default (non-'fp16') input.
benchmark(load_model, dtype='fp32')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 15.64 ms
Iteration 20/100, ave batch time 15.95 ms
Iteration 30/100, ave batch time 15.88 ms
Iteration 40/100, ave batch time 15.84 ms
Iteration 50/100, ave batch time 15.77 ms
Iteration 60/100, ave batch time 15.79 ms
Iteration 70/100, ave batch time 15.78 ms
Iteration 80/100, ave batch time 15.79 ms
Iteration 90/100, ave batch time 15.79 ms
Iteration 100/100, ave batch time 15.78 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 15.78 ms


In [9]:
# Time the FP16 TensorRT-compiled module with a half-precision input.
benchmark(model=trt_model_half, dtype='fp16')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 1.14 ms
Iteration 20/100, ave batch time 1.12 ms
Iteration 30/100, ave batch time 1.11 ms
Iteration 40/100, ave batch time 1.11 ms
Iteration 50/100, ave batch time 1.11 ms
Iteration 60/100, ave batch time 1.10 ms
Iteration 70/100, ave batch time 1.10 ms
Iteration 80/100, ave batch time 1.10 ms
Iteration 90/100, ave batch time 1.10 ms
Iteration 100/100, ave batch time 1.10 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 1.10 ms


In [54]:
# Time the FP32 TensorRT-compiled module with the default input settings.
benchmark(model=trt_model)

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 14.61 ms
Iteration 20/100, ave batch time 14.90 ms
Iteration 30/100, ave batch time 14.90 ms
Iteration 40/100, ave batch time 14.92 ms
Iteration 50/100, ave batch time 14.86 ms
Iteration 60/100, ave batch time 14.90 ms
Iteration 70/100, ave batch time 14.86 ms
Iteration 80/100, ave batch time 14.86 ms
Iteration 90/100, ave batch time 14.88 ms
Iteration 100/100, ave batch time 14.91 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 14.91 ms


In [5]:
# Smoke-test the FP16 engine. It was compiled for cuda:0, so create the
# half-precision input directly on that device instead of on the CPU.
trt_model_half(torch.randn((1, 3, 384, 640), dtype=torch.half, device="cuda:0"))



RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
RuntimeError: [Error thrown at core/runtime/execute_engine.cpp:136] Expected inputs[i].dtype() == expected_type to be true but got false
Expected input tensors to have type Half, found type float



In [29]:
# Random half-precision sample with the 1x3x384x640 shape used throughout this notebook.
teste = torch.randn(1, 3, 384, 640, dtype=torch.half)

In [21]:
# Display the sample cast to float16.
teste.to(torch.float16)

tensor([[[[ 0.1689,  0.0921,  1.7334,  ...,  1.1240, -0.9038, -1.4434],
          [-0.0855,  0.6484,  0.1616,  ..., -0.5337, -1.3232, -0.3740],
          [-0.6460,  0.3330, -0.0072,  ...,  0.5352,  1.4834,  1.0596],
          ...,
          [-0.9326,  0.8481, -0.3757,  ...,  1.0459,  0.3838, -0.6147],
          [ 0.8218, -1.5693, -0.7974,  ...,  1.5107, -0.9663, -0.0483],
          [ 1.7832, -1.4648, -0.8350,  ...,  0.5840, -0.8550,  0.7383]],

         [[-1.3564, -0.6748, -0.3154,  ...,  0.4971,  0.0774,  1.5732],
          [-0.0685,  0.3486,  0.4094,  ..., -0.7871,  0.3789, -1.7520],
          [-0.0982, -0.6846,  1.2363,  ..., -1.0967,  0.5737, -1.5830],
          ...,
          [ 0.5210, -0.8164, -0.7412,  ...,  1.1729,  0.7275, -1.8613],
          [ 0.5464, -0.0115,  0.2756,  ...,  0.9985,  1.8457,  1.5156],
          [-0.5640, -0.8818,  0.3110,  ..., -0.3245,  0.1918,  0.9941]],

         [[-0.1318, -1.9062, -0.3010,  ...,  0.6899, -0.2499,  0.7744],
          [ 0.2620,  0.5225,  

In [26]:
# Construct (and display) a half-precision input spec for a 1x3x384x640 tensor.
torch_tensorrt.Input(shape=(1, 3, 384, 640), dtype=torch_tensorrt.dtype.half)

<torch_tensorrt._Input.Input at 0x7f12c4389790>

In [30]:
# `teste` is created on the CPU, while the engine was compiled for cuda:0;
# move the sample to the engine's device before running it.
trt_model_half(teste.to("cuda:0"))



RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
RuntimeError: [Error thrown at core/runtime/execute_engine.cpp:136] Expected inputs[i].dtype() == expected_type to be true but got false
Expected input tensors to have type Half, found type float



In [31]:
# Display the imported torch_tensorrt module object; its repr shows where the package was loaded from.
torch_tensorrt

<module 'torch_tensorrt' from '/home/gribeiro/.local/lib/python3.8/site-packages/torch_tensorrt/__init__.py'>

In [2]:
# Display the installed PyTorch version string.
torch.__version__

'1.13.1+cu117'

In [6]:
# Persist the FP16 TensorRT-compiled module. The file name now says fp16:
# the previous name ('yolov5s_trtfp32.trt') mislabelled this half-precision
# engine as FP32.
torch.jit.save(trt_model_half, 'yolov5s_trtfp16.trt')