In [2]:
import copy

import torch
import torch_tensorrt

# print(tensorrt.IBuilderConfig.max_workspace_size)

In [3]:
# TorchScript checkpoint to benchmark
# (alternative checkpoint: "./models/yolov5s.torchscript").
MODEL_PATH = "./models/yolopv2.pt"

# Move the module to the GPU and switch it to inference mode immediately, so
# every later compile/benchmark cell sees an eval-mode module regardless of
# which cells ran before it.
load_model = torch.jit.load(MODEL_PATH).to("cuda").eval()

In [11]:
# `load_model` comes from torch.jit.load, i.e. it is already a ScriptModule, so
# torch.jit.script() hands it back unchanged. The second positional parameter
# of torch.jit.script is the deprecated `optimize` flag, not example inputs:
# passing a tensor list there only raises a deprecation warning and allocates
# an unused CUDA tensor, so no extra argument is passed.
traced_model = torch.jit.script(load_model)

In [5]:
# torch.device('cuda')
# Cast the floating-point parameters and buffers of `load_model` to FP16.
# NOTE: `.half()` converts the module in place and returns the same object, so
# from this cell onward `load_model` itself holds FP16 weights. Any cell that
# needs FP32 weights must not assume `load_model` is still FP32 after this runs.
load_model = load_model.half()

In [6]:
# Build the FP32 TensorRT engine.
# `load_model` may already have been cast to FP16 in place by another cell,
# which would not match the FP32 input spec / precision requested here, so
# compile an explicit FP32, eval-mode copy and leave `load_model` untouched.
# (If the weights were already rounded to FP16, upcasting does not restore the
# lost precision; that does not matter for latency measurements.)
model_fp32 = copy.deepcopy(load_model).float().eval()
trt_model = torch_tensorrt.compile(
    model_fp32,
    inputs=[torch_tensorrt.Input((1, 3, 384, 640), dtype=torch.float32)],
    enabled_precisions={torch.float32},
    truncate_long_and_double=True,
    device=torch_tensorrt.Device("cuda:0"),
    workspace_size=1 << 22,  # 4 MiB, identical to the literal 4194304
)
# torch_tensorrt.dtype.half



In [None]:
# Build the FP16 TensorRT engine.
# The in-place eval() is kept because later cells benchmark `load_model`
# directly and should see it in inference mode.
load_model.eval()
# Compile an explicit half-precision copy so this cell produces an FP16 engine
# whether or not `load_model` was already converted by an earlier cell.
model_fp16 = copy.deepcopy(load_model).half()
trt_model_half = torch_tensorrt.compile(
    model_fp16,
    inputs=[torch_tensorrt.Input((1, 3, 384, 640), dtype=torch.half)],
    enabled_precisions={torch.half},
    truncate_long_and_double=True,
    device=torch_tensorrt.Device("cuda:0"),
    workspace_size=1 << 22,  # 4 MiB, identical to the literal 4194304
)

In [5]:
import time
import numpy as np

import torch.backends.cudnn as cudnn
# Turn on cuDNN's autotuner: per the PyTorch docs this lets cuDNN benchmark
# several convolution algorithms and select the fastest one. The benchmark
# helper below feeds one fixed input shape per call, so the selection made
# during warm-up is presumably reused for the timed runs.
cudnn.benchmark = True

def benchmark(model, device="cuda:0", input_shape=(1, 3, 384, 640), dtype='fp32', nwarmup=50, nruns=100):
    """Measure the average forward-pass latency of ``model`` on random input.

    Args:
        model: Callable invoked as ``model(input_tensor)``.
        device: Device the random input is moved to (e.g. ``"cuda:0"`` or
            ``"cpu"``).
        input_shape: Shape of the random input tensor.
        dtype: ``'fp16'`` builds a half-precision input; any other value
            builds an input with ``torch.randn``'s default dtype.
        nwarmup: Number of untimed warm-up calls made before measuring.
        nruns: Number of timed calls.

    Returns:
        None. Progress (a running average every 10 timed iterations), the
        input shape and the overall average latency are printed to stdout.
    """
    if dtype == 'fp16':
        input_data = torch.randn(input_shape, dtype=torch.half)
    else:
        input_data = torch.randn(input_shape)
    input_data = input_data.to(device)

    def _wait_for_device():
        # CUDA work is queued asynchronously, so block until everything
        # submitted to the input's device has finished before reading the
        # clock. On CPU there is nothing to wait for, and calling
        # torch.cuda.synchronize() there would raise.
        if input_data.is_cuda:
            torch.cuda.synchronize(input_data.device)

    print("Warm up ...")
    with torch.no_grad():
        for _ in range(nwarmup):
            model(input_data)
    _wait_for_device()

    print("Start timing ...")
    timings = []
    with torch.no_grad():
        for i in range(1, nruns + 1):
            # perf_counter is monotonic and high resolution; time.time() can
            # jump when the wall clock is adjusted.
            start_time = time.perf_counter()
            model(input_data)
            _wait_for_device()
            timings.append(time.perf_counter() - start_time)
            if i % 10 == 0:
                print('Iteration %d/%d, ave batch time %.2f ms'%(i, nruns, np.mean(timings)*1000))

    print("Input shape:", input_data.size())
    if timings:
        print('Average batch time: %.2f ms'%(np.mean(timings)*1000))
    else:
        # nruns <= 0: avoid np.mean([]) (nan plus a RuntimeWarning).
        print('Average batch time: n/a (no timed runs)')

In [49]:
import copy
# FP16 PyTorch baseline: duplicate the model first so `load_model` itself is
# not modified by this cell, cast the duplicate to half precision, then time
# it with a half-precision input.
model_half = copy.deepcopy(load_model)
model_half = model_half.half()
benchmark(model=model_half, dtype='fp16')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 10.96 ms
Iteration 20/100, ave batch time 10.30 ms
Iteration 30/100, ave batch time 10.15 ms
Iteration 40/100, ave batch time 10.03 ms
Iteration 50/100, ave batch time 10.10 ms
Iteration 60/100, ave batch time 10.09 ms
Iteration 70/100, ave batch time 10.05 ms
Iteration 80/100, ave batch time 10.04 ms
Iteration 90/100, ave batch time 10.01 ms
Iteration 100/100, ave batch time 9.97 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 9.97 ms


In [46]:
# FP32 PyTorch baseline. `benchmark` builds an FP32 input by default, while
# `load_model` is cast to FP16 in place by an earlier cell, so time an explicit
# FP32 copy instead of `load_model` itself to avoid an input/weight dtype
# mismatch when the notebook is run top to bottom.
# (If the weights were already rounded to FP16, upcasting does not restore the
# lost precision; that does not matter for latency measurements.)
model_fp32 = copy.deepcopy(load_model).float()
benchmark(model_fp32)

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 15.64 ms
Iteration 20/100, ave batch time 15.95 ms
Iteration 30/100, ave batch time 15.88 ms
Iteration 40/100, ave batch time 15.84 ms
Iteration 50/100, ave batch time 15.77 ms
Iteration 60/100, ave batch time 15.79 ms
Iteration 70/100, ave batch time 15.78 ms
Iteration 80/100, ave batch time 15.79 ms
Iteration 90/100, ave batch time 15.79 ms
Iteration 100/100, ave batch time 15.78 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 15.78 ms


In [6]:
# Time the TensorRT module compiled for FP16, feeding it a half-precision input.
benchmark(model=trt_model_half, dtype='fp16')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 4.41 ms
Iteration 20/100, ave batch time 4.38 ms
Iteration 30/100, ave batch time 4.37 ms
Iteration 40/100, ave batch time 4.36 ms
Iteration 50/100, ave batch time 4.36 ms
Iteration 60/100, ave batch time 4.36 ms
Iteration 70/100, ave batch time 4.37 ms
Iteration 80/100, ave batch time 4.37 ms
Iteration 90/100, ave batch time 4.37 ms
Iteration 100/100, ave batch time 4.37 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 4.37 ms


In [54]:
# Time the TensorRT module compiled for FP32 ('fp32' is benchmark's default dtype).
benchmark(model=trt_model, dtype='fp32')

Warm up ...
Start timing ...
Iteration 10/100, ave batch time 14.61 ms
Iteration 20/100, ave batch time 14.90 ms
Iteration 30/100, ave batch time 14.90 ms
Iteration 40/100, ave batch time 14.92 ms
Iteration 50/100, ave batch time 14.86 ms
Iteration 60/100, ave batch time 14.90 ms
Iteration 70/100, ave batch time 14.86 ms
Iteration 80/100, ave batch time 14.86 ms
Iteration 90/100, ave batch time 14.88 ms
Iteration 100/100, ave batch time 14.91 ms
Input shape: torch.Size([1, 3, 384, 640])
Average batch time: 14.91 ms
