# TensorRT (TRT) vs PyTorch

## Common setup

In [1]:
import sys
import os
from time import time

import cv2
import numpy as np

In [2]:
# Settings shared by the benchmark cells below.
img_path = './data/root-test.jpg'  # image read with cv2.imread in both tests
test_loops, batch_size = 100, 1    # timed iterations / batch dimension

# target (width, height) passed to cv2.resize in the TRT test
w, h = 224, 224

# second dimension of the expected TRT output shape
num_classes = 1

## TRT test

In [3]:
import tensorrt as trt

from trt_toolkit import *

In [4]:
# Paths handed to create_trt_engine: the ONNX model and the engine file.
onnx_file, engine_file = './score_net.onnx', './score_net.trt'

# Forwarded to create_trt_engine as fp16_mode.
fp16_mode = False

In [5]:
# Logger handed to the engine builder; reports messages of severity INFO
# and above.
trt_logger = trt.Logger(
    min_severity=trt.tensorrt.Logger.Severity.INFO,
)

# Create the TensorRT engine from the ONNX file.
# NOTE(review): save_engine=True presumably writes the engine to
# engine_file -- confirm against create_trt_engine's implementation.
engine = create_trt_engine(
    trt_logger, onnx_file, engine_file,
    save_engine=True,
    fp16_mode=fp16_mode,
    batch_size=batch_size,
)

In [6]:
# Benchmark: run the TensorRT engine `test_loops` times on one test image
# and print the total wall-clock time and the last post-processed result.

# create context
context = engine.create_execution_context()

# allocate the input/output buffers, binding list and stream for the engine
inputs, outputs, bindings, stream = allocate_buffers(engine)

# prepare data
img = cv2.imread(img_path)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns
    # None; fail here with a clear message instead of inside cvtColor.
    raise FileNotFoundError('cannot read image: {}'.format(img_path))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (w, h))
img = img.transpose((2, 0, 1))  # HWC -> CHW

# normalization to [0, 1]
img = img / 255.

# create batch (add leading batch axis, convert to float32)
imgs = np.expand_dims(img, axis=0).astype(np.float32)

output_shape = (batch_size, num_classes)

# The input is the same for every iteration, so flatten it once here
# instead of repeating that host-side work inside the timed loop.
flat_input = imgs.reshape(-1)

# Defined up front so the final print also works when test_loops == 0.
results = None

# infer
t0 = cv2.getTickCount()

for _ in range(test_loops):
    inputs[0].host = flat_input

    outs = infer(
        context,
        bindings=bindings,
        inputs=inputs,
        outputs=outputs,
        stream=stream
    )

    results = post_process(outs[0], output_shape)

# total time for all iterations, in seconds
infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()

print(infer_time)
print(results)

0.299279383
[[0.9206087]]


## PyTorch test

In [7]:
import torch
import torchvision
import torchvision.transforms.functional as TF

from resnet import resnet as score_model

In [8]:
# Settings for the PyTorch test.
device_name = 'cuda:0'                             # device the model and input are moved to
score_model_path = './data/score_state_dict.pth'   # state dict loaded into the model

# target (width, height) passed to cv2.resize in the PyTorch test
W = H = 224

In [9]:
# Benchmark: run the PyTorch model `test_loops` times on one test image
# and print the total wall-clock time.
device = torch.device(device_name)
use_cuda = device.type == 'cuda'

# create model and load the weights onto the target device
score_net = score_model.resnet50(num_classes=1)
score_net.load_state_dict(torch.load(score_model_path, map_location=device))
score_net = score_net.to(device)
score_net.eval()  # switch to evaluation mode

# prepare data
img = cv2.imread(img_path)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns
    # None; fail here with a clear message instead of inside cv2.resize.
    raise FileNotFoundError('cannot read image: {}'.format(img_path))
# NOTE(review): the TRT test converts BGR -> RGB before resizing, this cell
# does not, so the two back ends receive different channel orders. Confirm
# which order the model was trained with and align both cells.
img = cv2.resize(img, (W, H))
img = TF.to_tensor(img).unsqueeze(0).to(device)

# Defined up front so a later print(output) also works when test_loops == 0.
output = None

# infer
# CUDA work is queued asynchronously: wait for pending work (model/input
# transfer) to finish before starting the clock ...
if use_cuda:
    torch.cuda.synchronize(device)
t0 = cv2.getTickCount()
with torch.no_grad():
    for _ in range(test_loops):
        output = score_net(img)
# ... and for all queued forward passes to finish before stopping it,
# otherwise the measurement can end while kernels are still running.
if use_cuda:
    torch.cuda.synchronize(device)
infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()

print(infer_time)

0.5387946


In [10]:
# Show the output tensor from the last iteration of the PyTorch loop so it
# can be compared with the result printed by the TRT test.
print(output)

tensor([[0.9206]], device='cuda:0')
