In [1]:
import sys
import os

sys.path.append('/detector/')
os.chdir('/detector')

In [25]:
from src.backbone import get_model
from timm import create_model

model = create_model('efficientnet_b0', pretrained=True)

# GPU device benchmark

In [None]:
TRT_FP32_MODEL_NAME = 'weights/trt_model_fp32.ts'
TRT_FP16_MODEL_NAME = 'weights/trt_model_fp16.ts'
DEVICE = 'cuda:0'
BATCH_SIZE = 1

In [4]:
import cv2
import torch
from src.transforms import torch_preprocessing

image = cv2.imread('datasets/birdview_vehicles/train/1.jpg')[..., ::-1]
torch_input = torch_preprocessing(image, image_size=(224, 224)).to(DEVICE)
torch_input = torch.cat([torch_input] * BATCH_SIZE)

In [5]:
model.eval()
model.cuda()
with torch.no_grad():
    torch_out = model(torch_input)

In [7]:
import torch_tensorrt

trt_model_fp32 = torch_tensorrt.compile(
    model,
    inputs = [torch_tensorrt.Input((BATCH_SIZE, 3, 224, 224))],
    enabled_precisions = torch.float32,
    workspace_size = 1 << 30, # 1 гибибайт
)



In [8]:
torch.jit.save(trt_model_fp32, TRT_FP32_MODEL_NAME)

## TRT

In [12]:
import timeit

trt_model_fp32 = torch.jit.load(TRT_FP32_MODEL_NAME)

# st = time
timeit.timeit(lambda: trt_model_fp32(torch_input).cpu().numpy(), number=1000)

1.9216044959994178

## torch

In [13]:
timeit.timeit(lambda: model(torch_input).cpu().detach().numpy(), number=1000)

8.951143038000737

# CPU device benchmark

In [17]:
model.cpu()
ONNX_MODEL_NAME = 'weights/onnx_model.onnx'

dummy_input = torch.rand(1, 3, 224, 224, device='cpu')
torch.onnx.export(
    model,
    dummy_input,
    ONNX_MODEL_NAME,
    input_names=['input'],
    output_names=['output'],
#     dynamic_axes = {'input': [0], 'output': [0]}, # динамический батч, но можно и статический
)

In [19]:
import onnx

onnx_model = onnx.load(ONNX_MODEL_NAME)
onnx.checker.check_model(onnx_model)

OPENVINO_FOLDER = 'weights/openvino_model'
OPENVINO_MODEL_NAME = f'{OPENVINO_FOLDER}/onnx_model.xml'
OPENVINO_WEIGHTS_NAME = f'{OPENVINO_FOLDER}/onnx_model.bin'

In [20]:
! mo --input_model {ONNX_MODEL_NAME} --output_dir {OPENVINO_FOLDER}

[ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False.
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/2023.0/openvino_2_0_transition_guide.html
[ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release. Please use OpenVINO Model Converter (OVC). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from

In [21]:
from openvino.runtime import Core

ie = Core()
ovmodel = ie.read_model(model=OPENVINO_MODEL_NAME, weights=OPENVINO_WEIGHTS_NAME)
compiled_model = ie.compile_model(model=ovmodel)
output_layer = compiled_model.output(0)

In [23]:
import numpy as np
from src.transforms import openvino_preprocessing

openvino_input = np.concatenate([openvino_preprocessing(image, image_size=(224, 224))] * BATCH_SIZE)

## onnx

In [24]:
timeit.timeit(lambda: compiled_model([openvino_input])[output_layer], number=1000)

6.795465109998986

## torch

In [27]:
timeit.timeit(lambda: model(torch_input.cpu()).detach().numpy(), number=1000)

36.820241415000055