In [1]:
import os
import time
from transformers import AutoFeatureExtractor, AutoModelForImageClassification, ViTImageProcessor
import torch
from PIL import Image
from sklearn.metrics import accuracy_score
import torch_tensorrt
import numpy

In [2]:
# Path to the ViT model that will be converted.
path_to_model = "weights/my_model"

# Load the image processor and the classification model from the same path.
extractor = ViTImageProcessor.from_pretrained(path_to_model)
vit_model = AutoModelForImageClassification.from_pretrained(path_to_model)

INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmpb4nxsmuk
INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmpb4nxsmuk/_remote_module_non_scriptable.py


In [3]:
def model_use(model, img):
    """Run `model` on one preprocessed input and return the predicted class name.

    Args:
        model: Callable invoked as ``model(**img)``. Its result must expose
            ``.logits`` and the model itself must expose ``config.id2label``.
        img: Mapping of keyword arguments forwarded to the model.

    Returns:
        The ``config.id2label`` entry for the arg-max class index.
    """
    # Inference only: gradient tracking is switched off for the forward pass.
    with torch.no_grad():
        scores = model(**img).logits

    # .item() needs the arg-max result to hold exactly one element
    # (presumably a single-image batch -- confirm against callers).
    best_class = torch.argmax(scores, dim=-1).item()

    return model.config.id2label[best_class]


# Helper for running the TensorRT-compiled model.
def trt_model_use(model, img, id2label=None):
    """Run the compiled model on one input tensor and return the predicted class name.

    Args:
        model: Callable invoked as ``model(img)``; its result must expose ``.logits``.
        img: Input passed positionally to the model (the notebook passes
            ``inputs['pixel_values']``).
        id2label: Optional mapping from class index to class name. Defaults to
            ``{0: "cats", 1: "dogs"}``, the mapping this helper always used before.

    Returns:
        The class name for the arg-max of the first row of the logits.
    """
    if id2label is None:
        id2label = {0: "cats", 1: "dogs"}

    output = model(img)

    # Only the first element of the batch is classified.
    predicted_label = output.logits[0].argmax(-1).item()

    return id2label[predicted_label]

In [4]:
# Directory with the test images.
path_to_images = "data/"

# os.listdir returns entries in arbitrary order; sort them so that positional
# lookups such as images_list[1] pick the same file on every run.
images_list = sorted(os.listdir(path_to_images))

In [5]:
# Helper that reports the size of a model.
def size_measurement(model):
    """Print the combined size of the model's parameters and buffers.

    The size is the sum of ``nelement() * element_size()`` over everything in
    ``model.parameters()`` and ``model.buffers()``, divided by 1024 ** 2.
    Nothing is returned.
    """
    def _total_bytes(tensors):
        # Element count times bytes per element, summed over all tensors.
        return sum(t.nelement() * t.element_size() for t in tensors)

    total_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    print('model size: {:.3f}MB'.format(total_bytes / (1024 ** 2)))

In [6]:
# Measure the size of the original (unconverted) model.
size_measurement(vit_model)

model size: 327.302MB


In [7]:
# Put the model in eval mode and move it to the GPU.
model = vit_model.eval().to("cuda")

# Preprocess one sample image; it is the input for the first call below.
# os.path.join keeps the path valid whether or not path_to_images ends with a separator.
image = Image.open(os.path.join(path_to_images, images_list[1]), mode='r', formats=None)
inputs = extractor(image, return_tensors="pt").to("cuda")

# Enabled precision for TensorRT optimization (FP32 only)
enabled_precisions = {torch.float}

# Whether to print verbose logs
debug = True

# Workspace size for TensorRT (20 << 30 == 20 * 2**30, i.e. 20 GiB in bytes)
workspace_size = 20 << 30

# Minimum number of operators per TRT engine block
# (Lower value allows more graph segmentation)
min_block_size = 7

# Operations to run in Torch, regardless of converter support.
# This must be an empty *set*: `{}` would create an empty dict instead.
torch_executed_ops = set()

# Define backend compilation keyword arguments
compilation_kwargs = {
    "enabled_precisions": enabled_precisions,
    "debug": debug,
    "workspace_size": workspace_size,
    "min_block_size": min_block_size,
    "torch_executed_ops": torch_executed_ops,
}

# Build and compile the model with torch.compile, using Torch-TensorRT backend
optimized_model = torch.compile(
    model,
    backend="torch_tensorrt",
    options=compilation_kwargs,
)

# torch.compile is lazy: this first call is what triggers tracing and the
# TensorRT engine builds.
output = optimized_model(inputs['pixel_values'])

[2023-10-14 16:02:30,578] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
[2023-10-14 16:02:31,682] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo done tracing forward (RETURN_VALUE)
[2023-10-14 16:02:31,704] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function torch_tensorrt_backend
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002091


[10/14/2023-16:02:45] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:04.373623
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001781


[10/14/2023-16:02:49] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:49] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:04.038365
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002213


[10/14/2023-16:02:53] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:53] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:04.098320
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001769


[10/14/2023-16:02:57] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:57] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.207893
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001864
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.127980
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001503


[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.120234
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001524
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.128666
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002743


[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.320598
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001760


[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:58] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.205377
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001739


[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.338404
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002109


[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202797
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002623


[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.201629
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001515
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.118471
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001484


[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:02:59] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.118933
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001542
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.117703
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002750


[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.319545
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002571


[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.198522
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001712


[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:00] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.319054
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002176


[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202804
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001752


[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.201690
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001574
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.116549
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001543


[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:01] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.125773
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001504
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.125949
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002744


[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.327057
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001788


[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.222112
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001711


[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:02] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.352375
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002167


[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.206382
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001789


[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.212584
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001583
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.125498
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001548


[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.125508
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001512
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.117468
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002752


[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:03] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.321824
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001775


[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.198333
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001762


[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.339613
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002169


[10/14/2023-16:03:04] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.203186
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001779


[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.207437
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001538
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.124553
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001550


[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.118901
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001526
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.124857
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002726


[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.320959
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001820


[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.199799
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001701


[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.342371
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002173


[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:06] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.203424
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001768


[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.200208
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001544
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114205
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001562


[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.116026
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001512
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.115588
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002708


[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:07] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.323278
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001741


[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.211352
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001708


[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.350306
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002165


[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.204058
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001748


[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:08] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202314
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001537
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.115467
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001530


[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114955
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001541
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.115126
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002716


[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.319894
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001769


[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:09] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.201933
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001737


[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.345209
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002159


[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202653
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001801


[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.197406
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001606
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114238
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001697


[10/14/2023-16:03:10] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114017
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001714
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114148
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002807


[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.323105
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001795


[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202580
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001758


[10/14/2023-16:03:11] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.345517
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002195


[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.203781
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001815


[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.200738
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001521
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.113909
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001536


[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:12] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.115498
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001560
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.114291
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002755


[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.320916
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001779


[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.197043
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001697


[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:13] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.339412
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002194


[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.202053
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001766


[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.195444
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001522
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.113759
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001539


[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.112936
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001515
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.112648
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002753


[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:14] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.320285
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001756


[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.201829
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001650


[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:15] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.343835
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002185


[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.204856
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001773


[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.197839
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001533
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.121965
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001521


[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.113912
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001528
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.113142
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002726


[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:16] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.321110
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001732


[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.204362
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001848


[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.342303
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002217


[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:17] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.203808
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001758


[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.195468
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001533
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.112417
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001497


[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.113709
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.001502
INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.112205
INFO:torch_tensorrt.fx.fx2trt:TRT INetwork construction elapsed time: 0:00:00.002714


[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/14/2023-16:03:18] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up 

INFO:torch_tensorrt.fx.fx2trt:Build TRT engine elapsed time: 0:00:00.320683
[2023-10-14 16:03:19,183] torch._dynamo.output_graph: [INFO] Step 2: done compiler function torch_tensorrt_backend


[10/14/2023-16:03:19] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


In [8]:
# Check the size of the TensorRT-compiled model.
# NOTE(review): size_measurement sums the bytes of model.parameters() and model.buffers().
# The value printed for optimized_model equals the one printed for the original vit_model,
# so it presumably reflects the wrapped PyTorch weights rather than the built TRT engines —
# confirm before using this number as evidence of (no) size reduction.
size_measurement(optimized_model)

model size: 327.302MB


In [9]:
# Benchmark the TensorRT-compiled model on the test images: accuracy and throughput.
# The timed region intentionally covers the whole per-image pipeline (reading the file,
# preprocessing with the extractor, the host-to-device copy and the model call), not the
# forward pass alone.

# perf_counter is a monotonic, high-resolution clock meant for measuring intervals;
# unlike time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()

# Label encoding: dog -> 1, anything else (cat) -> 0.
target_list = []
predict_list = []

for element in images_list:

    # The context manager releases the image file handle deterministically.
    # os.path.join works whether or not path_to_images ends with a separator.
    with Image.open(os.path.join(path_to_images, element)) as image:
        inputs = extractor(image, return_tensors="pt")['pixel_values']

    predict = trt_model_use(optimized_model, inputs.to("cuda"))

    # The ground-truth class is the part of the file name before the first ".".
    # partition() returns the whole name when there is no "." at all, whereas
    # slicing with find() == -1 would silently drop the last character.
    target = element.partition(".")[0]

    label = int(target == "dog")
    target_list.append(label)

    # trt_model_use returns the class name ("cats" / "dogs"); map it to the same 0/1 encoding.
    pr = int(predict == "dogs")
    predict_list.append(pr)

end_time = time.perf_counter()
elapsed = end_time - start_time

acc = accuracy_score(target_list, predict_list)
print("Точность сконвертированной в TRT модели= ", acc)
print("Время обработки изображений сконвертированной в TRT моделью = ", elapsed, " секунд")
print("Скорость обработки изображений у сконвертированной в TRT модели составила  ", len(images_list)/elapsed, " картинок в секунду")

Точность сконвертированной в TRT модели=  0.9875
Время обработки изображений сконвертированной в TRT моделью =  6.732074975967407  секунд
Скорость обработки изображений у сконвертированной в TRT модели составила   23.76681789361798  картинок в секунду
