# Installations

In [8]:
!pip -q install torch
!pip -q install torchvision
!pip -q install albumentations
!pip -q install onnx
!pip -q install opencv-python
!pip -q install pynvml
!pip -q install nvidia-pyindex
!pip -q install nvidia-tensorrt==8.4.3.1



# Resnet Model Conversion to ONNX

In [9]:
import warnings
warnings.filterwarnings("ignore")

import torch
from torchvision import models

# Load pretrained ResNet50 model
model = models.resnet50(pretrained=True)
model.eval()

# Dummy input for the ONNX export
dummy_input = torch.randn(1, 3, 224, 224)  # Batch size of 1, 3 color channels, 224x224 resolution

# Convert to ONNX format
ONNX_FILE_PATH = 'resnet50.onnx'
torch.onnx.export(model, dummy_input, ONNX_FILE_PATH, input_names=['input'], output_names=['output'], export_params=True)

print(f"Model successfully converted to {ONNX_FILE_PATH}")


Model successfully converted to resnet50.onnx


# Building The Engine


In [11]:
import tensorrt as trt
import os


warnings.filterwarnings("ignore")

def build_engine_from_onnx(onnx_file_path, engine_file_path, max_batch_size=1, fp16_mode=False, int8_mode=False):
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(EXPLICIT_BATCH) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        
        config = builder.create_builder_config()
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB
        if fp16_mode:
            config.set_flag(trt.BuilderFlag.FP16)
        if int8_mode:
            config.set_flag(trt.BuilderFlag.INT8)
        
        # Parse ONNX
        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        
        profile = builder.create_optimization_profile()
        input_name = network.get_input(0).name
        input_shape = network.get_input(0).shape
        profile.set_shape(input_name, (1, *input_shape[1:]), (max_batch_size, *input_shape[1:]), (max_batch_size, *input_shape[1:]))
        config.add_optimization_profile(profile)
        
        print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
        serialized_engine = builder.build_serialized_network(network, config)
        print("Completed creating engine")
        
        with open(engine_file_path, "wb") as f:
            f.write(serialized_engine)
        
        return serialized_engine
print("Engine has been Builded")

Engine has been Builded


# Loading the Engine

In [12]:
warnings.filterwarnings("ignore")
def load_engine(engine_file_path):
    with open(engine_file_path, 'rb') as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

onnx_file_path = "/kaggle/working/resnet50.onnx"
engine_file_path = "resnet50.engine"

# Convert ONNX to TensorRT engine if it doesn't exist
if not os.path.exists(engine_file_path):
    serialized_engine = build_engine_from_onnx(onnx_file_path, engine_file_path)
    with trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
        engine = runtime.deserialize_cuda_engine(serialized_engine)
else:
    # Load the existing engine
    engine = load_engine(engine_file_path)

# Create execution context
context = engine.create_execution_context()

print("TensorRT engine loaded and execution context created successfully.")

[10/03/2024-13:47:14] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
TensorRT engine loaded and execution context created successfully.
[10/03/2024-13:47:14] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0


# Helper Functions

In [13]:
def preprocess_image(image_path, img_size=(224, 224)):
    input_image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)  # create a mini-batch as expected by the model
    return input_batch


# Function to load images from folder
def load_images_from_folder(folder_path):
    image_paths = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            image_paths.append(os.path.join(folder_path, filename))
    return image_paths

# General Comparsion for 100 Images

In [15]:
import os
import tensorrt as trt
import torch
import time
import pycuda.driver as cuda
import pycuda.autoinit
from pynvml import nvmlInit, nvmlShutdown, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
import numpy as np
from PIL import Image
from torchvision import transforms
from statistics import mean, stdev

# Initialize the NVML library
nvmlInit()

warnings.filterwarnings("ignore")



def benchmark_pytorch_model(model, images, preprocess_image):
    total_time = 0
    inference_times = []
    
    for img_path in images:
        input_tensor = preprocess_image(img_path).cuda()
        torch.cuda.synchronize()  # Synchronize GPU before timing
        start_time = time.time()
        
        with torch.no_grad():
            model(input_tensor)  # Perform inference
        
        torch.cuda.synchronize()  # Synchronize again after inference
        end_time = time.time()
        
        inference_time = end_time - start_time
        total_time += inference_time
        inference_times.append(inference_time)
        
        gpu_used, _, _ = get_gpu_metrics()
        print(f"Image: {img_path}, Inference time (PyTorch): {inference_time:.4f} s, GPU used: {gpu_used:.2f} MB")
    
    return total_time, inference_times


def benchmark_tensorrt_model(context, engine, images, bindings, stream, input_shape, preprocess_image):
    total_time = 0
    inference_times = []
    
    for img_path in images:
        input_tensor = preprocess_image(img_path).cpu().numpy()

        # Handle dynamic input shape
        if engine.has_implicit_batch_dimension:
            context.set_binding_shape(0, input_tensor.shape)
        else:
            context.set_binding_shape(0, (1, *input_tensor.shape[1:]))

        # Ensure input matches expected shape
        if input_tensor.shape != tuple(input_shape):
            input_tensor = input_tensor.reshape(input_shape)

        # Copy input data to GPU memory
        cuda.memcpy_htod_async(bindings[0], input_tensor, stream)

        # Allocate output memory
        output = cuda.pagelocked_empty(tuple(context.get_binding_shape(1)), dtype=np.float32)
        output_memory = cuda.mem_alloc(output.nbytes)
        bindings[1] = int(output_memory)

        # Measure inference time
        stream.synchronize()
        start_time = time.time()
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        stream.synchronize()
        end_time = time.time()

        # Copy output data from GPU memory
        cuda.memcpy_dtoh_async(output, output_memory, stream)
        stream.synchronize()

        # Free the output memory
        output_memory.free()

        # Calculate inference time and GPU memory usage
        inference_time = end_time - start_time
        total_time += inference_time
        inference_times.append(inference_time)
        
        gpu_used, _, _ = get_gpu_metrics()
        print(f"Image: {img_path}, Inference time (TensorRT): {inference_time:.4f} s, GPU used: {gpu_used:.2f} MB")
    
    return total_time, inference_times


def print_inference_stats(times, model_name):
    print(f"--- {model_name} Inference Time Statistics ---")
    print(f"Average Inference Time: {mean(times):.4f} s")
    print(f"Standard Deviation: {stdev(times):.4f} s")
    print(f"Min Inference Time: {min(times):.4f} s")
    print(f"Max Inference Time: {max(times):.4f} s")
    print()

def compare_models(times_pytorch, times_tensorrt):
    avg_pytorch = mean(times_pytorch)
    avg_tensorrt = mean(times_tensorrt)

    speedup_factor = avg_pytorch / avg_tensorrt
    percentage_faster = (speedup_factor - 1) * 100

    print("Comparison between PyTorch and TensorRT:")
    print(f"Average PyTorch Inference Time: {avg_pytorch:.4f} s")
    print(f"Average TensorRT Inference Time: {avg_tensorrt:.4f} s")
    print(f"TensorRT is {speedup_factor:.2f}x faster than PyTorch.")
    print()


# Main execution
if __name__ == "__main__":
    onnx_file_path = "/kaggle/working/resnet50.onnx"
    engine_file_path = "resnet50.engine"
    
    print(f"TensorRT version: {trt.__version__}")
    
    # Convert ONNX to TensorRT engine if it doesn't exist
    if not os.path.exists(engine_file_path):
        print(f"Building TensorRT engine from {onnx_file_path}")
        engine = build_engine_from_onnx(onnx_file_path, engine_file_path)
    else:
        print(f"Loading existing TensorRT engine from {engine_file_path}")
        engine = load_engine(engine_file_path)
    
    # Create execution context
    context = engine.create_execution_context()
    print("----------------TensorRT engine loaded and execution context created successfully.-----------")
    
    if os.path.exists(engine_file_path):
        print(f"Confirmed: TensorRT engine file saved at {engine_file_path}")
        print(f"File size: {os.path.getsize(engine_file_path) / (1024 * 1024):.2f} MB")
    else:
        print(f"Warning: Expected engine file {engine_file_path} not found!")

    # Set up GPU memory for input and output
    input_shape = context.get_binding_shape(0)
    output_shape = context.get_binding_shape(1)
    print(f"Input tensor shape: {input_shape}")
    print(f"Output tensor shape: {output_shape}")

    # Allocate device memory
    d_input = cuda.mem_alloc(trt.volume(input_shape) * trt.float32.itemsize)
    d_output = cuda.mem_alloc(trt.volume(output_shape) * trt.float32.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()

    # Load images for benchmarking
    image_folder = "/kaggle/input/dataset/images"  # Replace with your image folder path
    images = load_images_from_folder(image_folder)

    # Load the PyTorch ResNet50 model for benchmarking
    pytorch_model = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True).eval().cuda()

    # Benchmark PyTorch model
    print("Running benchmark for PyTorch ResNet50 model...")
    total_time_pytorch, inference_times_pytorch = benchmark_pytorch_model(pytorch_model, images, preprocess_image)

    # Benchmark TensorRT model
    print("Running benchmark for TensorRT ResNet50 model...")
    total_time_trt, inference_times_trt = benchmark_tensorrt_model(context, engine, images, bindings, stream, input_shape, preprocess_image)

    # Print statistics
    print_inference_stats(inference_times_pytorch, "PyTorch")
    print_inference_stats(inference_times_trt, "TensorRT")
    
    compare_models(inference_times_pytorch, inference_times_trt)
    
    
    d_input.free()
    d_output.free()
    nvmlShutdown()  


TensorRT version: 8.4.3.1
Loading existing TensorRT engine from resnet50.engine
[10/03/2024-13:53:23] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
----------------TensorRT engine loaded and execution context created successfully.-----------
Confirmed: TensorRT engine file saved at resnet50.engine
File size: 99.47 MB
Input tensor shape: (1, 3, 224, 224)
Output tensor shape: (1, 1000)
[10/03/2024-13:53:23] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0


Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Image: /kaggle/input/dataset/images/image27.jpg, Inference time (PyTorch): 0.0089 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image2542.jpg, Inference time (PyTorch): 0.0085 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image2621.jpg, Inference time (PyTorch): 0.0088 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image56.jpg, Inference time (PyTorch): 0.0083 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image2.jpg, Inference time (PyTorch): 0.0085 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image2572.jpg, Inference time (PyTorch): 0.0086 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image47.jpg, Inference time (PyTorch): 0.0084 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image40.jpg, Inference time (PyTorch): 0.0085 s, GPU used: 2897.88 MB
Image: /kaggle/input/dataset/images/image34.jpg, Inference time (PyTorch): 0.0084 s, GPU used: 2897

# Playing with batch size, Precision(INT8, FP16) and Image size

In [20]:
# Initialize NVML for GPU memory metrics
nvmlInit()

def get_gpu_metrics(device_index=0):
    nvml_device = nvmlDeviceGetHandleByIndex(device_index)
    memory_info = nvmlDeviceGetMemoryInfo(nvml_device)
    gpu_usage = memory_info.used / (1024 ** 2)  # Convert to MB
    return gpu_usage, memory_info.free / (1024 ** 2), memory_info.total / (1024 ** 2)


def preprocess_image(image_path, img_size):
    input_image = Image.open(image_path).convert("RGB")
    preprocess = transforms.Compose([
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    return input_tensor

def load_images_from_folder(folder_path):
    image_paths = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            image_paths.append(os.path.join(folder_path, filename))
    return image_paths

def benchmark_pytorch_model(model, images, preprocess_image, batch_size, img_size):
    total_time = 0
    inference_times = []

    for i in range(0, len(images), batch_size):
        batch_images = images[i:i+batch_size]
        input_batch = torch.stack([preprocess_image(img_path, img_size) for img_path in batch_images]).cuda()

        torch.cuda.synchronize()
        start_time = time.time()

        with torch.no_grad():
            model(input_batch)

        torch.cuda.synchronize()
        end_time = time.time()

        inference_time = end_time - start_time
        total_time += inference_time
        inference_times.append(inference_time)

        gpu_used, _, _ = get_gpu_metrics()
        print(f"Batch {i//batch_size + 1}, Inference time (PyTorch): {inference_time:.4f} s, GPU used: {gpu_used:.2f} MB")

    return total_time, inference_times

def benchmark_tensorrt_model(context, engine, images, bindings, stream, input_shape, preprocess_image, batch_size, img_size):
    total_time = 0
    inference_times = []

    for i in range(0, len(images), batch_size):
        batch_images = images[i:i+batch_size]
        input_batch = np.stack([preprocess_image(img_path, img_size).numpy() for img_path in batch_images])

        if engine.has_implicit_batch_dimension:
            context.set_binding_shape(0, input_batch.shape)
        else:
            context.set_binding_shape(0, (batch_size, *input_shape[1:]))

        # Allocate device memory
        d_input = cuda.mem_alloc(input_batch.nbytes)
        d_output = cuda.mem_alloc(trt.volume(context.get_binding_shape(1)) * trt.float32.itemsize)
        bindings = [int(d_input), int(d_output)]

        # Copy input data to device
        cuda.memcpy_htod_async(d_input, input_batch, stream)

        # Run inference
        stream.synchronize()
        start_time = time.time()
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        stream.synchronize()
        end_time = time.time()

        inference_time = end_time - start_time
        total_time += inference_time
        inference_times.append(inference_time)

        gpu_used, _, _ = get_gpu_metrics()
        print(f"Batch {i//batch_size + 1}, Inference time (TensorRT): {inference_time:.4f} s, GPU used: {gpu_used:.2f} MB")

        # Free device memory
        d_input.free()
        d_output.free()

    return total_time, inference_times

def print_inference_stats(times, model_name):
    print(f"--- {model_name} Inference Time Statistics ---")
    print(f"Average Inference Time: {mean(times):.4f} s")
    print(f"Standard Deviation: {stdev(times):.4f} s")
    print(f"Min Inference Time: {min(times):.4f} s")
    print(f"Max Inference Time: {max(times):.4f} s")
    print()

def compare_models(times_pytorch, times_tensorrt, precision, batch_size):
    avg_pytorch = mean(times_pytorch)
    avg_tensorrt = mean(times_tensorrt)

    speedup_factor = avg_pytorch / avg_tensorrt
    percentage_faster = (speedup_factor - 1) * 100

    print(f"Comparison (Batch Size {batch_size}, Precision {precision}):")
    print(f"Average PyTorch Inference Time: {avg_pytorch:.4f} s")
    print(f"Average TensorRT Inference Time: {avg_tensorrt:.4f} s")
    print(f"TensorRT is {speedup_factor:.2f}x faster than PyTorch.")
    print(f"TensorRT is {percentage_faster:.2f}% faster than PyTorch.")
    print()

def run_experiment(batch_size, precision, img_size, engine, context):
    print(f"Running experiment with batch size: {batch_size}, precision: {precision}, image size: {img_size}")

    # Load images
    image_folder = "/kaggle/input/dataset/images"
    images = load_images_from_folder(image_folder)

    # Set up GPU memory for input and output
    input_shape = context.get_binding_shape(0)

    # Create CUDA stream
    stream = cuda.Stream()

    # Load PyTorch model
    pytorch_model = torch.hub.load('pytorch/vision', 'resnet50', pretrained=True).eval().cuda()

    # Benchmark PyTorch model
    print("Running benchmark for PyTorch ResNet50 model...")
    total_time_pytorch, inference_times_pytorch = benchmark_pytorch_model(pytorch_model, images, preprocess_image, batch_size, img_size)

    # Benchmark TensorRT model
    print("Running benchmark for TensorRT ResNet50 model...")
    total_time_trt, inference_times_trt = benchmark_tensorrt_model(context, engine, images, None, stream, input_shape, preprocess_image, batch_size, img_size)

    # Print statistics
    print_inference_stats(inference_times_pytorch, f"PyTorch (Batch {batch_size}, {img_size[0]}x{img_size[1]})")
    print_inference_stats(inference_times_trt, f"TensorRT (Batch {batch_size}, {precision}, {img_size[0]}x{img_size[1]})")

    # Compare results
    compare_models(inference_times_pytorch, inference_times_trt, precision, batch_size)

if __name__ == '__main__':
    # Build or load TensorRT engine
    onnx_file_path = "/kaggle/working/resnet50.onnx"
    engine_file_path = "resnet50.engine"
    
    if not os.path.exists(engine_file_path):
        print(f"Building engine from {onnx_file_path}...")
        build_engine_from_onnx(onnx_file_path, engine_file_path, max_batch_size=32, precision='FP16', img_size=(224, 224))
        build_engine_from_onnx(onnx_file_path, engine_file_path.replace(".engine", "_INT8.engine"), max_batch_size=32, precision='INT8', img_size=(224, 224))
    
    # Load TensorRT engines
    engine_fp16 = load_engine(engine_file_path)
    engine_int8 = load_engine(engine_file_path.replace(".engine", ".engine"))

    context_fp16 = engine_fp16.create_execution_context()
    context_int8 = engine_int8.create_execution_context()

    # Experiment with different configurations
    batch_sizes = [8, 16, 32]
    for batch_size in batch_sizes:
        run_experiment(batch_size, 'FP16', (224, 224), engine_fp16, context_fp16)
        run_experiment(batch_size, 'INT8', (224, 224), engine_int8, context_int8)

    nvmlShutdown()  # Clean up NVML


[10/03/2024-13:55:08] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
[10/03/2024-13:55:08] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
[10/03/2024-13:55:08] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
Running experiment with batch size: 8, precision: FP16, image size: (224, 224)
[10/03/2024-13:55:08] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0


Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0377 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0377 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0377 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0303 s, GPU used: 3311.88 MB
Batch 5, Inference time (PyTorch): 0.0285 s, GPU used: 3311.88 MB
Batch 6, Inference time (PyTorch): 0.0282 s, GPU used: 3311.88 MB
Batch 7, Inference time (PyTorch): 0.0279 s, GPU used: 3311.88 MB
Batch 8, Inference time (PyTorch): 0.0254 s, GPU used: 3311.88 MB
Batch 9, Inference time (PyTorch): 0.0224 s, GPU used: 3311.88 MB
Batch 10, Inference time (PyTorch): 0.0226 s, GPU used: 3311.88 MB
Batch 11, Inference time (PyTorch): 0.0225 s, GPU used: 3311.88 MB
Batch 12, Inference time (PyTorch): 0.0225 s, GPU used: 3311.88 MB
Batch 13, Inference time (PyTorch): 0.0119 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:10] [TRT] [E] 3: [execu

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0215 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0222 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0227 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0222 s, GPU used: 3311.88 MB
Batch 5, Inference time (PyTorch): 0.0226 s, GPU used: 3311.88 MB
Batch 6, Inference time (PyTorch): 0.0227 s, GPU used: 3311.88 MB
Batch 7, Inference time (PyTorch): 0.0224 s, GPU used: 3311.88 MB
Batch 8, Inference time (PyTorch): 0.0224 s, GPU used: 3311.88 MB
Batch 9, Inference time (PyTorch): 0.0223 s, GPU used: 3311.88 MB
Batch 10, Inference time (PyTorch): 0.0223 s, GPU used: 3311.88 MB
Batch 11, Inference time (PyTorch): 0.0225 s, GPU used: 3311.88 MB
Batch 12, Inference time (PyTorch): 0.0224 s, GPU used: 3311.88 MB
Batch 13, Inference time (PyTorch): 0.0124 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:12] [TRT] [E] 3: [execu

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0389 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0391 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0394 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0393 s, GPU used: 3311.88 MB
Batch 5, Inference time (PyTorch): 0.0388 s, GPU used: 3311.88 MB
Batch 6, Inference time (PyTorch): 0.0401 s, GPU used: 3311.88 MB
Batch 7, Inference time (PyTorch): 0.0140 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:13] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setBindingDimensions::976, condition: profileMaxDims.d[i] >= dimensions.d[i]. Supplied binding dimension [16,3,224,224] for bindings[0] exceed min ~ max range at index 0, maximum dimension in profile is 1, minimum dimension in profile is 1, but supplied dimension 

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0384 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0399 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0400 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0396 s, GPU used: 3311.88 MB
Batch 5, Inference time (PyTorch): 0.0393 s, GPU used: 3311.88 MB
Batch 6, Inference time (PyTorch): 0.0386 s, GPU used: 3311.88 MB
Batch 7, Inference time (PyTorch): 0.0137 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:15] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setBindingDimensions::976, condition: profileMaxDims.d[i] >= dimensions.d[i]. Supplied binding dimension [16,3,224,224] for bindings[0] exceed min ~ max range at index 0, maximum dimension in profile is 1, minimum dimension in profile is 1, but supplied dimension 

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0827 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0823 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0822 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0151 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:17] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setBindingDimensions::976, condition: profileMaxDims.d[i] >= dimensions.d[i]. Supplied binding dimension [32,3,224,224] for bindings[0] exceed min ~ max range at index 0, maximum dimension in profile is 1, minimum dimension in profile is 1, but supplied dimension is 32.
)
Batch 1, Inference time (TensorRT): 0.0029 s, GPU used: 3331.88 MB
[10/03/2024-13:55:17] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parame

Using cache found in /root/.cache/torch/hub/pytorch_vision_main


Running benchmark for PyTorch ResNet50 model...
Batch 1, Inference time (PyTorch): 0.0811 s, GPU used: 3311.88 MB
Batch 2, Inference time (PyTorch): 0.0818 s, GPU used: 3311.88 MB
Batch 3, Inference time (PyTorch): 0.0829 s, GPU used: 3311.88 MB
Batch 4, Inference time (PyTorch): 0.0144 s, GPU used: 3311.88 MB
Running benchmark for TensorRT ResNet50 model...
[10/03/2024-13:55:18] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::setBindingDimensions::976, condition: profileMaxDims.d[i] >= dimensions.d[i]. Supplied binding dimension [32,3,224,224] for bindings[0] exceed min ~ max range at index 0, maximum dimension in profile is 1, minimum dimension in profile is 1, but supplied dimension is 32.
)
Batch 1, Inference time (TensorRT): 0.0032 s, GPU used: 3331.88 MB
[10/03/2024-13:55:18] [TRT] [E] 3: [executionContext.cpp::setBindingDimensions::976] Error Code 3: API Usage Error (Parame