In [3]:
import time
import torch
import torch_tensorrt
import torchvision.models as models
import tensorrt as trt

In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Bare expression so the notebook displays the selected device.
device

'cuda'

# Compiling ResNet using the Torch-TensorRT torch.compile Backend

### With Full Precision

In [4]:
# ResNet-101 with the default pretrained weights, switched to eval mode
# and moved onto the selected device.
model = (
    models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
    .eval()
    .to(device=device)
)

In [5]:
# Single random sample of shape (1, 3, 224, 224), created on the host and
# then copied to the selected device; handed to the compiler as example input.
inputs = [
    torch.randn((1, 3, 224, 224)).to(device),
]

In [6]:
# Options forwarded to torch_tensorrt.compile for the full-precision build.
enabled_precisions = {torch.float}  # build with FP32 only
debug = True  # verbose compilation logging
workspace_size = 20 << 30  # 20 * 2**30 (presumably bytes, i.e. 20 GiB -- confirm against Torch-TensorRT docs)
min_block_size = 7
# `{}` is an empty *dict* literal; use an explicit empty set so the value has
# the same collection type as a non-empty set of op names would.
torch_executed_ops = set()

In [7]:
# Gather the keyword options first, then hand the model to Torch-TensorRT.
# ir="torch_compile" selects the torch.compile backend path.
compile_options = {
    "ir": "torch_compile",
    "inputs": inputs,
    "enabled_precisions": enabled_precisions,
    "debug": debug,
    "workspace_size": workspace_size,
    "min_block_size": min_block_size,
    "torch_executed_ops": torch_executed_ops,
}
optimized_model = torch_tensorrt.compile(model, **compile_options)

In [11]:
# Smoke test: push one fresh random sample through the optimized model and
# show the shape of what comes back.
new_inputs = [
    torch.randn((1, 3, 224, 224)).to(device),
]
new_outputs = optimized_model(*new_inputs)
print(new_outputs.shape)

torch.Size([1, 1000])


In [None]:
# Predict with TensorRT model
# Times one forward pass of the optimized model on a batch of 20 samples.
new_inputs = [torch.randn((20,3,224,224)).to(device)]

# Untimed warm-up call at the benchmark batch shape, so that any one-time
# cost of the first call with this shape (e.g. compilation or lazy
# initialisation) is not counted in the measurement below.
optimized_model(*new_inputs)
if device == "cuda":
    torch.cuda.synchronize()

start_time = time.perf_counter()
new_outputs = optimized_model(*new_inputs)
if device == "cuda":
    # CUDA work is queued asynchronously; wait for it to finish so the
    # elapsed time covers the actual computation, not just the launch.
    torch.cuda.synchronize()
end_time = time.perf_counter()
total_time = end_time - start_time
print(f"Total optimized model runtime: {total_time:.4f} seconds")
print(new_outputs.shape)

# Drop the references to the batch tensors.
# (Was `del new_ouputs`, a typo that raised NameError.)
del new_inputs
del new_outputs

INFO:torch_tensorrt.dynamo.utils:Using Default Torch-TRT Runtime (as requested by user)
INFO:torch_tensorrt.dynamo.utils:Device not specified, using Torch default current device - cuda:0. If this is incorrect, please specify an input device, via the device keyword.
INFO:torch_tensorrt.dynamo.utils:Compilation Settings: CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=True, workspace_size=21474836480, min_block_size=7, torch_executed_ops={}, pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, refit=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_s

In [None]:
# Predict with Torch  model
# Times one forward pass of the eager PyTorch model on a batch of 20 samples.
new_inputs = [torch.randn((20,3,224,224)).to(device)]

# Untimed warm-up call at the benchmark batch shape, so one-time
# first-call overhead is not counted in the measurement below.
model(*new_inputs)
if device == "cuda":
    torch.cuda.synchronize()

start_time = time.perf_counter()
new_outputs = model(*new_inputs)
if device == "cuda":
    # CUDA work is queued asynchronously; wait for it to finish so the
    # elapsed time covers the actual computation, not just the launch.
    torch.cuda.synchronize()
end_time = time.perf_counter()
total_time = end_time - start_time
print(f"Total torch model runtime: {total_time:.4f} seconds")
print(new_outputs.shape)

# Drop the references to the batch tensors.
# (Was `del new_ouputs`, a typo that raised NameError.)
del new_inputs
del new_outputs

### With Half Precision

In [5]:
# ResNet-101 with the default pretrained weights, cast to half precision,
# switched to eval mode and moved onto the selected device.
model = (
    models.resnet101(weights=models.ResNet101_Weights.DEFAULT)
    .half()
    .eval()
    .to(device=device)
)

In [6]:
# Single random sample of shape (1, 3, 224, 224): created on the host, copied
# to the selected device, then cast to half precision to match the model.
inputs = [
    torch.randn((1, 3, 224, 224)).to(device).half(),
]

In [7]:
# Options forwarded to torch_tensorrt.compile for the half-precision build.
enabled_precisions = {torch.half}  # build with FP16 only
debug = True  # verbose compilation logging
workspace_size = 20 << 30  # 20 * 2**30 (presumably bytes, i.e. 20 GiB -- confirm against Torch-TensorRT docs)
min_block_size = 7
# `{}` is an empty *dict* literal; use an explicit empty set so the value has
# the same collection type as a non-empty set of op names would.
torch_executed_ops = set()

In [8]:
# Gather the keyword options first, then hand the half-precision model to
# Torch-TensorRT. ir="torch_compile" selects the torch.compile backend path.
compile_options = {
    "ir": "torch_compile",
    "inputs": inputs,
    "enabled_precisions": enabled_precisions,
    "debug": debug,
    "workspace_size": workspace_size,
    "min_block_size": min_block_size,
    "torch_executed_ops": torch_executed_ops,
}
optimized_model = torch_tensorrt.compile(model, **compile_options)

In [11]:
# Smoke test: push one fresh half-precision random sample through the
# optimized model and show the shape of what comes back.
new_inputs = [
    torch.randn((1, 3, 224, 224)).to(device).half(),
]
new_outputs = optimized_model(*new_inputs)
print(new_outputs.shape)

torch.Size([1, 1000])


In [76]:
# Predict with TensorRT model
new_inputs = [torch.randn((100,3,224,224)).to(device).half()]
start_time = time.time()
new_outputs = optimized_model(*new_inputs)
end_time = time.time()
total_time = end_time - start_time
print(f"Total optimized model runtime: {total_time:.4f} seconds")
print(new_outputs.shape)

Total optimized model runtime: 0.0007 seconds
torch.Size([100, 1000])


In [77]:
# Predict with Torch  model
new_inputs = [torch.randn((100,3,224,224)).half().to(device)]
start_time = time.time()
new_outputs = model(*new_inputs)
end_time = time.time()
total_time = end_time - start_time
print(f"Total torch model runtime: {total_time:.4f} seconds")
print(new_outputs.shape)

Total torch model runtime: 0.0037 seconds
torch.Size([100, 1000])
