In [5]:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import time

# Load TensorRT Engine
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
engine_file = "lenet_model.trt"

# Read the serialized plan and deserialize it exactly once. A second,
# unchecked deserialization would overwrite the validated `engine` and
# could silently leave it as None.
with open(engine_file, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    print("Loading TensorRT engine...")
    engine_data = f.read()
    engine = runtime.deserialize_cuda_engine(engine_data)
    # Deserialization failure is reported as a None result (details go to
    # TRT_LOGGER), so turn it into an explicit error here. A plan file is only
    # usable on the platform it was built on; on a platform-tag mismatch the
    # engine has to be rebuilt on this machine.
    if engine is None:
        raise RuntimeError("Failed to deserialize TensorRT engine.")
    print("TensorRT engine loaded successfully.")

# Allocate Buffers
# Binding 0 is treated as the input and binding 1 as the output.
# NOTE(review): assumes the engine has exactly these two bindings and that
# both hold float32 data - confirm against the exported model.
context = engine.create_execution_context()
input_shape, output_shape = (engine.get_binding_shape(idx) for idx in (0, 1))

# Buffer sizes in bytes: element count of each binding times the float32 width.
input_size, output_size = (
    trt.volume(shape) * np.dtype(np.float32).itemsize
    for shape in (input_shape, output_shape)
)

# Device buffers for the two bindings, plus the stream used for copies and
# inference.
d_input, d_output = cuda.mem_alloc(input_size), cuda.mem_alloc(output_size)
stream = cuda.Stream()

# Generate a Dummy Input
# One random float32 sample with shape (1, 1, 28, 28).
# NOTE(review): the shape is hard-coded; confirm it matches `input_shape`
# reported by the engine, otherwise the copy size will not match `d_input`.
dummy_input_np = np.random.random((1, 1, 28, 28)).astype(np.float32)

# Transfer Input to GPU
# The copy is enqueued on `stream`, the same stream inference runs on, so it
# is ordered before any execution submitted to that stream afterwards.
# The source must be the array defined above; the previous `dummy_input`
# name was never defined and raised NameError.
# NOTE(review): PyCUDA documents page-locked host memory as the source for
# async copies - confirm whether `cuda.pagelocked_empty` should be used here.
cuda.memcpy_htod_async(d_input, dummy_input_np, stream)

# Measure Latency
num_iterations = 100
latency_times = []

# The binding addresses do not change between runs, so build the list once
# instead of on every iteration.
bindings = [int(d_input), int(d_output)]

for _ in range(num_iterations):
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump with system clock
    # adjustments and may be too coarse for sub-millisecond runs.
    start_time = time.perf_counter()
    enqueued = context.execute_async_v2(bindings, stream.handle)
    # Wait for the enqueued work so the interval covers the full inference.
    stream.synchronize()
    end_time = time.perf_counter()
    # execute_async_v2 signals failure through its boolean return value;
    # a failed enqueue must not be recorded as a valid latency sample.
    if not enqueued:
        raise RuntimeError("TensorRT inference enqueue failed.")
    latency_times.append(end_time - start_time)

# Calculate Average Latency
# NOTE(review): no warm-up runs are excluded, so the first iteration is
# part of the average - confirm whether that is intended.
average_latency = sum(latency_times) / num_iterations
print(f"TensorRT Optimized Latency per Sample: {average_latency * 1000:.4f} ms")

Loading TensorRT engine...
[01/28/2025-17:47:14] [TRT] [E] IRuntime::deserializeCudaEngine: Error Code 1: Serialization (Serialization assertion plan->header.pad == expectedPlatformTag failed.Platform specific tag mismatch detected. TensorRT plan files are only supported on the target runtime platform they were created on.)


RuntimeError: Failed to deserialize TensorRT engine.