In [6]:
import onnxruntime as ort
import time
import numpy as np

BASE_DIR = "../exports/"

sess = ort.InferenceSession(
    BASE_DIR + "model_fp32.onnx",
    providers=["CUDAExecutionProvider"]
)
input_tensor = sess.get_inputs()[0]
dtype = np.float16 if "float16" in input_tensor.type else np.float32

x = np.random.randn(64,3,224,224).astype(dtype)

# warmup
for _ in range(20):
    sess.run(None, {"input": x})

# benchmark
t0 = time.time()
for _ in range(100):
    sess.run(None, {"input": x})
l = (time.time()-t0)/(100 * 64)
print("ONNX FP32 latency:", l)
print("ONNX FP32 throughput:", 1/l)

[1;31m2025-12-15 04:22:36.034236526 [E:onnxruntime:Default, provider_bridge_ort.cc:2251 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1844 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcudnn.so.9: cannot open shared object file: No such file or directory
[m
[0;93m2025-12-15 04:22:36.034342852 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:1013 CreateExecutionProviderFactoryInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.[m


ONNX FP32 latency: 0.00164865892380476
ONNX FP32 throughput: 606.5535967210302


In [9]:
sess = ort.InferenceSession(
    BASE_DIR + "model_fp16.onnx",
    providers=["CUDAExecutionProvider"]
)

x = np.random.randn(64,3,224,224).astype(np.float16)

# benchmark
t0 = time.time()
for _ in range(100):
    sess.run(None, {"input": x})
l = (time.time()-t0)/(100*64)
print("ONNX FP16 latency:", l)
print("ONNX FP16 throughput:", 1/l)

[1;31m2025-12-15 04:26:44.351137118 [E:onnxruntime:Default, provider_bridge_ort.cc:2251 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1844 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcudnn.so.9: cannot open shared object file: No such file or directory
[m
[0;93m2025-12-15 04:26:44.351176709 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:1013 CreateExecutionProviderFactoryInstance] Failed to create CUDAExecutionProvider. Require cuDNN 9.* and CUDA 12.*. Please install all dependencies as mentioned in the GPU requirements page (https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements), make sure they're in the PATH, and that your GPU is supported.[m


ONNX FP16 latency: 0.0021170101687312125
ONNX FP16 throughput: 472.3642875080425


In [3]:
import numpy as np
from pathlib import Path

LABEL_DIR = Path("../data/CALIB")
PRED_DIR = Path("../data/TRT_OUT_FP16")

correct = 0
total = 0

for label_file in sorted(LABEL_DIR.glob("test_labels_*.npy")):
    idx = label_file.stem.split("_")[-1]

    y_true = np.load(label_file)
    y_pred = np.load(PRED_DIR / f"trt_preds_{idx}.npy")

    pred = y_pred.argmax(axis=1)
    pred = pred[:len(y_true)] 
    correct += (pred == y_true).sum()
    total += len(y_true)

print("TensorRT FP16 accuracy:", correct / total)


TensorRT FP16 accuracy: 0.9817073170731707


In [4]:
import numpy as np
from pathlib import Path

LABEL_DIR = Path("../data/CALIB")
PRED_DIR = Path("../data/TRT_OUT_INT8")

correct = 0
total = 0

for label_file in sorted(LABEL_DIR.glob("test_labels_*.npy")):
    idx = label_file.stem.split("_")[-1]

    y_true = np.load(label_file)
    y_pred = np.load(PRED_DIR / f"trt_preds_{idx}.npy")

    pred = y_pred.argmax(axis=1)
    pred = pred[:len(y_true)] 
    correct += (pred == y_true).sum()
    total += len(y_true)

print("TensorRT INT8 accuracy:", correct / total)


TensorRT INT8 accuracy: 0.6439024390243903
