## 1. Import packages

In [1]:
import glob
import os
import sys
import time

import torch
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
from ultralytics import YOLO

# Put the parent directory on the import path before importing the local `app` package
sys.path.append("..")
from app.utils.process import preprocess_image


## 2. Config

In [2]:
# Root folder holding the engine file and the validation images. The default is
# the original checkout location; set the AIO_REPO_ROOT environment variable to
# run this notebook elsewhere without editing the cell.
REPO_ROOT = os.environ.get("AIO_REPO_ROOT", "/home/mlops/Repository/aio2025-onnx-tensorrt")
ENGINE_PATH = f"{REPO_ROOT}/models/yolo26l_b8.engine"
IMAGE_GLOB = f"{REPO_ROOT}/images/val2014/*.jpg"

# Side length used for the warmup tensor and passed to preprocess_image
IMG_SIZE = 640
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Number of untimed forward passes executed right after the model is loaded
WARMUP = 20

# NOTE(review): the engine file name suggests it was built for batch 8 — confirm
# before changing this value.
BATCH_SIZE = 8

# Upper bound on the number of images to benchmark; -1 keeps every image
# matched by IMAGE_GLOB.
SAMPLE_SIZE = 100 * BATCH_SIZE

# Converts each preprocessed image into a tensor before batching
transform = transforms.ToTensor()

In [3]:
# Gather the benchmark images in a deterministic (sorted) order
image_paths = glob.glob(IMAGE_GLOB)
image_paths.sort()
assert image_paths, "No images found"

# SAMPLE_SIZE == -1 means "keep everything"; any other value caps the list
image_paths = image_paths if SAMPLE_SIZE == -1 else image_paths[:SAMPLE_SIZE]

## 3. Model

In [4]:
# Build the detector from the serialized engine file
model = YOLO(ENGINE_PATH, task='detect')

# Run WARMUP throw-away forward passes on an all-zero batch before any timing
dummy = torch.zeros(
    (BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE),
    device=DEVICE,
)
for _ in range(WARMUP):
    rs = model(dummy, verbose=False)

Loading /home/mlops/Repository/aio2025-onnx-tensorrt/models/yolo26l_b8.engine for TensorRT inference...
[01/16/2026-19:12:16] [TRT] [I] Loaded engine size: 100 MiB
[01/16/2026-19:12:16] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +1, GPU +593, now: CPU 1, GPU 691 (MiB)


## 4. Test loop

In [5]:
# Build every input batch up front and move it to DEVICE, so the timed loop
# that consumes `batch_infers` does not include image decoding or preprocessing.
# NOTE(review): the final batch is smaller than BATCH_SIZE when the number of
# images is not a multiple of it — confirm the engine accepts that shape.
batch_infers = []

for i in range(0, len(image_paths), BATCH_SIZE):
    batch_paths = image_paths[i:i+BATCH_SIZE]
    processed_images = []
    for p in batch_paths:
        # The context manager releases the file handle as soon as the image has
        # been decoded, instead of leaving one open handle per image.
        with Image.open(p) as img:
            # convert() decodes the pixels while the file is still open and
            # guarantees three channels, so grayscale/CMYK JPEGs cannot make
            # torch.stack fail on mismatched shapes.
            rgb_image = img.convert("RGB")
        processed_images.append(transform(preprocess_image(rgb_image, IMG_SIZE)[0]))
    tensor_batch = torch.stack(processed_images).to(DEVICE)
    batch_infers.append(tensor_batch)

In [6]:
# FPS test
# Count the images that are actually queued for inference. SAMPLE_SIZE cannot be
# used here: it may be -1 ("use every image") or larger than the number of
# files found, and either case would yield a wrong throughput figure.
num_images = sum(len(batch) for batch in batch_infers)

# CUDA work is queued asynchronously, so drain anything still pending (e.g. the
# warmup passes) before starting the clock.
if torch.cuda.is_available():
    torch.cuda.synchronize()
start = time.perf_counter()

for tensor_batch in tqdm(batch_infers):
    _ = model(tensor_batch, verbose=False)

# Wait for the last batch to finish on the GPU before stopping the clock;
# otherwise the measured time can end while kernels are still running.
if torch.cuda.is_available():
    torch.cuda.synchronize()
end = time.perf_counter()

total_time = end - start
fps = num_images / total_time

100%|██████████| 100/100 [00:09<00:00, 11.10it/s]


In [7]:
# Report the number of images that really went through the model rather than
# SAMPLE_SIZE, which is -1 when "all images" is requested and overstates the
# count when fewer files were found than requested.
images_processed = sum(len(batch) for batch in batch_infers)

print("=" * 40)
print(f"Device           : {DEVICE}")
print(f"Images processed : {images_processed}")
print(f"Total time       : {total_time:.2f} s")
print(f"FPS              : {fps:.2f}")
print("=" * 40)

Device           : cuda
Images processed : 800
Total time       : 9.01 s
FPS              : 88.78
