In [1]:
# Script from https://deci.ai/blog/measure-inference-time-deep-neural-networks/

In [6]:
import torch
from tqdm import tqdm

import models
from utils.criterion import OhemCrossEntropy, BoundaryLoss
from utils.utils import FullModel
from configs import config

# --- Model construction ------------------------------------------------------
# Size selector: 5.125 for small, 6 for large, 7.334 for huge.
# NOTE(review): not read anywhere in the visible cells — confirm it is still needed.
num_layers = 5.125
device = torch.device("cuda")

# Segmentation backbone; get_seg_model creates a pretrained model by default.
pidnet = models.pidnet.get_seg_model(config)

# Loss objects are only needed to satisfy the FullModel constructor; the
# benchmark below runs the wrapped network (`.model`) directly.
semantic_seg_criterion = OhemCrossEntropy(
    ignore_label=255,
    thres=0.9,
    min_kept=int(0.125 * (1024 ** 2)),
    weight=torch.FloatTensor([1.0186, 54.7257]),
)
bd_criterion = BoundaryLoss()

# Unwrap the network, move it to the GPU and switch it to evaluation mode.
full_model = FullModel(pidnet, semantic_seg_criterion, bd_criterion)
model = full_model.model.cuda().eval()

Loading pretrained model from pretrained_models/imagenet/PIDNet_S_ImageNet.pth.tar
> loaded 302 parameters


In [7]:
# Construct random test input: one 3-channel 3000x4096 float32 image.
# The tensor is allocated directly on the target device instead of being
# generated on the host and copied over, which avoids a large (~147 MB)
# temporary CPU allocation and a host-to-device transfer.
input = torch.randn((1, 3, 3000, 4096), dtype=torch.float, device=device)

# Timers: a pair of CUDA events with timing enabled, used to bracket each
# forward pass in the benchmark loop.
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

# Benchmark configuration.
repetitions = 1000    # number of timed forward passes
warm_up_rounds = 10   # untimed forward passes executed before measuring

# One slot per timed repetition; filled in by the benchmark loop.
timings = torch.zeros((repetitions, 1))

In [8]:
# Benchmark the forward pass under fp16 autocast.
#
# Gradient tracking is disabled for the *whole* run, warm-up included, so the
# warm-up exercises the same code path that is measured afterwards and no
# autograd state is retained for the large input while timing.
with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16, enabled=True):
    # GPU warm-up (untimed)
    for _ in tqdm(range(warm_up_rounds), desc="Warm-up"):
        _ = model(input)

    # Make sure all warm-up work has finished before the first measurement.
    torch.cuda.synchronize()

    # Measure inference time
    for rep in tqdm(range(repetitions), desc="Inference"):
        starter.record()
        _ = model(input)
        ender.record()

        # Kernel launches are asynchronous: wait for the GPU to finish so both
        # events have completed before the elapsed time is read.
        torch.cuda.synchronize()

        # Event.elapsed_time reports milliseconds (see the torch.cuda.Event docs).
        timings[rep] = starter.elapsed_time(ender)

Warm-up: 100%|██████████| 10/10 [00:00<00:00, 28.36it/s]
Inference: 100%|██████████| 1000/1000 [00:45<00:00, 21.94it/s]


In [9]:
# Summarise the per-repetition timings collected by the benchmark loop:
# mean and standard deviation over all recorded repetitions.
mean_syn, std_syn = timings.mean(), timings.std()

# Report as "mean +- std", each with three decimals.
report = f"Inference speed: {mean_syn:.3f} +- {std_syn:.3f}"
print(report)

Inference speed: 45.103 +- 0.050
