In [None]:
# Script from https://deci.ai/blog/measure-inference-time-deep-neural-networks/

In [1]:
import torch

import models
from utils.criterion import OhemCrossEntropy, BoundaryLoss
from utils.utils import FullModel
from configs import config

# --- Model setup -----------------------------------------------------------
# CUDA device handle; used below to place the network on the GPU.
device = torch.device("cuda")
# Size setting: 5.125 for small, 6 for large, 7.334 for huge
num_layers = 5.125

# Segmentation network built from the project config
# (per the original note: pretrained weights are loaded by default).
pidnet = models.pidnet.get_seg_model(config)

# Loss modules expected by the FullModel constructor.
semantic_seg_criterion = OhemCrossEntropy(
    weight=torch.FloatTensor([1.0186, 54.7257]),
    min_kept=(1024 * 1024) // 8,
    thres=0.9,
    ignore_label=255,
)
bd_criterion = BoundaryLoss()

# Only the wrapped network (`.model`) is kept; it is moved to the GPU and
# switched to evaluation mode.
model = FullModel(pidnet, semantic_seg_criterion, bd_criterion).model
model = model.to(device).eval()

  Referenced from: <C0CD941A-7290-3098-8109-E3A1BBA30841> /Users/gwizdala/opt/anaconda3/envs/dl2/lib/python3.8/site-packages/torchvision/image.so
  warn(


In [None]:
# Benchmark settings: untimed warm-up passes, then timed passes.
warm_up_rounds = 10
repetitions = 1000

# Random float32 test tensor of shape (1, 3, 3000, 4096), moved onto `device`.
input = torch.randn(1, 3, 3000, 4096, dtype=torch.float).to(device)

# Two CUDA events with timing enabled, used to bracket each forward pass.
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

# One row per timed repetition.
timings = torch.zeros(repetitions, 1)

In [None]:
# Benchmark the forward pass under fp16 autocast.
# Both the warm-up and the timed passes run inside `torch.no_grad()`: the
# warm-up previously ran with autograd enabled, which made it record a graph
# for every pass (extra memory on a very large input) and exercise a different
# code path than the one being measured.
with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16, enabled=True):
    # GPU warm-up (results discarded, not timed)
    for _ in range(warm_up_rounds):
        _ = model(input)

    # Measure inference time, one event pair per repetition
    for rep in range(repetitions):
        starter.record()
        _ = model(input)
        ender.record()

        # Block until the GPU has finished, so that `ender` has been reached
        # before the elapsed time between the two events is read.
        torch.cuda.synchronize()
        elapsed_time = starter.elapsed_time(ender)
        timings[rep] = elapsed_time

In [None]:
# Summarize the per-repetition timings collected in `timings`.
# The values come from CUDA event `elapsed_time`, which reports milliseconds,
# so the unit is stated explicitly in the printed result.
mean_syn = timings.mean()
std_syn = timings.std()
print(f"Inference speed: {mean_syn:.3f} +- {std_syn:.3f} ms")