In [None]:
# Script from https://deci.ai/blog/measure-inference-time-deep-neural-networks/

In [1]:
import torch

from model.complex.convnext_regressor import ConvNextRegressor

# Model configuration
device = torch.device("cuda")
num_layers = 5.125  # 5.125 for small, 6 for large, 7.334 for huge

# Instantiate the regressor; the output head settings are passed as a dict.
model = ConvNextRegressor(
    pretrained=True,
    num_layers=num_layers,
    output_config={
        "type": "dual_dense",
        "out_channels": 1,
        "activation": "clamp",
        "feature_map": False,
        "max_distance_mask_value": 128,
        "labels": True,
        "labels_larger_threshold": True
    },
    predicted_entity="distance_mask",
    patch_size=1024,
    bayer_array_input=False,
    freeze_encoder=False
)

# Move the model to the benchmark device and switch it to evaluation mode.
model = model.to(device).eval()

  Referenced from: <C0CD941A-7290-3098-8109-E3A1BBA30841> /Users/gwizdala/opt/anaconda3/envs/dl2/lib/python3.8/site-packages/torchvision/image.so
  warn(


In [None]:
# Construct the random test input directly on the target device. Passing
# `device=` avoids first allocating the (1, 3, 3000, 4096) float tensor on the
# host and then copying it over with `.to(device)`.
# NOTE: `input` shadows the Python builtin; the name is kept because the timing
# loop refers to it.
input = torch.randn((1, 3, 3000, 4096), dtype=torch.float, device=device)

# Timers: a start/end pair of CUDA events with timing enabled
starter = torch.cuda.Event(enable_timing=True)
ender = torch.cuda.Event(enable_timing=True)

repetitions = 1000   # number of timed forward passes
warm_up_rounds = 10  # untimed forward passes executed before measuring

# One slot per timed repetition
timings = torch.zeros((repetitions, 1))

In [None]:
# Warm-up and measurement both run under fp16 autocast with autograd disabled.
# The warm-up must be inside `no_grad` as well: otherwise every warm-up forward
# pass records an autograd graph and keeps its activations alive, which wastes
# GPU memory on this large input and exercises a different path than the one
# being timed.
with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=True), torch.no_grad():
    # GPU warm-up
    for _ in range(warm_up_rounds):
        _ = model(input)

    # Make sure all warm-up work has finished before the first timed run.
    torch.cuda.synchronize()

    # Measure inference time
    for rep in range(repetitions):
        starter.record()
        _ = model(input)
        ender.record()

        # Synchronize GPU so both events have completed before reading them
        torch.cuda.synchronize()
        elapsed_time = starter.elapsed_time(ender)
        timings[rep] = elapsed_time

In [None]:
# Compute inference time statistics over all timed repetitions.
# `timings` holds values returned by `torch.cuda.Event.elapsed_time`, which
# reports milliseconds, so the unit is stated in the printed result.
mean_syn = timings.mean()
std_syn = timings.std()
print(f"Inference speed: {mean_syn:.3f} +- {std_syn:.3f} ms")