In [None]:
import numpy as np
import cv2
from tqdm import tqdm

import torch
from sgg_benchmark.config import cfg
from sgg_benchmark.modeling.detector import build_detection_model

def latency_bench(config_file, task):
    """Measure the mean forward-pass latency of a detection model on GPU.

    Merges ``config_file`` into the global ``cfg``, builds the model, and
    times repeated forward passes on a single random 640x640 image using
    CUDA events.

    Args:
        config_file: Path to the config file merged into the global ``cfg``.
        task: Currently unused; kept so existing callers keep working.

    Returns:
        Mean latency of one forward pass, in milliseconds (the unit reported
        by ``torch.cuda.Event.elapsed_time``).

    Note:
        Timing uses ``torch.cuda.Event``, so a CUDA device is required.
        The global ``cfg`` is modified and frozen by this function;
        NOTE(review): a second call in the same process presumably fails when
        merging into the frozen config -- confirm against the config class.
    """
    cfg.merge_from_file(config_file)
    # NOTE(review): assigned as the string "true", not a bool -- confirm which
    # type the consumers of this flag expect.
    cfg.TEST.CUSTUM_EVAL = "true"
    cfg.freeze()

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    model.eval()

    # Random uint8 image, HWC -> CHW, scaled to [0, 1]. Channel order is
    # irrelevant for uniform noise, so no colour-space conversion is applied.
    img = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
    chw = np.ascontiguousarray(img.transpose((2, 0, 1)))
    dummy_input = torch.from_numpy(chw).float()
    dummy_input /= 255.0
    dummy_input = dummy_input.to(cfg.MODEL.DEVICE)

    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    warmup_runs = 10
    repetitions = 100
    timings = np.zeros(repetitions)

    # Warm-up and measurement both run without autograd so that the warm-up
    # exercises exactly the code path being timed and builds no graph.
    with torch.no_grad():
        for _ in tqdm(range(warmup_runs)):
            model([dummy_input], None)

        for rep in tqdm(range(repetitions)):
            starter.record()
            model([dummy_input], None)
            ender.record()
            # Kernels launch asynchronously: wait for the GPU to finish
            # before reading the elapsed time between the two events.
            torch.cuda.synchronize()
            timings[rep] = starter.elapsed_time(ender)

    mean_syn = timings.mean()
    print(mean_syn)

    return mean_syn

# Config of the checkpoint whose inference latency is benchmarked.
conf = "/home/maelic/SGG-Benchmark/checkpoints/IndoorVG4/SGDET/penet-yolov8m/config.yml"

# Run the benchmark; the mean latency is printed by latency_bench itself.
latency_bench(config_file=conf, task="sgdet")