In [2]:
import numpy as np
from tqdm import tqdm

import torch
from sgg_benchmark.config import cfg
from sgg_benchmark.modeling.detector import build_detection_model
from sgg_benchmark.utils.checkpoint import DetectronCheckpointer
from sgg_benchmark.data import make_data_loader
from sgg_benchmark.structures.image_list import to_image_list

def latency_bench(config_file):
    cfg.merge_from_file(config_file)
    cfg.MODEL.BACKBONE.NMS_THRESH = 0.001
    cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 80
    cfg.TEST.IMS_PER_BATCH = 1
    # cfg.freeze()

    # build dataloader
    val_data_loader = make_data_loader(
        cfg,
        mode='val',
        is_distributed=False,
    )
    val_data_loader = val_data_loader[0]

    cfg.TEST.CUSTUM_EVAL = True

    model = build_detection_model(cfg)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    last_check = checkpointer.get_checkpoint_file()
    if last_check == "":
        last_check = cfg.MODEL.WEIGHT
    print("Loading last checkpoint from {}...".format(last_check))
    _ = checkpointer.load(last_check)

    model.to(cfg.MODEL.DEVICE)
    model.roi_heads.eval()
    model.backbone.eval()

    # INIT LOGGERS
    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
    repetitions = 100
    timings = np.zeros((repetitions,1))
    timings_relation_head = np.zeros((repetitions,1))
    input_img, _, _ = next(iter(val_data_loader))
    input_img = input_img.to(cfg.MODEL.DEVICE)
    # onnx_program = torch.onnx.dynamo_export(model, (input_img,None))
    # onnx_program.save("my_image_classifier.onnx")

    # Export the model
    torch.onnx.export(model,               # model being run
                    (input_img,None),                         # model input (or a tuple for multiple inputs)
                    "my_model.onnx",   # where to save the model (can be a file or file-like object)
                    export_params=True,        # store the trained parameter weights inside the model file
                    opset_version=17,          # the ONNX version to export the model to
                    do_constant_folding=True,  # whether to execute constant folding for optimization
                    input_names = ['input'],   # the model's input names
                    output_names = ['output'], # the model's output names
                    )
    #GPU-WARM-UP
    for _ in tqdm(range(10)):
        _ = model(input_img, None)
    # MEASURE PERFORMANCE
    with torch.no_grad():
        for rep, (input_img, _, _) in enumerate(tqdm(val_data_loader)):
            if rep == repetitions:
                break
            input_img = input_img.to(cfg.MODEL.DEVICE)
            images = to_image_list(input_img)
            starter.record()
            outputs, features = model.backbone(images.tensors, embed=True)
            proposals = model.backbone.postprocess(outputs, images.image_sizes)
            ender.record()
            # WAIT FOR GPU SYNC
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            timings[rep] = curr_time

            starter.record()
            _, _, _ = model.roi_heads(features, proposals, None, None, proposals)
            ender.record()
            # WAIT FOR GPU SYNC
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            timings_relation_head[rep] = curr_time

    mean_syn = np.sum(timings) / repetitions
    mean_syn_relation_head = np.sum(timings_relation_head) / repetitions
    std_syn = np.std(timings)
    print("Average time backbone: {} ms".format(mean_syn))
    print("Average time relation head: {} ms".format(mean_syn_relation_head))
    print("Full network latency: {} ms".format(mean_syn + mean_syn_relation_head))
    print("Standard deviation: {} ms".format(std_syn))

    # print total number of params
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total number of parameters: {total_params}")

conf = "/home/maelic/Documents/PhD/MyModel/SGG-Benchmark/checkpoints/IndoorVG4/SGDET/penet-yolov8m/config.yml"

latency_bench(conf)

Overriding model.yaml nc=80 with nc=84

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytic

2024-05-02 17:51:15.841 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:30 - ----------------------------------------------------------------------------------------------------
2024-05-02 17:51:15.841 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:31 - get dataset statistics...
2024-05-02 17:51:15.842 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:42 - Loading data statistics from: ./checkpoints/IndoorVG4/SGDET/penet-yolov8m/VG_indoor_filtered_train_statistics.cache
2024-05-02 17:51:15.842 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:43 - ----------------------------------------------------------------------------------------------------
2024-05-02 17:51:16.133 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:30 - ----------------------------------------------------------------------------------------------------
2024-05-02 17:51:16.133 | INFO     | sgg_benchmark.data.build:get_dataset_statistics:31 - get dataset sta

loading word vectors from /home/maelic/glove/glove.6B.200d.pt
loading word vectors from /home/maelic/glove/glove.6B.200d.pt


2024-05-02 17:51:17.246 | INFO     | sgg_benchmark.utils.checkpoint:load:65 - Loading checkpoint from ./checkpoints/IndoorVG4/SGDET/penet-yolov8m/best_model_epoch_8.pth


Loading last checkpoint from ./checkpoints/IndoorVG4/SGDET/penet-yolov8m/best_model_epoch_8.pth...


RuntimeError: Only tuples, lists and Variables are supported as JIT inputs/outputs. Dictionaries and strings are also accepted, but their usage is not recommended. Here, received an input of unsupported type: ImageList