In [1]:
import argparse
import os
import time
import torch
import yolo

    
def _major_minor(version):
    """Return the leading ``(major, minor)`` integers of a version string.

    Local build suffixes (e.g. ``+cu113``) and non-numeric tails (e.g. the
    ``a0`` in ``0a0``) are ignored; missing or non-numeric fields count as 0.
    """
    fields = str(version).split("+", 1)[0].split(".")
    numbers = []
    for field in (fields + ["0", "0"])[:2]:
        digits = ""
        for ch in field:
            if not ch.isdigit():
                break
            digits += ch
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


def main(args):
    """Evaluate a YOLOv5 checkpoint and print the evaluation output and speed.

    Reads from ``args``: ``use_cuda``, ``dataset``, ``file_root``, ``ann_file``,
    ``batch_size``, ``model_size``, ``kwargs``, ``eval_with_loss``,
    ``ckpt_path`` and ``evaluation``.
    Writes to ``args``: ``amp`` (always) and ``num_workers`` (only when the
    PyTorch data loader path is taken). ``args`` is also handed as a whole to
    ``yolo.evaluate``, which presumably reads further attributes
    (e.g. ``results``, ``iters``) -- TODO confirm against ``yolo.evaluate``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() and args.use_cuda else "cpu")
    cuda = device.type == "cuda"
    if cuda:
        yolo.get_gpu_prop(show=True)
    print("\ndevice: {}".format(device))

    # AMP is switched on only for CUDA, PyTorch >= 1.6 and compute capability >= 7.
    # The version is compared numerically: a plain string comparison would rank
    # "1.10.0" below "1.6.0".
    args.amp = False
    if cuda and _major_minor(torch.__version__) >= (1, 6):
        capability = torch.cuda.get_device_capability()[0]
        if capability >= 7: # 7 refers to RTX series GPUs
            args.amp = True
            print("Automatic mixed precision (AMP) is enabled!")

    # ---------------------- prepare data loader ------------------------------- #

    use_dali = bool(cuda and yolo.DALI and args.dataset == "coco")

    if use_dali:
        print("Nvidia DALI is utilized!")
        d_test = yolo.DALICOCODataLoader(
            args.file_root, args.ann_file, args.batch_size, collate_fn=yolo.collate_wrapper)
    else:
        dataset_test = yolo.datasets(args.dataset, args.file_root, args.ann_file, train=True) # set train=True for eval
        sampler_test = torch.utils.data.SequentialSampler(dataset_test)

        batch_sampler_test = yolo.GroupedBatchSampler(
            sampler_test, dataset_test.aspect_ratios, args.batch_size)

        # os.cpu_count() may return None when the count cannot be determined;
        # treat that as a single CPU instead of raising TypeError.
        cpu_total = os.cpu_count() or 1
        # No worker processes at all when the batch size is 1.
        args.num_workers = min(cpu_total // 2, 8, args.batch_size if args.batch_size > 1 else 0)
        data_loader_test = torch.utils.data.DataLoader(
            dataset_test, batch_sampler=batch_sampler_test, num_workers=args.num_workers,
            collate_fn=yolo.collate_wrapper, pin_memory=cuda)

        d_test = yolo.DataPrefetcher(data_loader_test) if cuda else data_loader_test

    # -------------------------------------------------------------------------- #

    yolo.setup_seed(3)

    # Pair of scaling factors per model size, passed straight to yolo.YOLOv5
    # (presumably depth/width multiples -- TODO confirm against yolo.YOLOv5).
    model_sizes = {"small": (0.33, 0.5), "medium": (0.67, 0.75), "large": (1, 1), "extreme": (1.33, 1.25)}
    num_classes = len(d_test.dataset.classes)
    model = yolo.YOLOv5(num_classes, model_sizes[args.model_size], **args.kwargs).to(device)
    model.head.eval_with_loss = args.eval_with_loss

    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    checkpoint = torch.load(args.ckpt_path, map_location=device)
    if "ema" in checkpoint:
        # Checkpoints carrying an "ema" entry keep the state dict at index 0 of
        # that entry and also store an "eval_info" record, which is printed.
        model.load_state_dict(checkpoint["ema"][0])
        print(checkpoint["eval_info"])
    else:
        # Otherwise the file is expected to be a bare state dict.
        model.load_state_dict(checkpoint)

    model.fuse()
    print("evaluating...")
    started = time.time()
    eval_output, iter_eval = yolo.evaluate(model, d_test, device, args, evaluation=args.evaluation)
    elapsed = time.time() - started
    print(eval_output)
    # iter_eval is presumably the average time per iteration in seconds -- TODO confirm.
    print("\ntotal time of this evaluation: {:.2f} s, speed: {:.2f} FPS".format(elapsed, args.batch_size / iter_eval))
    
    
if __name__ == "__main__":
    # Start from an empty namespace; the explicit [] is needed when running
    # inside Jupyter Notebook.
    parser = argparse.ArgumentParser()
    args = parser.parse_args([])

    ckpt_path = "yolov5s_official_2cf45318.pth"

    # Model keyword arguments used for this run: mAP 34.6 FPS 451.
    # Alternative: {"img_sizes": 672, "score_thresh": 0.001, "detections": 300}
    # gives mAP 36.1 but takes more (2x-4x) time in total.
    model_kwargs = {"img_sizes": 640, "score_thresh": 0.1, "detections": 100}

    # Every option of the run, in the order it is attached to ``args``.
    settings = {
        "use_cuda": True,
        "dataset": "coco",
        "file_root": "data/coco2017/val2017",
        "ann_file": "data/coco2017/annotations/instances_val2017.json",
        "ckpt_path": ckpt_path,
        # results.json is placed in the same directory as the checkpoint.
        "results": os.path.join(os.path.dirname(ckpt_path), "results.json"),
        "batch_size": 32,
        "iters": -1,
        "model_size": "small",
        "kwargs": model_kwargs,
        "evaluation": True,
        "eval_with_loss": False,
    }
    for option, value in settings.items():
        setattr(args, option, value)

    main(args)
    
    

cuda: True
available GPU(s): 1
0: {'name': 'NVIDIA GeForce RTX 3060 Ti', 'capability': [8, 6], 'total_momory': 8.0, 'sm_count': 38}

device: cuda
Automatic mixed precision (AMP) is enabled!


ModuleNotFoundError: No module named 'pycocotools._mask'