In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt

from torchvision.models.detection import retinanet_resnet50_fpn_v2
from torchvision.transforms import functional as F
from torchvision.datasets  import CocoDetection
from torch.utils.data import DataLoader


In [2]:
model = retinanet_resnet50_fpn_v2(pretrained=True)
print('Number parameters: {0:,}'.format(sum(p.numel() for p in model.parameters())))
model.eval()



Number parameters: 38,198,935


RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

In [3]:
categories = [
    '__background__',
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [4]:
dataset_path = 'val2017'
annotations_path = 'annotations_trainval2017/annotations/instances_val2017.json'

def transform(image, target):
    image = F.resize(image, [256, 256])
    if 'boxes' in target:
        boxes = target['boxes']
        scale_factor = torch.tensor([256 / image.size[1], 256 / image.size[0],
                                     256 / image.size[1], 256 / image.size[0]])
        boxes = boxes*scale_factor
        target['boxes'] = boxes
    image = F.to_tensor(image)
    return image, target

dataset = CocoDetection(root=dataset_path, annFile=annotations_path,
                        transforms=transform)
data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True, num_workers=2)

loading annotations into memory...
Done (t=0.54s)
creating index...
index created!


In [5]:
from torchvision.ops import nms

def evaluate(model, data_loader):
    model.eval()
    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            # targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)
            boxes = outputs[0]['boxes']
            labels = outputs[0]['labels']
            scores = outputs[0]['scores']

            indices = nms(boxes, scores, iou_threshold=0.5)
            selected_boxes = boxes[indices]
            selected_labels = labels[indices]
            selected_scores = scores[indices]

            return selected_boxes, selected_labels, selected_scores

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
evaluate(model, data_loader)

(tensor([[  5.0359,  19.4487, 140.5931, 253.1077],
         [ 37.3300, 107.7694,  76.8065, 249.5420],
         [122.6981, 129.7103, 242.2197, 248.1404],
         [ 36.3553,  88.8828, 107.6137, 252.0932],
         [115.9403, 196.4411, 242.0595, 253.2674],
         [ 48.4634,  24.2834, 234.7047, 249.4842],
         [ 93.0840, 136.7837, 140.3878, 253.0228],
         [  0.0000, 148.5559, 228.3966, 256.0000],
         [118.2290, 180.5661, 144.5681, 242.0710],
         [ 39.5941, 184.7172,  63.1983, 254.2012],
         [ 25.1460, 148.4094,  94.7674, 253.0098],
         [ 62.0749, 127.2452, 132.5699, 253.3212],
         [137.4582, 122.8106, 143.6246, 156.4497],
         [104.4709, 141.3264, 166.0285, 248.7953],
         [144.2860, 118.9004, 149.5965, 154.7631],
         [141.0437, 120.5501, 145.2446, 155.6102],
         [  9.9006,  97.6681,  85.8156, 254.7923],
         [141.9387, 130.9515, 241.8840, 196.3711],
         [171.4481, 156.2611, 242.2077, 251.8884]]),
 tensor([ 1, 32, 63, 32, 63, 

In [6]:
from pycocotools.cocoeval import COCOeval
import json

# Chuyển đổi kết quả dự đoán của bạn thành định dạng COCO
coco_results = []
for i, (images, targets) in enumerate(data_loader):
    images = list(image.to(device) for image in images)
    outputs = model(images)

    for j, output in enumerate(outputs):
        try:

            image_id = targets[j]["image_id"].item()
            boxes = output["boxes"].detach().numpy()
            scores = output["scores"].detach().numpy()
            labels = output["labels"].detach().numpy()
        except Exception as e:
            print(f"Error on image {i+1}, {e}")
            print(targets)
            break

        for box, score, label in zip(boxes, scores, labels):
            x_min, y_min, x_max, y_max = box
            width = x_max - x_min
            height = y_max - y_min

            coco_results.append({
                "image_id": image_id,
                "category_id": label,
                "bbox": [x_min, y_min, width, height],
                "score": score
            })

# Lưu kết quả dưới dạng file JSON
with open("coco_results.json", "w") as f:
    json.dump(coco_results, f)

# Sử dụng COCOeval để tính toán mAP
coco_gt = dataset.coco
coco_dt = coco_gt.loadRes("coco_results.json")
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


Error on image 149, list index out of range
[]
Error on image 503, list index out of range
[]
Error on image 542, list index out of range
[]


KeyboardInterrupt: 