In [1]:
import numpy as np
import json
from collections import OrderedDict
import xml.etree.ElementTree as ET


def _coco_bbox_to_corners(bbox, image_width, image_height):
    """Convert a COCO ``[x, y, width, height]`` box to ``[xmin, ymin, xmax, ymax]``.

    Every coordinate is rounded to the nearest integer.  A minimum of exactly
    0 is moved to 1, and a maximum that lands exactly on the image width or
    height is moved one pixel inwards.
    NOTE(review): presumably this keeps boxes strictly inside the image for
    the downstream pipeline -- confirm.

    ``image_width`` / ``image_height`` may be ``None`` (image size unknown);
    in that case the maximum coordinates are left as rounded.
    """
    xmin = round(bbox[0])
    ymin = round(bbox[1])
    xmax = round(bbox[0] + bbox[2])
    ymax = round(bbox[1] + bbox[3])

    if xmin == 0:
        xmin = 1
    if ymin == 0:
        ymin = 1
    # Comparing against None is simply False, so no adjustment happens when
    # the image size is unknown.
    if xmax == image_width:
        xmax -= 1
    if ymax == image_height:
        ymax -= 1
    return [xmin, ymin, xmax, ymax]


def get_annotation(image_id, annotation_file=None):
    """Return the ground-truth boxes, labels and difficult flags of one COCO image.

    Args:
        image_id: Image name such as ``"COCO_val2014_000000052759"``.  Only
            the last six characters are parsed as the numeric COCO image id.
        annotation_file: Path to a COCO ``instances_*.json`` file.  Defaults
            to the val2014 instances file at its original hard-coded location.

    Returns:
        A tuple ``(boxes, labels, is_difficult)``:

        * ``boxes``: ``float32`` array with one ``[xmin, ymin, xmax, ymax]``
          row per annotation.
        * ``labels``: ``int64`` array of class indices.  An index is the
          position of the category name in ``class_names`` (``BACKGROUND`` is
          0); it is not the raw COCO ``category_id``.
        * ``is_difficult``: ``uint8`` array of zeros, one per annotation.

        When the image has no usable annotation, a single placeholder entry is
        returned instead: box ``[0, 0, 0, 0]``, label 0, difficult 0.

    Raises:
        KeyError: If a matching annotation's category name is not listed in
            ``class_names``.
    """
    if annotation_file is None:
        annotation_file = "/home/taeho/data/coco2014/annotations/annotations/instances_val2014.json"

    # NOTE: the whole annotation file is parsed on every call; when many
    # images are queried in a row this dominates the run time.
    with open(annotation_file, encoding="utf-8") as data_file:
        # data holds the entire JSON document as a dictionary.  Only key
        # lookups are performed on it, so plain dicts are sufficient.
        data = json.load(data_file)

    image_number = int(image_id[-6:])

    # The image size stays None when the image is not listed in "images".
    # Previously the variables were left unbound in that case and a matching
    # annotation crashed with UnboundLocalError.
    image_width = None
    image_height = None
    for image in data["images"]:
        if image["id"] == image_number:
            image_width = image["width"]
            image_height = image["height"]
            break

    class_names = ('BACKGROUND', 'person', 'bicycle', 'car', 'motorcycle',
                   'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                   'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
                   'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
                   'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
                   'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
                   'kite', 'baseball bat', 'baseball glove', 'skateboard',
                   'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
                   'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
                   'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                   'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table',
                   'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
                   'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                   'refrigerator', 'book', 'clock', 'vase', 'scissors',
                   'teddy bear', 'hair drier', 'toothbrush'
                   )
    class_dict = {class_name: i for i, class_name in enumerate(class_names)}

    # Map category id -> category name once instead of re-scanning the
    # category list for every matching annotation.
    category_names = {
        category["id"]: category["name"] for category in data["categories"]
    }

    boxes = []
    labels = []
    is_difficult = []
    for annotation in data["annotations"]:
        if annotation["image_id"] != image_number:
            continue
        category_name = category_names.get(annotation["category_id"])
        if category_name is None:
            # Unknown category id: skipped, as the original scan did.
            continue
        boxes.append(
            _coco_bbox_to_corners(annotation["bbox"], image_width, image_height)
        )
        is_difficult.append(0)
        labels.append(class_dict[category_name])

    if not boxes:
        # Placeholder so callers always receive at least one entry.
        return (np.array([[0, 0, 0, 0]], dtype=np.float32),
                np.array([0], dtype=np.int64),
                np.array([0], dtype=np.uint8))

    return (np.array(boxes, dtype=np.float32),
            np.array(labels, dtype=np.int64),
            np.array(is_difficult, dtype=np.uint8))

# Smoke test: print the parsed annotations of a few COCO val2014 images,
# each followed by a blank line.
coco_sample_ids = (
    "COCO_val2014_000000052759",
    "COCO_val2014_000000289393",
    "COCO_val2014_000000568623",
    "COCO_val2014_000000128939",
    "COCO_val2014_000000339974",
    "COCO_val2014_000000561100",
    "COCO_val2014_000000150410",
    "COCO_val2014_000000009426",
    "COCO_val2014_000000014869",
    "COCO_val2014_000000522713",  # 10th sample
    "COCO_val2014_000000281221",
    "COCO_val2014_000000108169",  # 12th sample
    "COCO_val2014_000000440575",
)
for coco_sample_id in coco_sample_ids:
    print(get_annotation(coco_sample_id), "\n")

# Marker line separating the COCO output from the NEU-DET output.
print("NEU")
def get_annotation2(image_id, annotation_dir="/home/taeho/다운로드/NEU-DET/ANNOTATIONS"):
    """Return the boxes, labels and difficult flags from one NEU-DET XML file.

    Args:
        image_id: Annotation file name without the ``.xml`` extension, e.g.
            ``"crazing_100"``.
        annotation_dir: Directory containing the XML annotation files.
            Defaults to the original hard-coded NEU-DET location.

    Returns:
        A tuple ``(boxes, labels, is_difficult)``:

        * ``boxes``: ``float32`` array with one ``[xmin, ymin, xmax, ymax]``
          row per ``<object>``, each coordinate shifted by -1.
        * ``labels``: ``int64`` array of indices into ``class_names``
          (``BACKGROUND`` is 0).
        * ``is_difficult``: ``uint8`` array holding the ``<difficult>`` value
          of each object, or 0 when the tag is missing or empty.

    Raises:
        KeyError: If an object's name is not listed in ``class_names``.
    """
    annotation_file = f"{annotation_dir}/{image_id}.xml"
    objects = ET.parse(annotation_file).findall("object")
    boxes = []
    labels = []
    is_difficult = []

    class_names = ('BACKGROUND', 'crazing',
                   'inclusion', 'patches', 'pitted_surface',
                   'rolled-in_scale', 'scratches'
                   )
    class_dict = {class_name: i for i, class_name in enumerate(class_names)}

    # "obj" rather than "object" so the builtin is not shadowed.
    for obj in objects:
        class_name = obj.find('name').text.lower().strip()
        bbox = obj.find('bndbox')
        # VOC-style coordinates are 1-based (Matlab convention); subtract 1
        # to make them 0-based.
        x1 = float(bbox.find('xmin').text) - 1
        y1 = float(bbox.find('ymin').text) - 1
        x2 = float(bbox.find('xmax').text) - 1
        y2 = float(bbox.find('ymax').text) - 1
        boxes.append([x1, y1, x2, y2])  # [xmin, ymin, xmax, ymax]
        labels.append(class_dict[class_name])
        # A missing <difficult> tag is treated like an empty one (flag 0)
        # instead of failing on ``None.text``.
        is_difficult_str = obj.findtext('difficult')
        is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

    return (np.array(boxes, dtype=np.float32),
            np.array(labels, dtype=np.int64),
            np.array(is_difficult, dtype=np.uint8))

# Print the boxes, labels and difficult flags parsed for one NEU-DET sample.
print(get_annotation2("crazing_100"))

(array([[105.,  44., 473., 220.],
       [ 48.,  40., 204.,  94.],
       [307., 215., 335., 272.],
       [129., 117., 137., 132.],
       [418.,  84., 470., 100.],
       [301., 217., 389., 294.],
       [ 35.,  45.,  57.,  80.],
       [212., 182., 275., 254.],
       [217.,  82., 221.,  86.],
       [  1.,  77.,  16.,  98.],
       [211.,  56., 223.,  68.]], dtype=float32), array([5, 5, 1, 1, 8, 8, 5, 8, 1, 8, 5]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)) 

(array([[523., 333., 639., 474.],
       [ 33.,  86., 275., 235.],
       [ 81., 201., 356., 412.],
       [  1.,  20.,  39., 317.]], dtype=float32), array([15, 24, 20, 59]), array([0, 0, 0, 0], dtype=uint8)) 

(array([[  1.,  33., 209., 210.],
       [439.,  88., 637., 379.],
       [141.,  81., 191., 178.],
       [ 94.,  95., 137., 234.],
       [325., 110., 396., 144.],
       [ 69.,  28.,  81.,  46.],
       [ 53.,  24.,  64.,  44.],
       [340.,  71., 351.,  93.],
       [414.,  54., 422.,  60.],
       [377