In [5]:
import json
import numpy as np

class_map = ['button', 'input', 'select', 'search', 'list', 'img', 'block', 'text', 'icon']


def read_detect_result(file_name):
    '''
    :return: {list of [[col_min, row_min, col_max, row_max]], list of [class id]
    '''
    file = open(file_name, 'r')
    bboxes = []
    categories = []
    for l in file.readlines():
        labels = l.split()[1:]
        for label in labels:
            label = label.split(',')
            bboxes.append([int(b) for b in label[:-1]])
            categories.append(class_map[int(label[-1])])

    return {file_name.split('_')[0]: {'bboxes':bboxes, 'categories':categories}}


def read_ground_truth():
    def get_img_by_id(img_id):
        for image in images:
            if image['id'] == img_id:
                return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    data = json.load(open('instances_test.json', 'r'))

    images = data['images']
    annots = data['annotations']

    compos = {}
    for annot in annots:
        img_name, size = get_img_by_id(annot['image_id'])
        if img_name not in compos:
            compos[img_name] = {'bboxes': [annot['bbox']], 'categories': [annot['category_id']], 'size':size}
        else:
            compos[img_name]['bboxes'].append(cvt_bbox(annot['bbox']))
            compos[img_name]['categories'].append(annot['category_id'])
    return compos


def match(d_bbox, gt_bboxes, matched):
    '''
    :param matched: mark if the ground truth component is matched
    :param d_bbox: [col_min, row_min, col_max, row_max]
    :param gt_bboxes: list of ground truth [[col_min, row_min, col_max, row_max]]
    :return: Boolean: if IOU large enough or detected box is contained by ground truth
    '''
    area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
    for i, gt_bbox in enumerate(gt_bboxes):
        if matched[i] == 0:
            continue
        area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[0])
        col_min = max(d_bbox[0], gt_bbox[0])
        row_min = max(d_bbox[1], gt_bbox[1])
        col_max = min(d_bbox[2], gt_bbox[2])
        row_max = min(d_bbox[3], gt_bbox[3])
        # if not intersected, w or h should be 0
        w = max(0, col_max - col_min)
        h = max(0, row_max - row_min)
        area_inter = w*h
        if area_inter == 0:
            continue

        iod = area_inter / area_d
        iou = area_inter / (area_d + area_gt)
        # the interaction is d itself, so d is contained in gt, considered as correct detection
        if iod == area_d and area_d / area_gt > 0.1:
            matched[i] = 0
            return True
        if iou > 0.5:
            return True
    return False


def eval(detection, ground_truth):
    TP, FP, FN = 0, 0, 0
    for image in detection:
        d_compo = detection[image]
        gt_compo = ground_truth[image]
        matched = np.ones(len(gt_compo), dtype=int)
        for d_bbox in d_compo['bboxes']:
            if match(d_bbox, gt_compo['bboxes'], matched):
                TP += 1
            else:
                FP += 1
        FN += sum(matched)

    print(TP, FP, FN)

In [6]:
gt = read_ground_truth()

In [7]:
gt

{'59347': {'bboxes': [[114.0, 1611.0, 852.0, 47.0],
   [21, 1679, 1059, 1773],
   [0, 63, 147, 210],
   [723, 73, 849, 199],
   [849, 73, 975, 199],
   [975, 73, 1080, 199]],
  'categories': [9, 4, 4, 4, 4, 4],
  'size': (1920, 1080)},
 '60202': {'bboxes': [[0.0, 63.0, 178.0, 147.0]],
  'categories': [4],
  'size': (1920, 1080)},
 '64797': {'bboxes': [[0.0, 0.0, 1080.0, 63.0],
   [0, 84, 84, 168],
   [677, 102, 727, 149],
   [996, 84, 1080, 168],
   [324, 803, 348, 834],
   [431, 803, 455, 834],
   [153, 1324, 205, 1377],
   [513, 1324, 565, 1377],
   [873, 1324, 926, 1377],
   [153, 1509, 205, 1561],
   [513, 1509, 565, 1561],
   [873, 1506, 926, 1559],
   [240, 1680, 298, 1738],
   [780, 1680, 838, 1738]],
  'categories': [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
  'size': (1920, 1080)},
 '17284': {'bboxes': [[954.0, 94.0, 84.0, 84.0], [0, 63, 147, 210]],
  'categories': [5, 4],
  'size': (1920, 1080)},
 '3418': {'bboxes': [[60.0, 1646.0, 95.0, 94.0],
   [276, 1646, 371, 1740],
   

In [8]:
data = json.load(open('instances_test.json', 'r'))

In [10]:
data['categories']

[{'id': 0, 'name': 'Button', 'supercategory': 'Button'},
 {'id': 1, 'name': 'CheckBox', 'supercategory': 'CheckBox'},
 {'id': 2, 'name': 'Chronometer', 'supercategory': 'Chronometer'},
 {'id': 3, 'name': 'EditText', 'supercategory': 'EditText'},
 {'id': 4, 'name': 'ImageButton', 'supercategory': 'ImageButton'},
 {'id': 5, 'name': 'ImageView', 'supercategory': 'ImageView'},
 {'id': 6, 'name': 'ProgressBar', 'supercategory': 'ProgressBar'},
 {'id': 7, 'name': 'RadioButton', 'supercategory': 'RadioButton'},
 {'id': 8, 'name': 'RatingBar', 'supercategory': 'RatingBar'},
 {'id': 9, 'name': 'SeekBar', 'supercategory': 'SeekBar'},
 {'id': 10, 'name': 'Spinner', 'supercategory': 'Spinner'},
 {'id': 11, 'name': 'Switch', 'supercategory': 'Switch'},
 {'id': 12, 'name': 'ToggleButton', 'supercategory': 'ToggleButton'},
 {'id': 13, 'name': 'VideoView', 'supercategory': 'VideoView'}]