In [1]:
import json
import numpy as np
import cv2
from glob import glob
from os.path import join as pjoin
from tqdm import tqdm
import os

class_map = {'0':'Button', '1':'CheckBox', '2':'Chronometer', '3':'EditText', '4':'ImageButton', '5':'ImageView',
               '6':'ProgressBar', '7':'RadioButton', '8':'RatingBar', '9':'SeekBar', '10':'Spinner', '11':'Switch',
               '12':'ToggleButton', '13':'VideoView', '14':'TextView'}

In [2]:
def clipping(org, bbox, write_path=None, show=False, padding=(30, 20)):
    (column_min, row_min, column_max, row_max) = bbox
    column_min = max(column_min - padding[0], 0)
    column_max = min(column_max + padding[0], org.shape[1])
    row_min = max(row_min - padding[1], 0)
    row_max = min(row_max + padding[1], org.shape[0])
    clip = org[row_min:row_max, column_min:column_max]
    if show:
        cv2.imshow('clipping', clip)
        cv2.waitKey()
    if write_path is not None:
        cv2.imwrite(write_path, clip)
    return clip


def resize_label(bboxes, d_height, gt_height, bias=0):
    bboxes_new = []
    scale = gt_height / d_height
    for bbox in bboxes:
        bbox = [int(b * scale + bias) for b in bbox]
        bboxes_new.append(bbox)
    return bboxes_new


def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
    board = org.copy()
    for i in range(len(corners)):
        board = cv2.rectangle(board, (corners[i][0], corners[i][1]), (corners[i][2], corners[i][3]), color, line)
    if show:
        cv2.imshow('a', cv2.resize(board, (500, 1000)))
        cv2.waitKey(0)
    return board

In [3]:
def load_detect_result_json(reslut_file_root, shrink=0):
    def is_bottom_or_top(corner):
        column_min, row_min, column_max, row_max = corner
        if row_max < 36 or row_min > 725:
            return True
        return False

    result_files = glob(pjoin(reslut_file_root, '*.json'))
    compos_reform = {}
    print('Loading %d detection results' % len(result_files))
    for reslut_file in tqdm(result_files[:]):
        img_name = reslut_file.split('\\')[-1].split('.')[0]
        compos = json.load(open(reslut_file, 'r'))['compos']
        for compo in compos:
            if compo['column_max'] - compo['column_min'] < 10 or compo['row_max'] - compo['row_min'] < 10:
                continue
            if is_bottom_or_top((compo['column_min'], compo['row_min'], compo['column_max'], compo['row_max'])):
                continue
            if img_name not in compos_reform:
                compos_reform[img_name] = {'bboxes': [[compo['column_min'] + shrink, compo['row_min'] + shrink, compo['column_max'] - shrink, compo['row_max'] - shrink]],
                                           'categories': [compo['category']]}
            else:
                compos_reform[img_name]['bboxes'].append([compo['column_min'] + shrink, compo['row_min'] + shrink, compo['column_max'] - shrink, compo['row_max'] - shrink])
                compos_reform[img_name]['categories'].append(compo['category'])
    return compos_reform


def load_ground_truth_json(gt_file):
    def get_img_by_id(img_id):
        for image in images:
            if image['id'] == img_id:
                return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    data = json.load(open(gt_file, 'r'))
    images = data['images']
    annots = data['annotations']
    compos = {}
    print('Loading %d ground truth' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        if img_name not in compos:
            compos[img_name] = {'bboxes': [cvt_bbox(annot['bbox'])], 'categories': [class_map[str(annot['category_id'])]], 'size': size}
        else:
            compos[img_name]['bboxes'].append(cvt_bbox(annot['bbox']))
            compos[img_name]['categories'].append(class_map[str(annot['category_id'])])
    return compos

In [4]:
detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\merge')
gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')

  0%|▏                                                                              | 11/4708 [00:00<00:45, 102.69it/s]

Loading 4708 detection results


100%|█████████████████████████████████████████████████████████████████████████████| 4708/4708 [00:05<00:00, 822.68it/s]
  8%|██████▏                                                                   | 7194/86646 [00:00<00:01, 71672.13it/s]

Loading 86646 ground truth


100%|██████████████████████████████████████████████████████████████████████████| 86646/86646 [00:12<00:00, 7104.79it/s]


In [5]:
def eval(detection, ground_truth, img_root, cnn, show=False):

    store_gt_categories = []
    store_det_clips = []

    def match(org, d_bbox, gt_compos, matched):
        '''
        :param matched: mark if the ground truth component is matched
        :param d_bbox: [col_min, row_min, col_max, row_max]
        :param gt_bboxes: list of ground truth [[col_min, row_min, col_max, row_max]]
        :return: Boolean: if IOU large enough or detected box is contained by ground truth
        '''
        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
        gt_bboxes = gt_compos['bboxes']
        gt_categories = gt_compos['categories']
        for i, gt_bbox in enumerate(gt_bboxes):
            if matched[i] == 0:
                continue
            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
            col_min = max(d_bbox[0], gt_bbox[0])
            row_min = max(d_bbox[1], gt_bbox[1])
            col_max = min(d_bbox[2], gt_bbox[2])
            row_max = min(d_bbox[3], gt_bbox[3])
            # if not intersected, area intersection should be 0
            w = max(0, col_max - col_min)
            h = max(0, row_max - row_min)
            area_inter = w * h
            if area_inter == 0:
                continue
            iod = area_inter / area_d
            iou = area_inter / (area_d + area_gt - area_inter)
            if iou > 0.9 or iod == 1:
                matched[i] = 0
                store_gt_categories.append(gt_categories[i])
                store_det_clips.append(clipping(org, d_bbox, show=show))
                return True
        return False

    amount = len(detection)
    FP, FN = 0, 0
    for i, image_id in enumerate(detection):
        img = cv2.imread(pjoin(img_root, image_id + '.jpg'))
        d_compos = detection[image_id]
        if image_id not in ground_truth:
            continue
        gt_compos = ground_truth[image_id]
        org_height = gt_compos['size'][0]

        d_compos['bboxes'] = resize_label(d_compos['bboxes'], 800, org_height)
        matched = np.ones(len(gt_compos['bboxes']), dtype=int)
        for d_bbox in d_compos['bboxes']:
            if not match(img, d_bbox, gt_compos, matched):
                FP += 1
        FN += sum(matched)
        
        print("[%d/%d]" %(i, amount))
        if i > 1000:
            break

    return store_det_clips, store_gt_categories, (FP, FN)

In [7]:
from CNN import CNN
cnn = CNN('E:/Mulong/Model/rico_compos/cnn-rico-1.h5')

Using TensorFlow backend.


Model Loaded From E:/Mulong/Model/rico_compos/cnn-rico-1.h5


In [8]:
det_clips, gt_labels, (FP, FN) = eval(detect, gt, 'E:\\Mulong\\Datasets\\rico\\combined', cnn, show=False)

[0/4706]
[1/4706]
[2/4706]
[3/4706]
[4/4706]
[5/4706]
[6/4706]
[7/4706]
[8/4706]
[9/4706]
[10/4706]
[11/4706]
[12/4706]
[13/4706]
[14/4706]
[15/4706]
[16/4706]
[17/4706]
[18/4706]
[19/4706]
[20/4706]
[21/4706]
[22/4706]
[23/4706]
[24/4706]
[25/4706]
[26/4706]
[27/4706]
[28/4706]
[29/4706]
[30/4706]
[31/4706]
[32/4706]
[33/4706]
[34/4706]
[35/4706]
[36/4706]
[37/4706]
[38/4706]
[39/4706]
[40/4706]
[41/4706]
[42/4706]
[43/4706]
[44/4706]
[45/4706]
[46/4706]
[47/4706]
[48/4706]
[49/4706]
[50/4706]
[51/4706]
[52/4706]
[53/4706]
[54/4706]
[55/4706]
[56/4706]
[57/4706]
[58/4706]
[59/4706]
[60/4706]
[61/4706]
[62/4706]
[63/4706]
[64/4706]
[65/4706]
[66/4706]
[67/4706]
[68/4706]
[69/4706]
[70/4706]
[71/4706]
[72/4706]
[73/4706]
[74/4706]
[75/4706]
[76/4706]
[77/4706]
[78/4706]
[79/4706]
[80/4706]
[81/4706]
[82/4706]
[83/4706]
[84/4706]
[85/4706]
[86/4706]
[87/4706]
[88/4706]
[89/4706]
[90/4706]
[91/4706]
[92/4706]
[93/4706]
[94/4706]
[95/4706]
[96/4706]
[97/4706]
[98/4706]
[99/4706]
[100/4706]

[756/4706]
[757/4706]
[758/4706]
[759/4706]
[760/4706]
[761/4706]
[762/4706]
[763/4706]
[764/4706]
[765/4706]
[766/4706]
[767/4706]
[768/4706]
[769/4706]
[770/4706]
[771/4706]
[772/4706]
[773/4706]
[774/4706]
[775/4706]
[776/4706]
[777/4706]
[778/4706]
[779/4706]
[780/4706]
[781/4706]
[782/4706]
[783/4706]
[784/4706]
[785/4706]
[786/4706]
[787/4706]
[788/4706]
[789/4706]
[790/4706]
[791/4706]
[792/4706]
[793/4706]
[794/4706]
[795/4706]
[796/4706]
[797/4706]
[798/4706]
[799/4706]
[800/4706]
[801/4706]
[802/4706]
[803/4706]
[804/4706]
[805/4706]
[806/4706]
[807/4706]
[808/4706]
[809/4706]
[810/4706]
[811/4706]
[812/4706]
[813/4706]
[814/4706]
[815/4706]
[816/4706]
[817/4706]
[818/4706]
[819/4706]
[820/4706]
[821/4706]
[822/4706]
[823/4706]
[824/4706]
[825/4706]
[826/4706]
[827/4706]
[828/4706]
[829/4706]
[830/4706]
[831/4706]
[832/4706]
[833/4706]
[834/4706]
[835/4706]
[836/4706]
[837/4706]
[838/4706]
[839/4706]
[840/4706]
[841/4706]
[842/4706]
[843/4706]
[844/4706]
[845/4706]
[846/4706]

In [11]:
len(gt_labels)

9437

In [10]:
len(det_clips)

9437

In [13]:
pre_labels = cnn.predict(det_clips)

In [12]:
FP, FN

(9470, 9409)

In [15]:
len(pre_labels)

9437

In [17]:
right = [1 for i in range(len(pre_labels)) if pre_labels[i] == gt_labels[i]]

In [19]:
sum(right) / len(pre_labels)

0.9174525802691533

In [20]:
TP = sum(right)

In [22]:
FP

9470

In [23]:
FP += len(pre_labels) - TP 

In [24]:
FP

10249

In [25]:
FN += len(pre_labels) - TP

In [26]:
FN

10188

In [32]:
recall = TP / (TP + FP)
precision = TP / (TP + FN)
f1 = 2*(recall * precision) / (recall + precision) 

In [30]:
recall

0.45792563600782776

In [31]:
precision

0.4594078319006686

In [33]:
f1

0.4586655365136545

In [None]:
def accuracy(gt, pre, fp, ):
    correct = 0
    for i in range(len(gt)):
        if gt[i] == pre[i]:
            correct += 1