-
Notifications
You must be signed in to change notification settings - Fork 15
/
evaluate.py
616 lines (578 loc) · 30.6 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
from __future__ import absolute_import
import sys
import os
import json
from itertools import chain
import click
import numpy as np
from skimage import draw
from PIL import Image
from shapely.geometry import Polygon
from ocrd import Processor
from ocrd_utils import (
getLogger,
initLogging,
assert_file_grp_cardinality,
xywh_from_polygon,
polygon_from_points,
coordinates_of_segment,
MIMETYPE_PAGE
)
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import parse as parse_page
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools.mask import (
encode as encodeMask,
merge as mergeMasks,
area as maskArea
)
from .config import OCRD_TOOL
# key of this processor's entry in OCRD_TOOL['tools'] (looked up in EvaluateSegmentation.__init__)
TOOL = 'ocrd-segment-evaluate'
class EvaluateSegmentation(Processor):
    """Processor comparing the segmentation of two PAGE file groups with pycocotools."""

    def __init__(self, *args, **kwargs):
        """Set up the processor with its tool description and version from ``OCRD_TOOL``."""
        kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL]
        kwargs['version'] = OCRD_TOOL['version']
        super(EvaluateSegmentation, self).__init__(*args, **kwargs)

    def process(self):
        """Performs segmentation evaluation with pycocotools on the workspace.

        Open and deserialize PAGE files from the first and second input file group
        (the first as ground truth, the second as prediction).

        Then iterate over the element hierarchy down to ``level-of-operation``.
        Aggregate and convert all pages' segmentation (coordinates and classes)
        to COCO:

        - On the region level, unless ``ignore-subtype``, differentiate segment
          classes by their `@type`, if applicable.
        - On the region level, unless ``for-categories`` is empty, select only
          segment classes in that (comma-separated) list.
        - If ``only-fg``, then use the foreground mask from the binarized
          image inside each segment for overlap calculations.

        Next, configure and run COCOEval for comparison of all pages. Show the matching
        pairs (GT segment ID, prediction segment ID, IoU) for every overlap on each page.
        Also, calculate per-class precision and recall (at the point of maximum recall).
        Finally, get the typical summary mean average precision / recall (but without
        restriction on the number of segments).

        Write a JSON report to the output file group.
        """
        LOG = getLogger('processor.EvaluateSegmentation')

        assert_file_grp_cardinality(self.output_file_grp, 1)
        assert_file_grp_cardinality(self.input_file_grp, 2, 'GT and evaluation data')
        # region or line level?
        level = self.parameter['level-of-operation']
        onlyfg = self.parameter['only-fg']
        typed = not self.parameter['ignore-subtype']
        selected = self.parameter['for-categories']
        if selected:
            selected = selected.split(',')
        # get input file tuples
        ifts = self.zip_input_files(mimetype=MIMETYPE_PAGE)
        # convert to 2 COCO datasets from all page pairs
        categories = ["bg"] # needed by cocoeval
        images = []
        annotations_gt = []
        annotations_dt = []
        for ift in ifts:
            file_gt, file_dt = ift
            if not file_gt:
                # there is no GT file to ask here, so take the page ID from the prediction
                LOG.warning("skipping page %s missing from GT",
                            getattr(file_dt, 'pageId', None))
                continue
            if not file_dt:
                LOG.warning("skipping page %s missing from prediction", file_gt.pageId)
                continue
            LOG.info("processing page %s", file_gt.pageId)
            pcgts_gt = page_from_file(self.workspace.download_file(file_gt))
            pcgts_dt = page_from_file(self.workspace.download_file(file_dt))
            page_gt = pcgts_gt.get_Page()
            page_dt = pcgts_dt.get_Page()
            # without only-fg, segments are compared by their outlines alone
            page_coords = page_mask = None
            if onlyfg:
                page_image, page_coords, _ = self.workspace.image_from_page(
                    page_gt, file_gt.pageId,
                    feature_selector='binarized',
                    feature_filter='clipped')
                # invert, so that dark (foreground) pixels become non-zero
                page_mask = ~ np.array(page_image.convert('L'))
            imgid = len(images)
            images.append({'file_name': file_gt.pageId,
                           'width': page_gt.get_imageWidth(),
                           'height': page_gt.get_imageHeight(),
            })
            # read annotations from each page recursively (all categories including subtypes)
            # and merge GT and prediction categories
            _add_annotations(annotations_gt, page_gt, imgid, categories,
                             level=level, typed=typed,
                             coords=page_coords, mask=page_mask)
            _add_annotations(annotations_dt, page_dt, imgid, categories,
                             level=level, typed=typed,
                             coords=page_coords, mask=page_mask)

        if level == 'line':
            categories.append('textline')
            if selected:
                # category selection is only defined on the region level; passing
                # category names on as IDs would fail in the evaluation
                LOG.warning("ignoring for-categories on line level")
                selected = None
        elif selected:
            # translate category names into the category IDs assigned below
            selected = [categories.index(cat) for cat in selected if cat in categories]
        _add_ids(categories)
        _add_ids(images)
        _add_ids(annotations_gt, 1) # cocoeval expects annotation IDs starting at 1
        _add_ids(annotations_dt, 1) # cocoeval expects annotation IDs starting at 1

        LOG.info("found %d GT / %d DT segments in %d categories for %d images",
                 len(annotations_gt), len(annotations_dt),
                 len(categories) - 1, len(images))

        coco_gt = _create_coco(categories, images, annotations_gt)
        coco_dt = _create_coco(categories, images, annotations_dt)

        stats = evaluate_coco(coco_gt, coco_dt, self.parameter, selected)

        # write the overall report (covering all pages) as JSON
        file_id = 'id' + self.output_file_grp + '_report'
        self.workspace.add_file(
            ID=file_id,
            file_grp=self.output_file_grp,
            pageId=None,
            local_filename=os.path.join(self.output_file_grp, file_id + '.json'),
            mimetype='application/json',
            content=json.dumps(stats, indent=2))
        # todo: also write report for each page
@click.command()
@click.option('-G', '--gt-page-filelst', type=click.File('r'),
              help="list file of ground-truth page file paths")
@click.option('-D', '--dt-page-filelst', type=click.File('r'),
              help="list file of detection page file paths")
@click.option('-I', '--bin-img-filelst', type=click.File('r'),
              help="list file of binarized image file paths")
@click.option('-L', '--level-of-operation', type=click.Choice(['region', 'line']), default='region',
              help="hierarchy level of segments to compare")
@click.option('-T', '--ignore-subtype', is_flag=True,
              help="on region level, ignore @type distinction")
@click.option('-C', '--for-categories', default='', type=str,
              help="on region level, comma-separated list of category names to evaluate (empty for all)")
@click.option('-R', '--report-file', type=click.File('w'), default="eval.log",
              help="file name to write evaluation results to")
@click.argument('tabfile', type=click.File('r'), required=False)
def standalone_cli(gt_page_filelst,
                   dt_page_filelst,
                   bin_img_filelst,
                   level_of_operation,
                   ignore_subtype,
                   for_categories,
                   report_file,
                   tabfile):
    """Performs segmentation evaluation with pycocotools on the given PAGE-XML files.

    \b
    Open and deserialize PAGE files from the list files.
    Then iterate over the element hierarchy down to ``level-of-operation``.
    Aggregate and convert all pages' segmentation (coordinates and classes)
    to COCO:

    \b
    - On the region level, unless ``ignore-subtype``, differentiate segment
      classes by their `@type`, if applicable.
    - On the region level, unless ``for-categories`` is empty, select only
      segment classes in that (comma-separated) list.
    - If image files are given (as separate file list or in the 3rd column
      of the tab-separated list file), then for each PAGE file pair, use
      the foreground mask from the binarized image inside all segments for
      overlap calculations.

    \b
    Next, configure and run COCOEval for comparison of all pages. Show the
    matching pairs (GT segment ID, prediction segment ID, IoU) for every
    overlap on each page.
    Also, calculate per-class precision and recall (at maximum recall).
    Finally, get the typical summary mean average precision / recall
    (but without restriction on the number of segments), and write all
    statistics to ``report-file``.

    \b
    The report is written in JSON format.
    """
    def read_entries(handle):
        # strip surrounding whitespace and skip blank lines (e.g. a trailing newline),
        # which would otherwise be taken for empty file paths
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]

    # the file lists come either from -G and -D (plus optional -I) or from TABFILE
    use_lists = tabfile is None
    if ((gt_page_filelst is not None) != use_lists or
        (dt_page_filelst is not None) != use_lists):
        raise click.UsageError(
            "pass file lists either as tab-separated single file or as separate files")
    if use_lists:
        gt_page_files = read_entries(gt_page_filelst)
        dt_page_files = read_entries(dt_page_filelst)
        if len(gt_page_files) != len(dt_page_files):
            raise click.UsageError("number of DT files must match number of GT files")
        if bin_img_filelst is not None:
            bin_img_files = read_entries(bin_img_filelst)
            if len(bin_img_files) != len(gt_page_files):
                raise click.UsageError("number of image files must match number of GT files")
        else:
            bin_img_files = None
    else:
        files = [line.split('\t') for line in read_entries(tabfile)]
        if not files:
            raise click.UsageError("list of files is empty")
        len0 = len(files[0])
        if not 2 <= len0 <= 3:
            raise click.UsageError("list of files must be tab-separated (GT, DT[, bin-img])")
        if any(len(entry) != len0 for entry in files):
            raise click.UsageError("number of DT files must match number of GT files")
        if len0 == 2:
            gt_page_files, dt_page_files = zip(*files)
            bin_img_files = None
        else:
            gt_page_files, dt_page_files, bin_img_files = zip(*files)
    # split the comma-separated option into category names
    # (passing the plain string on would make it be iterated character by character)
    selected = [cat for cat in for_categories.split(',') if cat] or None
    stats = evaluate_files(gt_page_files,
                           dt_page_files,
                           bin_img_files,
                           level_of_operation,
                           not ignore_subtype,
                           selected)
    json.dump(stats, report_file, indent=2)
# standalone entry point
def evaluate_files(gt_files, dt_files, img_files=None, level='region', typed=True, selected=None):
    """Evaluate the segmentation of PAGE files against ground-truth PAGE files.

    Arguments:
      gt_files: paths of the ground-truth PAGE files
      dt_files: paths of the predicted PAGE files (paired with ``gt_files`` by position)
      img_files: optional paths of binarized (mode ``1``) page images, paired by
        position; if given, overlaps are calculated on foreground pixels only
      level: hierarchy level to compare, ``region`` or ``line``
      typed: whether to differentiate region categories by their ``@type``
      selected: on the region level, category names to restrict the evaluation to,
        as a list or as a comma-separated string (empty or None for all)

    Returns the statistics dict computed by ``evaluate_coco``.

    Raises AssertionError if an image is not binarized or if its size deviates
    from the PAGE description by 2 pixels or more.
    """
    initLogging()
    LOG = getLogger('processor.EvaluateSegmentation')
    if isinstance(selected, str):
        # accept the comma-separated notation, too (iterating over the plain
        # string would compare single characters against the category names)
        selected = [cat for cat in selected.split(',') if cat]
    categories = ["bg"] # needed by cocoeval
    images = []
    annotations_gt = []
    annotations_dt = []
    for gt_file, dt_file, img_file in zip(gt_files, dt_files,
                                          img_files or [None] * len(gt_files)):
        pcgts_gt = parse_page(gt_file)
        pcgts_dt = parse_page(dt_file)
        page_id = pcgts_gt.pcGtsId or gt_file
        LOG.info("processing page %s", page_id)
        page_gt = pcgts_gt.get_Page()
        page_dt = pcgts_dt.get_Page()
        # without an image, segments are compared by their outlines alone
        page_coords = page_mask = None
        if img_file:
            # the context manager makes sure the image file gets closed again
            with Image.open(img_file) as page_image:
                assert page_image.mode == '1', "input images must already be binarized"
                assert page_image.width - 2 < page_gt.get_imageWidth() < page_image.width + 2, \
                    "mismatch between width of binary image and PAGE description"
                assert page_image.height - 2 < page_gt.get_imageHeight() < page_image.height + 2, \
                    "mismatch between height of binary image and PAGE description"
                # invert, so that dark (foreground) pixels become non-zero
                page_mask = ~ np.array(page_image)
            # the image is used as is, hence the identity transform
            page_coords = {"transform": np.eye(3), "angle": 0, "features": "binarized"}
        imgid = len(images)
        images.append({'file_name': page_id,
                       'width': page_gt.get_imageWidth(),
                       'height': page_gt.get_imageHeight(),
        })
        # read annotations from each page recursively (all categories including subtypes)
        # and merge GT and prediction categories
        _add_annotations(annotations_gt, page_gt, imgid, categories,
                         level=level, typed=typed,
                         coords=page_coords, mask=page_mask)
        _add_annotations(annotations_dt, page_dt, imgid, categories,
                         level=level, typed=typed,
                         coords=page_coords, mask=page_mask)

    if level == 'line':
        categories.append('textline')
        if selected:
            # category selection is only defined on the region level; passing
            # category names on as IDs would fail in the evaluation
            LOG.warning("ignoring category selection on line level")
            selected = None
    elif selected:
        # translate category names into the category IDs assigned below
        selected = [categories.index(cat) for cat in selected if cat in categories]
    _add_ids(categories)
    _add_ids(images)
    _add_ids(annotations_gt, 1) # cocoeval expects annotation IDs starting at 1
    _add_ids(annotations_dt, 1) # cocoeval expects annotation IDs starting at 1

    LOG.info("found %d GT / %d DT segments in %d categories for %d images",
             len(annotations_gt), len(annotations_dt),
             len(categories) - 1, len(images))

    coco_gt = _create_coco(categories, images, annotations_gt)
    coco_dt = _create_coco(categories, images, annotations_dt)

    parameters = {"level-of-operation": level,
                  "only-fg": bool(img_files),
                  "ignore-subtype": not typed,
                  "for-categories": selected}
    stats = evaluate_coco(coco_gt, coco_dt, parameters, selected)
    return stats
def evaluate_coco(coco_gt, coco_dt, parameters, catIds=None):
    """Compare two COCO datasets and collect segmentation statistics.

    Arguments:
      coco_gt: COCO dataset of the ground truth
      coco_dt: COCO dataset of the prediction
      parameters: dict of settings, copied into the result as is
      catIds: optional list of category IDs to restrict the evaluation to

    Returns a dict with the entries of ``parameters`` plus:
      ``by-image``: per page and category, the matching pairs (true positives),
        the unmatched segments (false positives / false negatives) and the
        over-/undersegmentation, precision/recall and pixel-wise ratios
        (only present if at least one page has segments in some category)
      ``by-category``: the same ratios, aggregated over all pages per category
        (with -1 where a ratio is undefined)
      ``scores``: the summary average precision / recall values of COCOeval
    """
    LOG = getLogger('processor.EvaluateSegmentation')
    LOG.info("comparing segmentations")
    stats = dict(parameters)
    coco_eval = COCOeval(coco_gt, coco_dt, 'segm') # bbox
    if catIds:
        coco_eval.params.catIds = catIds
    #coco_eval.params.iouThrs = [.5:.05:.95]
    #coco_eval.params.iouThrs = np.linspace(.3, .95, 14)
    coco_eval.params.maxDets = [None] # unlimited nr of detections (requires pycocotools#559)
    #coco_eval.params.areaRng = [(0, np.inf)] # unlimited region size
    #coco_eval.params.areaRngLbl = ['all'] # unlimited region size
    # Note: The IoU threshold criterion is inadequate for flat segmentation,
    #       because over-/undersegmentation can quickly become false negative/positive.
    #       The pycocotools implementation is especially inadequate, because
    #       it only counts 1:1 matches (and not even the largest or best-scoring, #564).
    #       On the other hand, purely pixel-wise measures do not distinguish instances,
    #       i.e. neighbours can quickly become merged or instances torn apart.
    #       Our approach therefore does not build on pycocotools for matching
    #       and aggregation, only for fast IoU calculation. All non-zero pairs
    #       are considered matches if their intersection over union > 0.5 _or_
    #       their intersection over either side > 0.5. Matches can thus be n:m.
    #       Non-matches are counted as well (false positives and false negatives).
    #       Aggregation uses microaveraging over images. Besides counting segments,
    #       the pixel areas are counted and averaged (as ratios).
    # FIXME: We must differentiate between allowable and non-allowable over/under-segmentation (splits/merges).
    #        (A region's split is allowable if it flows in the textLineOrder of the respective GT,
    #         i.e. lines are likely to be either on one side or the other, but not both.
    #         For top-to-bottom/bottom-to-top regions, vertical splits are allowable.
    #         For left-to-right/right-to-left regions, horizontal splits are allowable.
    #         To be sure, we could also validate that explicitly – evaluating both levels at the same time.
    #         Analogously, a number of regions' merge is allowable if it flows in the textLineOrder
    #         of them all, and the GT global reading order has no other regions in between.
    #         For top-to-bottom/bottom-to-top regions, vertical merges are allowable.
    #         For left-to-right/right-to-left regions, horizontal merges are allowable.
    #         Again, we could also validate that the overall textline flow is equivalent.)
    #        This difference can in turn be used to weigh a match pair's score accordingly
    #        when aggregating. For precision-like scores, we would rule out non-allowable merges
    #        (by counting them as FP), and for recall-like scores, we would rule out non-allowable splits
    #        (by counting them as FN).
    #        We can also weigh these non-allowable cases by their share of height
    #        (in vertical textLineOrder and horizontal writing) or width
    #        (in horizontal textLineOrder and vertical writing) which is in disagreement,
    #        or the share of its textlines that have been split or lost.
    #        Furthermore, we can weigh matches by the share of non-text regions or fg pixels involved.
    coco_eval.evaluate()
    # get by-page alignment (ignoring inadequate 1:1 matching by pycocotools)
    def get(arg):
        return lambda x: x[arg]
    # evalImgs is a flat list, indexed below as category * area range * image
    numImgs = len(coco_eval.params.imgIds)
    numAreas = len(coco_eval.params.areaRng)
    for imgind, imgId in enumerate(coco_eval.params.imgIds):
        img = coco_gt.imgs[imgId]
        pageId = img['file_name']
        for catind, catId in enumerate(coco_eval.params.catIds):
            cat = coco_gt.cats[catId]
            catName = cat['name']
            if not catId:
                continue # skip category 0 (the "bg" placeholder)
            # bypassing COCOeval.evaluateImg, hook onto its results
            # (again, we stay at areaRng[0]=all and maxDets[0]=all)
            start = catind * numImgs * numAreas
            evalimg = coco_eval.evalImgs[start + imgind]
            if evalimg is None:
                continue # no DT and GT here
            # record as dict by pageId / by category
            imgstats = stats.setdefault('by-image', dict())
            pagestats = imgstats.setdefault(pageId, dict())
            # get matches and ious and scores
            ious = coco_eval.ious[imgId, catId]
            if len(ious):
                overlaps_dt, overlaps_gt = ious.nonzero()
            else:
                overlaps_dt = overlaps_gt = []
            # reconstruct score sorting in computeIoU
            gt = coco_eval._gts[imgId, catId]
            dt = coco_eval._dts[imgId, catId]
            dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
            dt = [dt[i] for i in dtind]
            matches = list()
            gtmatches = dict()
            dtmatches = dict()
            for dtind, gtind in zip(overlaps_dt, overlaps_gt):
                d = dt[dtind]
                g = gt[gtind]
                iou = ious[dtind, gtind]
                union = maskArea(mergeMasks([g['segmentation'], d['segmentation']]))
                intersection = int(iou * union)
                # cannot use g or d['area'] here, because mask might be fractional (only-fg) instead of outline
                areag = int(maskArea(g['segmentation']))
                aread = int(maskArea(d['segmentation']))
                iogt = intersection / areag
                iodt = intersection / aread
                if iou < 0.5 and iogt < 0.5 and iodt < 0.5:
                    continue
                gtmatches.setdefault(gtind, list()).append(dtind)
                dtmatches.setdefault(dtind, list()).append(gtind)
                matches.append((g['id'],
                                d['id'],
                                iogt, iodt, iou, intersection))
                pagestats.setdefault('true_positives', dict()).setdefault(catName, list()).append(
                    {'GT.ID': g['segment_id'],
                     'DT.ID': d['segment_id'],
                     'GT.area': areag,
                     'DT.area': aread,
                     'I.area': intersection,
                     'IoGT': iogt,
                     'IoDT': iodt,
                     'IoU': iou})
            dtmisses = []
            for dtind, d in enumerate(dt):
                if dtind in dtmatches:
                    continue
                # convert to a plain int (as for the matches above), so that
                # sums over many segments and pages cannot wrap around
                dtmisses.append((d['id'], int(maskArea(d['segmentation']))))
                pagestats.setdefault('false_positives', dict()).setdefault(catName, list()).append(
                    {'DT.ID': d['segment_id'],
                     'area': int(d['area'])})
            gtmisses = []
            for gtind, g in enumerate(gt):
                if gtind in gtmatches:
                    continue
                # convert to a plain int, see above
                gtmisses.append((g['id'], int(maskArea(g['segmentation']))))
                pagestats.setdefault('false_negatives', dict()).setdefault(catName, list()).append(
                    {'GT.ID': g['segment_id'],
                     'area': int(g['area'])})
            # measure under/oversegmentation for this image and category
            # (follows Zhang et al 2021: Rethinking Semantic Segmentation Evaluation [arXiv:2101.08418])
            over_gt = set(gtind for gtind in gtmatches if len(gtmatches[gtind]) > 1)
            over_dt = set(chain.from_iterable(
                gtmatches[gtind] for gtind in gtmatches if len(gtmatches[gtind]) > 1))
            under_dt = set(dtind for dtind in dtmatches if len(dtmatches[dtind]) > 1)
            under_gt = set(chain.from_iterable(
                dtmatches[dtind] for dtind in dtmatches if len(dtmatches[dtind]) > 1))
            if len(dt) and len(gt):
                oversegmentation = len(over_gt) * len(over_dt) / len(gt) / len(dt)
                undersegmentation = len(under_gt) * len(under_dt) / len(gt) / len(dt)
                # Zhang's idea of attenuating the under/oversegmentation ratio with a "penalty"
                # to account for the degree of further sub-segmentation (i.e. the tanh of the
                # ratio times the number of surplus matches) is misguided IMHO,
                # because its degree term depends on the total number of segments.
                pagestats.setdefault('oversegmentation', dict())[catName] = oversegmentation
                pagestats.setdefault('undersegmentation', dict())[catName] = undersegmentation
                pagestats.setdefault('precision', dict())[catName] = (len(dt) - len(dtmisses)) / len(dt)
                pagestats.setdefault('recall', dict())[catName] = (len(gt) - len(gtmisses)) / len(gt)
            tparea = sum(map(get(5), matches)) # sum(inter)
            fparea = sum(map(get(1), dtmisses)) # sum(area)
            fnarea = sum(map(get(1), gtmisses)) # sum(area)
            # this condition keeps all three denominators non-zero
            if tparea or (fparea and fnarea):
                pagestats.setdefault('pixel_precision', dict())[catName] = tparea / (tparea + fparea)
                pagestats.setdefault('pixel_recall', dict())[catName] = tparea / (tparea + fnarea)
                pagestats.setdefault('pixel_iou', dict())[catName] = tparea / (tparea + fparea + fnarea)
            # aggregate per-img/per-cat IoUs for microaveraging
            evalimg['matches'] = matches # TP
            evalimg['dtMisses'] = dtmisses # FP
            evalimg['gtMisses'] = gtmisses # FN
            evalimg['dtIdsOver'] = [dt[dtind]['id'] for dtind in over_dt]
            evalimg['gtIdsOver'] = [gt[gtind]['id'] for gtind in over_gt]
            evalimg['dtIdsUnder'] = [dt[dtind]['id'] for dtind in under_dt]
            evalimg['gtIdsUnder'] = [gt[gtind]['id'] for gtind in under_gt]

    catstats = stats.setdefault('by-category', dict())
    # accumulate our over-/undersegmentation and IoU ratios
    for catind, catId in enumerate(coco_eval.params.catIds):
        cat = coco_gt.cats[catId]
        # every evaluated category gets an entry, even if it stays empty
        catentry = catstats.setdefault(cat['name'], dict())
        start = catind * numImgs * numAreas
        # again, we stay at areaRng[0]=all and maxDets[0]=all
        evalimgs = [coco_eval.evalImgs[start + imgind] for imgind in range(numImgs)]
        evalimgs = [img for img in evalimgs if img is not None]
        assert all(img['category_id'] == catId for img in evalimgs)
        assert all(img['maxDet'] is None for img in evalimgs)
        assert all(img['aRng'] == coco_eval.params.areaRng[0] for img in evalimgs)
        if not len(evalimgs):
            continue
        # again, we can ignore gtIgnore here, because we only look at areaRng[0]=all
        # again, we can ignore dtIgnore here, because we only look at maxDet=None
        numDTs = sum(len(img['dtIds']) for img in evalimgs)
        numGTs = sum(len(img['gtIds']) for img in evalimgs)
        overDTs = sum(len(img['dtIdsOver']) for img in evalimgs)
        overGTs = sum(len(img['gtIdsOver']) for img in evalimgs)
        underDTs = sum(len(img['dtIdsUnder']) for img in evalimgs)
        underGTs = sum(len(img['gtIdsUnder']) for img in evalimgs)
        numIoUs = sum(len(img['matches']) for img in evalimgs)
        numFPs = sum(len(img['dtMisses']) for img in evalimgs)
        numFNs = sum(len(img['gtMisses']) for img in evalimgs)
        sumIoUs = sum(sum(map(get(4), img['matches'])) for img in evalimgs) # sum(iou)
        sumIoGTs = sum(sum(map(get(2), img['matches'])) for img in evalimgs) # sum(iogt)
        sumIoDTs = sum(sum(map(get(3), img['matches'])) for img in evalimgs) # sum(iodt)
        sumTParea = sum(sum(map(get(5), img['matches'])) for img in evalimgs) # sum(inter)
        sumFParea = sum(sum(map(get(1), img['dtMisses'])) for img in evalimgs) # sum(area)
        sumFNarea = sum(sum(map(get(1), img['gtMisses'])) for img in evalimgs) # sum(area)
        # -1 marks ratios which are undefined for lack of segments, matches or pixels
        if numDTs and numGTs:
            oversegmentation = overDTs * overGTs / numDTs / numGTs
            undersegmentation = underDTs * underGTs / numDTs / numGTs
            precision = (numDTs - numFPs) / numDTs
            recall = (numGTs - numFNs) / numGTs
        else:
            oversegmentation = undersegmentation = precision = recall = -1
        if numIoUs:
            iou = sumIoUs / numIoUs
            iogt = sumIoGTs / numIoUs
            iodt = sumIoDTs / numIoUs
        else:
            iou = iogt = iodt = -1
        if sumTParea or (sumFParea and sumFNarea):
            pixel_precision = sumTParea / (sumTParea + sumFParea)
            pixel_recall = sumTParea / (sumTParea + sumFNarea)
            pixel_iou = sumTParea / (sumTParea + sumFParea + sumFNarea)
        else:
            pixel_precision = pixel_recall = pixel_iou = -1
        catentry['oversegmentation'] = oversegmentation
        catentry['undersegmentation'] = undersegmentation
        catentry['segment-precision'] = precision
        catentry['segment-recall'] = recall
        catentry['IoGT'] = iogt # i.e. per-match pixel-recall
        catentry['IoDT'] = iodt # i.e. per-match pixel-precision
        catentry['IoU'] = iou # i.e. per-match pixel-jaccardindex
        catentry['pixel-precision'] = pixel_precision
        catentry['pixel-recall'] = pixel_recall
        catentry['pixel-iou'] = pixel_iou

    coco_eval.accumulate()
    coco_eval.summarize()
    # drop the 2 summary values labelled as belonging to further maxDets settings,
    # so that the remaining 10 line up with the labels below
    statInds = np.ones(12, bool)
    statInds[7] = False # AR maxDet[1]
    statInds[8] = False # AR maxDet[2]
    coco_eval.stats = coco_eval.stats[statInds]
    stats['scores'] = dict(zip([
        'Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all ]',
        'Average Precision  (AP) @[ IoU=0.50      | area=   all ]',
        'Average Precision  (AP) @[ IoU=0.75      | area=   all ]',
        'Average Precision  (AP) @[ IoU=0.50:0.95 | area= small ]',
        'Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium ]',
        'Average Precision  (AP) @[ IoU=0.50:0.95 | area= large ]',
        'Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all ]',
        'Average Recall     (AR) @[ IoU=0.50:0.95 | area= small ]',
        'Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium ]',
        'Average Recall     (AR) @[ IoU=0.50:0.95 | area= large ]',
    ], coco_eval.stats.tolist()))
    return stats
def _create_coco(categories, images, annotations):
    """Wrap the given categories, images and annotations into an indexed COCO dataset."""
    dataset = COCO()
    dataset.dataset = dict(categories=categories,
                           images=images,
                           annotations=annotations)
    # keep whatever index creation prints off our standard output
    with NoStdout():
        dataset.createIndex()
    return dataset
def _add_annotations(annotations, page, imgid, categories,
                     level='region', typed=True, coords=None, mask=None):
    """Append one annotation per segment of ``page`` to ``annotations``.

    On the ``region`` level, every region becomes a segment. Its category is named
    after its class (without the trailing 4 characters), followed by a dot and
    its type if ``typed`` is set and the region has one. Category names not seen
    before get appended to ``categories``.
    On any other level, the text lines of all text regions become the segments,
    and always get category ID 1.

    ``imgid``, ``coords`` and ``mask`` are passed on to ``_add_annotation``.
    """
    by_region = level == 'region'
    wanted = None if by_region else ['Text']
    for region in page.get_AllRegions(classes=wanted):
        if not by_region:
            for textline in region.get_TextLine():
                _add_annotation(annotations, textline, imgid, 1,
                                coords=coords, mask=mask)
            continue
        name = region.__class__.__name__[:-4]
        if typed and hasattr(region, 'get_type'):
            subtype = region.get_type()
            if subtype:
                name = name + '.' + subtype
        if name not in categories:
            categories.append(name)
        _add_annotation(annotations, region, imgid, categories.index(name),
                        coords=coords, mask=mask)
def _add_annotation(annotations, segment, imgid, catid, coords=None, mask=None):
    """Convert a PAGE segment into a COCO annotation and append it to ``annotations``.

    Arguments:
      annotations: list to append the annotation dict to
      segment: PAGE segment (providing ``id`` and ``get_Coords()``)
      imgid: ID of the image the segment belongs to
      catid: ID of the category the segment belongs to
      coords: coordinate system for ``coordinates_of_segment`` (only used with ``mask``)
      mask: optional array of the page's foreground pixels; if given, the segment
        is encoded as the part of ``mask`` inside its polygon instead of as
        the polygon outline

    Segments with less than 3 points are skipped with a warning.
    """
    LOG = getLogger('processor.EvaluateSegmentation')
    conf = segment.get_Coords().get_conf()
    # fall back to full confidence only if none is annotated
    # (a confidence of zero is falsy, but must not be turned into 1.0)
    score = 1.0 if conf is None else conf
    polygon = polygon_from_points(segment.get_Coords().points)
    if len(polygon) < 3:
        LOG.warning('ignoring segment "%s" with only %d points', segment.id, len(polygon))
        return
    xywh = xywh_from_polygon(polygon)
    if mask is None:
        # plain outline: a single polygon as flat list of coordinates
        segmentation = np.array(polygon).reshape(1, -1).tolist()
    else:
        polygon = coordinates_of_segment(segment, None, coords)
        py, px = draw.polygon(polygon[:,1], polygon[:,0], mask.shape)
        masked = np.zeros(mask.shape, dtype=np.uint8, order='F') # pycocotools.mask wants Fortran-contiguous arrays
        masked[py, px] = 1 * mask[py, px]
        segmentation = encodeMask(masked)
    annotations.append(
        {'segment_id': segment.id, # non-standard string-valued in addition to 'id'
         'image_id': imgid,
         'category_id': catid,
         'segmentation': segmentation,
         'area': Polygon(polygon).area,
         'bbox': [xywh['x'], xywh['y'], xywh['w'], xywh['h']],
         'score': score,
         'iscrowd': 0})
def _add_ids(entries, start=0):
    """Number the entries of a list in place, counting from ``start``.

    Entries which are dicts get the number assigned under their ``id`` key.
    All other entries are replaced by a dict with the number as ``id`` and
    the former entry as ``name``.
    """
    # the ID is offset by start, but the list position is not
    for pos, entry in enumerate(entries):
        ident = start + pos
        if isinstance(entry, dict):
            entry['id'] = ident
        else:
            entries[pos] = {'id': ident, 'name': entry}
class NoStdout():
    """Context manager which discards everything written to ``sys.stdout``.

    On entry, ``sys.stdout`` is replaced by this object. On exit, the previous
    stream is put back, and any exception raised inside the context propagates.
    """

    def __enter__(self):
        self.stdout = sys.stdout
        sys.stdout = self
        return self

    def __exit__(self, etype, evalue, etrace):
        sys.stdout = self.stdout
        return False # never swallow exceptions

    def write(self, value):
        """Drop the output."""

    def flush(self):
        """Do nothing (there is no buffer), but allow callers to flush stdout."""