# Evaluation Demo
ref: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb

In [1]:
import os
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from fashionpedia.fp import Fashionpedia
from fashionpedia.fp_eval import FPEval

In [2]:
# Choose the annotation type used for the Fashionpedia evaluation.
# The list of choices keeps its own name so that `annType` is only ever a
# string and is not first bound to a list and then rebound to one element.
ANN_TYPES = ['segm', 'bbox']
annType = ANN_TYPES[0]      # specify type here (index into ANN_TYPES)
print('Running demo for *%s* results.' % annType)

Running demo for *segm* results.


In [3]:
# Inputs for the demo: the annotation file (ground truth) and the results
# file (predictions), both given relative to the working directory.
anno_file, res_file = "data/sample.json", "data/fake_results.json"

## Demo

In [4]:
# Build the Fashionpedia ground-truth API from the annotation file, then load
# the results file through it to obtain the prediction API.
fpGt = Fashionpedia(anno_file)
fpDt = fpGt.loadRes(res_file)
imgIds = sorted(fpGt.getImgIds())

# Run the evaluation on the sorted image ids taken from the ground truth.
fp_eval = FPEval(fpGt, fpDt, annType)
fp_eval.params.imgIds = imgIds
fp_eval.run()

loading annotations into memory...
Done (t=0.07s)
creating index...
index created!
loading attributes...
attributes index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
loading attributes...
attributes index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.45s).


In [5]:
# Print the evaluation summary. Called with no arguments, the report below is
# headed "results with both IoU and F1 thresholds".
fp_eval.print() # print results using both the IoU AND F1 constraints

results with both IoU and F1 thresholds
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50:0.95 |area=all | maxDets=100 | superCat=      all] = 0.590
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50:0.95 |area=all | maxDets=100 | superCat=outerwear] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50:0.95 |area=all | maxDets=100 | superCat=     part] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | F1=0.50:0.95 |area=all | maxDets=100 | superCat=      all] = 0.590
 Average Precision  (AP) @[ IoU=0.75      | F1=0.50:0.95 |area=all | maxDets=100 | superCat=      all] = 0.590
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.75      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.50      | F1=0.50      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.50      | F1=0.75      |area=all | max

In [6]:
# Print results using the IoU constraint only (F1 turned off via f1=False).
# These numbers should match what the COCO API produces; compare with the
# "Sanity check with COCO API" section at the end of this notebook.
fp_eval.print(f1=False)

results with IoU thresholds (f1_threshold = -1.0)
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=all | maxDets=100 | superCat=outerwear] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=all | maxDets=100 | superCat=accessory] = 0.626
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=all | maxDets=100 | superCat=     part] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | F1=none      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.75      | F1=none      |area=all | maxDets=100 | superCat=      all] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=  s | maxDets=100 | superCat=      all] = 0.505
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area=  m | maxDets=100 | superCat=      all] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=none      |area

In [7]:
# Print results using the F1 constraint only (IoU turned off via iou=False).
fp_eval.print(iou=False) # print results using F1 only

results with F1 thresholds (iou_threshold = -1.0)
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=all | maxDets=100 | superCat=      all] = 0.699
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=all | maxDets=100 | superCat=outerwear] = 1.000
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=all | maxDets=100 | superCat=     part] = 0.240
 Average Precision  (AP) @[ IoU=none      | F1=0.50      |area=all | maxDets=100 | superCat=      all] = 0.688
 Average Precision  (AP) @[ IoU=none      | F1=0.75      |area=all | maxDets=100 | superCat=      all] = 0.688
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=  s | maxDets=100 | superCat=      all] = 1.000
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=  m | maxDets=100 | superCat=      all] = 0.476
 Average Precision  (AP) @[ IoU=none      | F1=0.50:0.95 |area=  l | maxDets=100 | superCat=      all] = 0.756
 Average Recall     (AR) @[ IoU=none      | F1=0.50:0.95 |area

### [Optional] Get per-class and/or per-superclass results

In [8]:
# Summarize per class but not per superclass.
# NOTE(review): flag meanings are inferred from their names and the section
# heading -- confirm against FPEval.summarize_class.
fp_eval.summarize_class(perSuperClass=False, perCls=True)

In [11]:
# Inspect the results of a single class under each threshold combination.
class_name = 'shirt, blouse'

# (f1, iou) flag pairs, in order: both constraints, IoU only, F1 only.
for use_f1, use_iou in ((True, True), (False, True), (True, False)):
    fp_eval.print_class_result(class_name, f1=use_f1, iou=use_iou)

shirt, blouse results with both IoU and F1 thresholds
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50:0.95 |area=all | maxDets=100 ] = 0.590
 Average Precision  (AP) @[ IoU=0.50      | F1=0.50:0.95 |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | F1=0.50:0.95 |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.75      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | F1=0.50      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | F1=0.75      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | F1=0.50      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | F1=0.75      |area=all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | F1=0.50:0.95 |area=  s | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.5

### Sanity check with COCO API
The COCO API evaluates detection results only, which is equivalent to $AP_{IoU}$ in Fashionpedia.

In [12]:
# Load the same annotation and results files through the plain COCO API.
cocoGt = COCO(anno_file)
cocoDt = cocoGt.loadRes(res_file)
imgIds = sorted(cocoGt.getImgIds())

# Run COCO evaluation with the same annotation type and print its summary,
# so it can be compared with the IoU-only Fashionpedia results above.
cocoEval = COCOeval(cocoGt, cocoDt, annType)
cocoEval.params.imgIds = imgIds
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.532
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.532
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.532
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.505
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.800
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.500
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.562
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets