**Evaluation metric: mean average F-2 score**

[Evaluation rule](https://www.kaggle.com/c/airbus-ship-detection#evaluation) explained by Kaggle.

In [2]:
import json
import boto3
import pandas as pd
from tqdm import tqdm_notebook
from itertools import combinations

In [3]:
bucket_name = 'object-detection-airbus-dataset'
true_folder = 'validation_annotation'
pred_folder = 'validation_prediction'

In [4]:
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

In [5]:
# source: http://ronny.rest/tutorials/module/localization_001/iou/
def get_iou(a, b, epsilon=1e-5):
    """ Given two boxes `a` and `b` defined as a list of four numbers:
            [x1,y1,x2,y2]
        where:
            x1,y1 represent the upper left corner
            x2,y2 represent the lower right corner
        It returns the Intersect of Union score for these two boxes.

    Args:
        a:          (list of 4 numbers) [x1,y1,x2,y2]
        b:          (list of 4 numbers) [x1,y1,x2,y2]
        epsilon:    (float) Small value to prevent division by zero

    Returns:
        (float) The Intersect of Union score.
    """
    # COORDINATES OF THE INTERSECTION BOX
    x1 = max(a[0], b[0])
    y1 = max(a[1], b[1])
    x2 = min(a[2], b[2])
    y2 = min(a[3], b[3])

    # AREA OF OVERLAP - Area where the boxes intersect
    width = (x2 - x1)
    height = (y2 - y1)
    # handle case where there is NO overlap
    if (width<0) or (height <0):
        return 0.0
    area_overlap = width * height

    # COMBINED AREA
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    area_combined = area_a + area_b - area_overlap

    # RATIO OF AREA OF OVERLAP OVER COMBINED AREA
    iou = area_overlap / (area_combined+epsilon)
    return iou

def get_json(file_name):
    """get json object from s3"""
    return json.load(bucket.Object(file_name).get()['Body'])

**Get Prediction Filenames**


In [6]:
true_bucket = bucket.objects.filter(Prefix=true_folder)
pred_bucket = bucket.objects.filter(Prefix=pred_folder)

In [7]:
true_arr, pred_arr = [], []

for chunk in true_bucket.pages():
    true_arr.extend([x.key for x in chunk])

for chunk in pred_bucket.pages():
    pred_arr.extend([x.key for x in chunk])

sanity check

In [8]:
len(true_arr), len(pred_arr)

(10639, 10639)

In [9]:
sorted([x.split('/')[1] for x in true_arr]) == sorted([x.split('/')[1] for x in pred_arr])

True

In [10]:
img_list = [x.split('/')[1] for x in true_arr]

**calculate IoU per Image**

In [12]:
get_json(file_name='validation_prediction/{}'.format(img_list[0]))

{'file': '000532683.jpg',
 'image_size': [{'width': 768, 'height': 768, 'depth': 3}],
 'predictions': [{'class_id': 0.0,
   'score': 0.6680644750595093,
   'top': 756,
   'left': 740,
   'width': 16,
   'height': 9},
  {'class_id': 0.0,
   'score': 0.41235649585723877,
   'top': 459,
   'left': 597,
   'width': 8,
   'height': 17}]}

In [13]:
to_eval = []
for img_name in tqdm_notebook(img_list):

    pred_lst = get_json(file_name=f'validation_prediction/{img_name}')['predictions']
    true_lst = get_json(file_name=f'validation_annotation/{img_name}')['annotations']
    
    for i in pred_lst:
        confidence = i['score']
        pred_bbox = [i['top'], i['left'], i['left']+i['width'], i['top']+i['height']]
        for j in true_lst:
            true_bbox = [i['top'], i['left'], i['left']+i['width'], i['top']+i['height']]
            to_eval.append([img_name, confidence, true_bbox, pred_bbox])

HBox(children=(IntProgress(value=0, max=10639), HTML(value='')))




In [14]:
to_eval = pd.DataFrame(to_eval, columns=['img', 'confidence', 'true_bbox', 'pred_bbox'])

In [25]:
to_eval['iou'] = to_eval.apply(lambda x: get_iou(x['true_bbox'], x['pred_bbox']), 1)

In [28]:
to_eval[:20]

Unnamed: 0,img,confidence,true_bbox,pred_bbox,iou
0,000532683.json,0.668064,"[756, 740, 756, 765]","[756, 740, 756, 765]",0.0
1,000532683.json,0.668064,"[756, 740, 756, 765]","[756, 740, 756, 765]",0.0
2,000532683.json,0.412356,"[459, 597, 605, 476]","[459, 597, 605, 476]",0.0
3,000532683.json,0.412356,"[459, 597, 605, 476]","[459, 597, 605, 476]",0.0
4,001234638.json,0.99992,"[406, 176, 321, 530]","[406, 176, 321, 530]",0.0
5,0018e5d6c.json,0.999981,"[120, 62, 357, 211]","[120, 62, 357, 211]",1.0
6,001aee007.json,0.99999,"[542, 540, 608, 623]","[542, 540, 608, 623]",1.0
7,001aee007.json,0.99999,"[542, 540, 608, 623]","[542, 540, 608, 623]",1.0
8,001aee007.json,0.99999,"[542, 540, 608, 623]","[542, 540, 608, 623]",1.0
9,001aee007.json,0.99999,"[542, 540, 608, 623]","[542, 540, 608, 623]",1.0
