**Evaluation metric: mean average F-2 score**

[Evaluation rule](https://www.kaggle.com/c/airbus-ship-detection#evaluation) explained by Kaggle.

In [93]:
import json
import boto3
import pandas as pd
from tqdm import tqdm_notebook
from itertools import combinations

In [19]:
bucket_name = 'object-detection-airbus-dataset'
true_folder = 'validation_annotation'
pred_folder = 'validation_prediction'

In [20]:
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

In [66]:
# source: http://ronny.rest/tutorials/module/localization_001/iou/
def get_iou(a, b, epsilon=1e-5):
    """ Given two boxes `a` and `b` defined as a list of four numbers:
            [x1,y1,x2,y2]
        where:
            x1,y1 represent the upper left corner
            x2,y2 represent the lower right corner
        It returns the Intersect of Union score for these two boxes.

    Args:
        a:          (list of 4 numbers) [x1,y1,x2,y2]
        b:          (list of 4 numbers) [x1,y1,x2,y2]
        epsilon:    (float) Small value to prevent division by zero

    Returns:
        (float) The Intersect of Union score.
    """
    # COORDINATES OF THE INTERSECTION BOX
    x1 = max(a[0], b[0])
    y1 = max(a[1], b[1])
    x2 = min(a[2], b[2])
    y2 = min(a[3], b[3])

    # AREA OF OVERLAP - Area where the boxes intersect
    width = (x2 - x1)
    height = (y2 - y1)
    # handle case where there is NO overlap
    if (width<0) or (height <0):
        return 0.0
    area_overlap = width * height

    # COMBINED AREA
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    area_combined = area_a + area_b - area_overlap

    # RATIO OF AREA OF OVERLAP OVER COMBINED AREA
    iou = area_overlap / (area_combined+epsilon)
    return iou

def get_json(file_name):
    """get json object from s3"""
    return json.load(bucket.Object(file_name).get()['Body'])

**Get Prediction Filenames**


In [60]:
true_bucket = bucket.objects.filter(Prefix=true_folder)
pred_bucket = bucket.objects.filter(Prefix=pred_folder)

In [61]:
true_arr, pred_arr = [], []

for chunk in true_bucket.pages():
    true_arr.extend([x.key for x in chunk])

for chunk in pred_bucket.pages():
    pred_arr.extend([x.key for x in chunk])

sanity check

In [64]:
len(true_arr), len(pred_arr)

(10639, 10639)

In [76]:
sorted([x.split('/')[1] for x in true_arr]) == sorted([x.split('/')[1] for x in pred_arr])

True

In [77]:
img_list = [x.split('/')[1] for x in true_arr]

**calculate IoU per Image**

In [None]:
for img_name in tqdm_notebook(img_list):
    pred_lst = get_json(file_name=f'validation_prediction/{img_name}')['predictions']
    true_lst = get_json(file_name=f'validation_annotation/{img_name}')['annotations']
    

In [100]:
[[x['top'], x['left'], x['left']+x['width'], x['top']+x['height']] 
 for x in json.load(bucket.Object('validation_annotation/001aee007.json').get()['Body'])['annotations']]

[[61, 270, 327, 114],
 [712, 314, 384, 768],
 [549, 539, 608, 622],
 [65, 646, 768, 195]]

In [101]:
[[x['top'], x['left'], x['left']+x['width'], x['top']+x['height']] 
 for x in json.load(bucket.Object('validation_prediction/001aee007.json').get()['Body'])['predictions']]

[[542, 540, 608, 623],
 [62, 647, 767, 201],
 [709, 313, 383, 767],
 [58, 271, 325, 112]]

In [74]:
a = [406, 176, 406+145, 176+124]
b = [401, 170, 401+157, 170+129]

In [75]:
get_iou(a, b, epsilon=1e-5)

0.874350426083758