# Week 1

Generate the frames

In [None]:
import subprocess
from pathlib import Path

# `%ffmpeg` is not an IPython line magic, so the video is decoded by calling
# the ffmpeg executable as an external process instead.
frames_dir = Path('frames')
frames_dir.mkdir(exist_ok=True)  # ffmpeg does not create the output directory

# Extraction is slow; skip it when frames from a previous run are present so
# the notebook can be re-run from a fresh kernel cheaply.
if not any(frames_dir.glob('output*.jpg')):
    subprocess.run(
        ['ffmpeg', '-i', 'AICity_data/train/S03/c010/vdo.avi', 'frames/output%03d.jpg'],
        check=True,  # surface a failed decode instead of continuing silently
    )

Load the annotation XML and get the tracks

In [None]:
from xml.dom import minidom

# Parse the annotation file into a DOM tree.
xml_parsed = minidom.parse('ai_challenge_s03_c010-full_annotation.xml')

# Every <track> element is collected from the first <annotations> element
# found in the document.
annotations = xml_parsed.getElementsByTagName('annotations')
tracks = annotations[0].getElementsByTagName('track')

Get the bounding boxes grouped by frame number

In [None]:
from collections import defaultdict
import collections

# Lightweight record for one box: top-left (xtl, ytl) and bottom-right (xbr, ybr).
BoundingBox = collections.namedtuple('BoundingBox', 'xtl ytl xbr ybr')

# Maps a frame number to the list of car boxes annotated on that frame.
boxes_by_frame = defaultdict(list)

for track in tracks:
    # Only tracks labelled 'car' are kept; every other label is skipped.
    if track.getAttribute('label') != 'car':
        continue
    for box in track.getElementsByTagName('box'):
        frame_num = int(box.getAttribute('frame'))
        # The namedtuple field names double as the XML attribute names.
        coords = {field: float(box.getAttribute(field)) for field in BoundingBox._fields}
        boxes_by_frame[frame_num].append(BoundingBox(**coords))

Let's draw some of the bounding boxes on the frames to check them visually

In [None]:
from PIL import Image, ImageDraw
from IPython.display import display

# ffmpeg numbers the files written through the `output%03d.jpg` pattern
# starting at 1.
# NOTE(review): this assumes the annotation `frame` attribute is 0-based
# (CVAT-style XML) — confirm against the annotation file.
FRAME_NUMBER_OFFSET = 1

# Only a handful of frames are shown; rendering every annotated frame would
# flood the notebook output.
MAX_PREVIEW_FRAMES = 5

for key in sorted(boxes_by_frame)[:MAX_PREVIEW_FRAMES]:
    image = 'frames/output%03d.jpg' % (int(key) + FRAME_NUMBER_OFFSET,)
    with Image.open(image) as im:
        # One drawing context per image is enough for all of its boxes.
        draw = ImageDraw.Draw(im)
        for box in boxes_by_frame[key]:
            draw.rectangle([(box.xtl, box.ytl), (box.xbr, box.ybr)], outline='green')
        display(im)

In [None]:
def get_iou(bb1: "BoundingBox", bb2: "BoundingBox") -> float:
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.

    Parameters
    ----------
    bb1 : BoundingBox
        Box exposing the attributes ``xtl``, ``ytl``, ``xbr`` and ``ybr``.
        (xtl, ytl) is the top-left corner and (xbr, ybr) is the
        bottom-right corner.
    bb2 : BoundingBox
        Second box, with the same attributes as ``bb1``.

    Returns
    -------
    float
        in [0, 1]

    Raises
    ------
    AssertionError
        If either box does not have strictly positive width and height.
    """
    # Both boxes must be well-formed (top-left strictly before bottom-right);
    # this also guarantees a non-zero union area below.
    assert bb1.xtl < bb1.xbr
    assert bb1.ytl < bb1.ybr
    assert bb2.xtl < bb2.xbr
    assert bb2.ytl < bb2.ybr

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1.xtl, bb2.xtl)
    y_top = max(bb1.ytl, bb2.ytl)
    x_right = min(bb1.xbr, bb2.xbr)
    y_bottom = min(bb1.ybr, bb2.ybr)

    # The boxes do not overlap at all
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # compute the area of both AABBs
    bb1_area = (bb1.xbr - bb1.xtl) * (bb1.ybr - bb1.ytl)
    bb2_area = (bb2.xbr - bb2.xtl) * (bb2.ybr - bb2.ytl)

    # The union is the sum of both areas minus the intersection, which
    # would otherwise be counted twice
    union_area = bb1_area + bb2_area - intersection_area
    iou = intersection_area / union_area
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
# Sanity check: compare the first box stored for frame 1 with itself.
bbox = boxes_by_frame[1][0]

get_iou(bb1=bbox, bb2=bbox)

In [None]:
def compute_average_precision(mIOU: float, groundtruths_bbox: "list[BoundingBox]", resulting_bbox: "list[BoundingBox]") -> float:
    """
    Compute the average precision of a list of detections.

    Ground truths and detections are paired by position (first with first,
    second with second, ...); a pair counts as a true positive when its IoU
    is at least ``mIOU``. The score is the sum over ranks k of
    ``(recall(k) - recall(k - 1)) * precision(k)``.

    Parameters
    ----------
    mIOU : float
        Minimum IoU for a detection to count as a true positive.
    groundtruths_bbox : list of BoundingBox
        Ground-truth boxes; its length is the denominator of the recall.
    resulting_bbox : list of BoundingBox
        Detected boxes, in the same order as ``groundtruths_bbox``.

    Returns
    -------
    float
        Average precision; 0.0 when there are no ground truths.
    """
    num_groundtruths = len(groundtruths_bbox)
    if num_groundtruths == 0:
        # Recall is undefined without ground truths; avoid dividing by zero.
        return 0.0

    matches = [
        get_iou(gt_bbox, result_bbox) >= mIOU
        for gt_bbox, result_bbox in zip(groundtruths_bbox, resulting_bbox)
    ]

    ap = 0.0
    true_positives = 0
    # Ranks are 1-based and include the last detection.
    for rank, is_match in enumerate(matches, start=1):
        if not is_match:
            # Recall does not change on a miss, so the term is zero.
            continue
        true_positives += 1
        precision_at_rank = true_positives / rank
        # Each true positive raises recall by exactly 1 / num_groundtruths.
        ap += precision_at_rank / num_groundtruths

    return ap