In [34]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import random
from tqdm import tqdm
%matplotlib inline
from PIL import Image, ImageDraw, ImageColor, ImageFont
import os
import pandas as pd

In [2]:
def iou(pred, gt):
    image = np.zeros((720, 1280))
    p = image.copy()
    
    xp11 = pred['x'] #xMin
    yp11 = pred['y'] #YMin
    xp12 = xp11 + pred['width']
    yp12 = yp11 + pred['height']
    p[yp11:yp12, xp11:xp12] = 1
    
    
    g = image.copy()
    xp21 = gt['x'] #xMin
    yp21 = gt['y'] #YMin
    xp22 = xp21 + gt['width']
    yp22 = yp21 + gt['height']
    g[yp21:yp22, xp21:xp22] = 1
    
    
    inter = cv2.bitwise_and(p, g)
    union = cv2.bitwise_or(p, g)
    
    iou = np.count_nonzero(inter) / np.count_nonzero(union)
    return iou

In [3]:
def fbeta_score(precision, recall, beta=2):
    return (1+beta**2)*precision*recall / ((beta**2)*precision + recall)

In [78]:
def F2SingleImage(predictions, groundTruths, iouThreshold):
    tp = 0
    for pbox in predictions:
        iou_temp = []
        for gbox in groundTruths:
            iou_temp.append(iou(pbox, gbox))
        iou_max = max(iou_temp)
        if iou_max >= iouThreshold:
            tp += 1

    precision = tp / len(predictions)
    recall = tp / len(groundTruths)
    if tp == 0:
        score = 1
    else:
        score = fbeta_score(precision, recall, )
    return (precision, recall, score)
    

In [5]:
def plot_img(path, pre_bboxes, gt_bboxes):
    img = Image.open(path)
    fnt = ImageFont.truetype("/Library/Fonts/Arial.ttf", 40)

    for box in pre_bboxes:
        shape = [(box['x'], box['y']), (box['x']+box['width'], box['y']+box['height'])]
        ImageDraw.Draw(img).rectangle(shape, outline="red", width=3)
    for box in gt_bboxes:
        shape = [(box['x'], box['y']), (box['x']+box['width'], box['y']+box['height'])]
        ImageDraw.Draw(img).rectangle(shape, outline="blue", width=3)
#     ImageDraw.Draw(img).text((5, 5), str((pre_bboxes)), font=fnt) 
    display(img)

In [8]:
path = 'train_images/video_1/9114.jpg'
gt_bboxes = [{'x': 628, 'y': 321, 'width': 42, 'height': 47}, {'x': 893, 'y': 497, 'width': 65, 'height': 61}, {'x': 853, 'y': 413, 'width': 49, 'height': 44}, {'x': 749, 'y': 666, 'width': 57, 'height': 53}, {'x': 625, 'y': 669, 'width': 57, 'height': 48}, {'x': 402, 'y': 162, 'width': 46, 'height': 48}, {'x': 687, 'y': 159, 'width': 38, 'height': 39}, {'x': 639, 'y': 65, 'width': 44, 'height': 32}, {'x': 614, 'y': 72, 'width': 40, 'height': 33}, {'x': 830, 'y': 164, 'width': 56, 'height': 50}, {'x': 537, 'y': 154, 'width': 26, 'height': 25}, {'x': 357, 'y': 85, 'width': 33, 'height': 25}, {'x': 405, 'y': 323, 'width': 28, 'height': 30}, {'x': 677, 'y': 69, 'width': 46, 'height': 31}, {'x': 314, 'y': 105, 'width': 24, 'height': 21}, {'x': 650, 'y': 356, 'width': 27, 'height': 42}, {'x': 1129, 'y': 689, 'width': 59, 'height': 30}, {'x': 1140, 'y': 674, 'width': 69, 'height': 36}]
pred_bboxes = [{'x': 628, 'y': 321, 'width': 140, 'height': 47}, {'x': 893, 'y': 497, 'width': 22, 'height': 61}, {'x': 853, 'y': 413, 'width': 49, 'height': 110}, {'x': 749, 'y': 666, 'width': 57, 'height': 23}, {'x': 625, 'y': 669, 'width': 57, 'height': 96}, {'x': 402, 'y': 162, 'width': 46, 'height': 2}, {'x': 687, 'y': 159, 'width': 38, 'height': 65}, {'x': 639, 'y': 65, 'width': 28, 'height': 32}, {'x': 614, 'y': 72, 'width': 28, 'height': 33}, {'x': 830, 'y': 164, 'width': 76, 'height': 50}, {'x': 537, 'y': 154, 'width': 32, 'height': 25}, {'x': 357, 'y': 85, 'width': 33, 'height': 30}, {'x': 405, 'y': 323, 'width': 28, 'height': 27}, {'x': 677, 'y': 69, 'width': 48, 'height': 31}, {'x': 314, 'y': 105, 'width': 24, 'height': 21}, {'x': 650, 'y': 356, 'width': 40, 'height': 42}, {'x': 1129, 'y': 689, 'width': 46, 'height': 30}, {'x': 1140, 'y': 674, 'width': 70, 'height': 35}]

In [11]:
confidence_threshold = 0.5
F2SingleImage(pred_bboxes,gt_bboxes,confidence_threshold)

(0.7222222222222222, 0.7222222222222222, 0.7222222222222222)

In [35]:
data_df = pd.read_csv('train.csv')
data_df['annotations'] = data_df['annotations'].map(lambda x : eval(x))
data_df['filepath'] = data_df.apply(lambda x : f"train_images/video_{x.video_id}/{x.video_frame}.jpg", axis=1)
data_df

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,filepath
0,0,40258,0,0,0-0,[],train_images/video_0/0.jpg
1,0,40258,1,1,0-1,[],train_images/video_0/1.jpg
2,0,40258,2,2,0-2,[],train_images/video_0/2.jpg
3,0,40258,3,3,0-3,[],train_images/video_0/3.jpg
4,0,40258,4,4,0-4,[],train_images/video_0/4.jpg
...,...,...,...,...,...,...,...
23496,2,29859,10755,2983,2-10755,[],train_images/video_2/10755.jpg
23497,2,29859,10756,2984,2-10756,[],train_images/video_2/10756.jpg
23498,2,29859,10757,2985,2-10757,[],train_images/video_2/10757.jpg
23499,2,29859,10758,2986,2-10758,[],train_images/video_2/10758.jpg


In [79]:
output_dir = 'labels/'

precision_lst, recall_lst = [[],[],[],[],[],[],[],[],[],[],[]], [[],[],[],[],[],[],[],[],[],[],[]]
f2_score_lst = [[],[],[],[],[],[],[],[],[],[],[]]
for file in tqdm(os.listdir(output_dir)):
    idx = file.find('_')
    filepath = 'train_images/video_' + file[0] + '/' + file[2:idx] + '.jpg'
    path = os.path.join(output_dir, file)
    pred_bboxes = []
    with open(path) as f:
        for line in f.readlines():
            (xc, yc, w, h) = line.strip('\n').split(' ')[1:]
            width = int(eval(w) * 1280)
            height = int(eval(h) * 720)
            x = int(eval(xc) * 1280 - 0.5 * width)
            y = int(eval(yc) * 720 - 0.5 * height)
            pred_bboxes.append({'x': x, 'y':y, 'width':width, 'height':height})
    gt_bboxes = data_df[data_df.filepath == filepath].iloc[0]['annotations']
    for i in range(11):
        confidence_threshold = round(0.3 + 0.05 * i, 2)
        (precision, recall, score) = F2SingleImage(pred_bboxes, gt_bboxes, confidence_threshold)
        f2_score_lst[i].append(score)
        precision_lst[i].append(precision)
        recall_lst[i].append(recall)


100%|█████████████████████████████████████████| 401/401 [05:10<00:00,  1.29it/s]


In [83]:
for i in range(11):
    confidence_threshold = round(0.3 + 0.05 * i, 2)
    f2_score_avg = np.average(f2_score_lst[i])
#     precision_avg = np.average(precision_lst[i])
#     recall_avg = np.average(recall_lst[i])
#     f2_score_avg = fbeta_score(precision_avg, recall_avg, )
    print(f"Confidence threshold: {confidence_threshold:.2f}\t f2 score: {f2_score_avg:.3f}")

Confidence threshold: 0.30	 f2 score: 0.866
Confidence threshold: 0.35	 f2 score: 0.864
Confidence threshold: 0.40	 f2 score: 0.862
Confidence threshold: 0.45	 f2 score: 0.861
Confidence threshold: 0.50	 f2 score: 0.857
Confidence threshold: 0.55	 f2 score: 0.851
Confidence threshold: 0.60	 f2 score: 0.840
Confidence threshold: 0.65	 f2 score: 0.825
Confidence threshold: 0.70	 f2 score: 0.802
Confidence threshold: 0.75	 f2 score: 0.791
Confidence threshold: 0.80	 f2 score: 0.816


In [73]:
path = 'labels/1-4477_jpg.rf.683f420847c960b86cefc4b0cb58b9a5.txt'
pred_bboxes = []
with open(path) as f: 
    for line in f.readlines():
        (xc, yc, w, h) = line.strip('\n').split(' ')[1:]
        width = int(eval(w) * 1280)
        height = int(eval(h) * 720)
        x = int(eval(xc) * 1280 - 0.5 * width)
        y = int(eval(yc) * 720 - 0.5 * height)
        pred_bboxes.append({'x': x, 'y':y, 'width':width, 'height':height})
        
pred_bboxes

[{'x': 639, 'y': 386, 'width': 41, 'height': 51},
 {'x': 276, 'y': 45, 'width': 50, 'height': 45}]

In [74]:
file = '1-4477_jpg.rf.b0c73973136ab30f18cba753618c2165.txt'
idx = file.find('_')
filepath = 'train_images/video_' + file[0] + '/' + file[2:idx] + '.jpg'
filepath

'train_images/video_1/4477.jpg'

In [75]:
data_df[data_df.filepath == filepath].iloc[0]['annotations']

[{'x': 639, 'y': 386, 'width': 41, 'height': 51},
 {'x': 276, 'y': 45, 'width': 51, 'height': 45}]

In [77]:
0/0

ZeroDivisionError: division by zero