In [8]:
import pandas as pd
import numpy as np
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from fiftyone import ViewField as F

In [12]:
# Show the names of the datasets that fo.list_datasets() reports, to pick one below
available_datasets = fo.list_datasets()
print(available_datasets)

['CNNW', 'FOXNEWSW', 'MSNBCW', 'hodost-lv', 'news7-lv']


# Analysis for face size (PA22 Figures 5-6)

In [13]:
# Dataset under analysis
dataset_orig = fo.load_dataset("FOXNEWSW")

# Match predictions against ground truth once on the full dataset. The
# "eval_iou" label attribute filtered on below is presumably written by this
# run (eval_key="eval") -- confirm against the FiftyOne docs.
dataset_orig.evaluate_detections(
    "yolo-resnetv1-fcg_average_vote",
    "ground_truth",
    eval_key="eval",
    classwise=False,
)

# Years kept for the analysis, stored as strings.
# For NHK and hodo station use range(2013, 2022) instead.
years_list = list(map(str, range(2000, 2022)))

# For US evaluation: keep only samples from the selected years, then keep only
# predictions whose IoU is above the threshold, and clone the result.
view_analysis = (
    dataset_orig
    .match(F("year").is_in(years_list))
    .filter_labels("yolo-resnetv1-fcg_average_vote", F("eval_iou") > 0.001)
    .clone()
)

# Expression for the bounding-box area: relative width/height
# (bounding_box[2], bounding_box[3]) scaled by the image metadata dimensions.
bbox_area = (
    F("$metadata.width") * F("bounding_box")[2] *
    F("$metadata.height") * F("bounding_box")[3]
)

# Size buckets: [very small, small, small-medium, medium, medium-large, large, very large]
# Reference sizes noted by the author:
#   Average bbox: NHK = 78x78, HODO = 52x52, US dataset around 135x135.
#   Smallest:     NHK = 3x3,   HODO = 2x2,   US = 35x35.
#   Largest:      NHK = 258x258, HODO = 174x174, US = 390x390.
# Upper bound of each bucket as an area (side length squared).
boxes_areas = [side ** 2 for side in (8, 16, 32, 64, 96, 128, 156)]

# First bucket: no lower bound, everything up to the smallest threshold.
boxes_filter_list = [bbox_area <= boxes_areas[0]]

# Middle buckets: (previous threshold, current threshold].
boxes_filter_list += [
    (bbox_area > lower_area) & (bbox_area <= upper_area)
    for lower_area, upper_area in zip(boxes_areas[:-1], boxes_areas[1:])
]

# Last bucket: everything above the largest threshold.
boxes_filter_list.append(bbox_area > boxes_areas[-1])


Evaluating detections...
 100% |███████████████| 1674/1674 [8.2s elapsed, 0s remaining, 183.8 samples/s]      


In [14]:
# One view per size bucket: ground truth and predictions are both restricted to
# boxes inside the bucket, and predictions labelled "-1" are dropped.
# NOTE(review): filter_labels is called with its default arguments; if the
# default also removes samples left without any matching label, images with
# ground truth in a bucket but no prediction in it would be excluded from the
# evaluation -- confirm against the FiftyOne docs.
views_list = [
    view_analysis
    .filter_labels("ground_truth", size_filter)
    .filter_labels("yolo-resnetv1-fcg_average_vote", size_filter)
    .filter_labels("yolo-resnetv1-fcg_average_vote", F("label") != "-1")
    for size_filter in boxes_filter_list
]

In [15]:
# Evaluate every size-bucketed view, keeping one results object per bucket in
# the same order as views_list.
# NOTE(review): every call reuses eval_key="eval"; presumably each run replaces
# the fields stored by the previous one -- confirm.
iou_sweep = [0.4, 0.45, 0.5, 0.55, 0.6]  # For US evaluation

results_list = [
    size_view.evaluate_detections(
        "yolo-resnetv1-fcg_average_vote",
        gt_field="ground_truth",
        eval_key="eval",
        compute_mAP=True,
        iou_threshs=iou_sweep,
    )
    for size_view in views_list
]

Evaluating detections...
 100% |█████████████████████| 0/0 [9.4ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [10.4ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |█████████████████████| 0/0 [11.1ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [12.7ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |█████████████████████| 0/0 [11.4ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [12.6ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |███████████████████| 66/66 [464.5ms elapsed, 0s remaining, 142.1 samples/s]      
Performing IoU sweep...
 100% |███████████████████| 66/66 [206.8ms elapsed, 0s remaining, 319.2 samples/s]      
Evaluating detections...
 100% |███████████████████| 96/96 [671.9ms elapsed, 0s remaining, 142.9 samples/s]      
Performing IoU sweep...
 100% |██████████████

In [16]:
# Build a summary table with one row per size bucket: area, side length,
# mAP (in percent) and F1.

# 186**2 is for visualization purposes, representing [156-]: it is the x-axis
# position given to the open-ended last bucket.
LAST_BUCKET_AREA = 186 ** 2
bucket_areas = boxes_areas + [LAST_BUCKET_AREA]

# zip() stops at the shorter input, so a length mismatch would silently drop
# results or pair them with the wrong bucket. Fail loudly instead.
if len(results_list) != len(bucket_areas):
    raise ValueError(
        f"Expected {len(bucket_areas)} evaluation results (one per size bucket), "
        f"got {len(results_list)}"
    )

rows_df = []
for res, box_area in zip(results_list, bucket_areas):
    # Negative mAP values are clamped to 0 before converting to percent
    # (presumably a negative value means "no data" -- confirm).
    # NOTE(review): buckets that contained no samples therefore appear as 0 in
    # the table and in the plots built from it.
    res_map = round((max(res.mAP(), 0) * 100), 1)
    res_f1 = round(res.metrics()['fscore'], 3)
    # Side length of the square with this area, used as the x-axis value.
    box_size = int(np.sqrt(box_area))
    rows_df.append([box_area, box_size, res_map, res_f1])
    print(f"mAP: {res_map}, F1: {res_f1}")

df_res = pd.DataFrame(data=rows_df, columns=['area', 'box_size', 'map', 'f1'])
print(df_res)

mAP: 0, F1: 0.0
mAP: 0, F1: 0.0
mAP: 0, F1: 0.0
mAP: 91.6, F1: 0.909
mAP: 95.3, F1: 0.979
mAP: 100.0, F1: 1.0
mAP: 100.0, F1: 1.0
mAP: 92.4, F1: 0.989
    area  box_size    map     f1
0     64         8    0.0  0.000
1    256        16    0.0  0.000
2   1024        32    0.0  0.000
3   4096        64   91.6  0.909
4   9216        96   95.3  0.979
5  16384       128  100.0  1.000
6  24336       156  100.0  1.000
7  34596       186   92.4  0.989


In [17]:
import os

import plotly.express as px

# Directory the PDF figures are exported to.
FIGURES_DIR = "/home/agirbau/work/politics/figures"


def plot_metric_by_box_size(df, column, label, dataset_name, out_dir=FIGURES_DIR):
    """Plot one metric against bounding-box size, export it as a PDF and show it.

    Args:
        df: DataFrame with a "box_size" column and a column named ``column``.
        column: Metric column to plot; also used in the output file name.
        label: Metric name used in the figure title and the y-axis title.
        dataset_name: Dataset name used in the figure title and file name.
        out_dir: Directory the PDF is written to; created if it does not exist.

    Returns:
        The plotly figure that was exported and shown.
    """
    fig = px.line(
        df,
        x="box_size",
        y=column,
        text=column,
        title=f"{label} per bounding box size for {dataset_name}",
    )
    fig.update_traces(textposition="bottom right")
    fig.update_xaxes(title="Bounding box size")
    fig.update_yaxes(title=label)

    # Make sure the target directory exists so the export does not fail on a
    # missing folder after all the evaluation work has been done.
    os.makedirs(out_dir, exist_ok=True)
    fig.write_image(os.path.join(out_dir, f"results_{column}_face_size_{dataset_name}.pdf"))
    fig.show()
    return fig


# mAP
fig = plot_metric_by_box_size(df_res, "map", "mAP", dataset_orig.name)

# F1 score
fig = plot_metric_by_box_size(df_res, "f1", "F-score", dataset_orig.name)