In [8]:
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
import numpy as np
import pandas as pd
import plotly.express as px
from fiftyone import ViewField as F

In [18]:
# Show the names of the FiftyOne datasets that can be loaded below.
dataset_names = fo.list_datasets()
print(dataset_names)

['2023.05.22.19.46.00', 'CNNW', 'FOXNEWSW', 'MSNBCW', 'hodost-lv', 'news7-lv']


# Analysis for face size (PA22 Figures 5-6)

In [36]:
# Dataset under analysis. The US broadcasters need extra IoU-based handling,
# so remember once whether the loaded dataset is one of them.
dataset_orig = fo.load_dataset("MSNBCW")
us_dataset_list = ["CNNW", "FOXNEWSW", "MSNBCW"]
is_us_dataset = dataset_orig.name in us_dataset_list
pred_field = "yolo-resnetv1-fcg_average_vote"

# US data only: run a first evaluation (classwise=False) so that the
# predictions can afterwards be thresholded on their "eval_iou" value.
if is_us_dataset:
    dataset_orig.evaluate_detections(
        pred_field,
        "ground_truth",
        eval_key="eval",
        classwise=False,
    )

# Keep only samples whose "year" is one of 2000..2021 (stored as strings).
# For NHK and hodo station, range(2013, 2022) was kept as an alternative.
years_list = list(map(str, range(2000, 2022)))
view_analysis = dataset_orig.match(F("year").is_in(years_list))

# US data only: keep predictions whose IoU is above the threshold, then
# clone the resulting view.
if is_us_dataset:
    iou_filter = F("eval_iou") > 0.001
    view_analysis = view_analysis.filter_labels(pred_field, iou_filter).clone()

# Expression for the bounding-box area in pixels, used to split detections
# into size bins. Elements 2 and 3 of "bounding_box" are scaled by the image
# width and height respectively (presumably FiftyOne's relative
# [x, y, width, height] layout -- confirm).
box_width_px = F("$metadata.width") * F("bounding_box")[2]
bbox_area = box_width_px * F("$metadata.height") * F("bounding_box")[3]
# Size thresholds, given as the side length (in pixels) of a square box:
# [very small, small, small-medium, medium, medium-large, large, very large]
#
# Reference face sizes per dataset:
#   average : NHK = 78x78,   HODO = 52x52,   US around 135x135
#   smallest: NHK = 3x3,     HODO = 2x2,     US = 35x35
#   largest : NHK = 258x258, HODO = 174x174, US = 390x390
box_side_thresholds = [8, 16, 32, 64, 96, 128, 156]

# Areas (side squared) as plain Python ints.
boxes_areas = [side * side for side in box_side_thresholds]
# One filter expression per size bin, in increasing size order:
#   first bin  : area <= smallest threshold
#   middle bins: previous threshold < area <= current threshold
#   last bin   : area > largest threshold (open-ended)
boxes_filter_list = [bbox_area <= boxes_areas[0]]
boxes_filter_list.extend(
    (bbox_area > lower_area) & (bbox_area <= upper_area)
    for lower_area, upper_area in zip(boxes_areas, boxes_areas[1:])
)
boxes_filter_list.append(bbox_area > boxes_areas[-1])


Evaluating detections...
 100% |███████████████| 1589/1589 [8.8s elapsed, 0s remaining, 188.7 samples/s]      


In [37]:
# Build one view per size bin. In each view both the ground truth and the
# predictions are restricted to boxes inside the bin, and predictions whose
# label is "-1" are discarded.
# NOTE(review): filter_labels is called with its default options; if those
# drop samples that are left without any matching label, such samples no
# longer contribute to the per-bin evaluation -- confirm this is intended.
pred_field = "yolo-resnetv1-fcg_average_vote"

views_list = [
    view_analysis
    .filter_labels("ground_truth", size_filter)
    .filter_labels(pred_field, size_filter)
    .filter_labels(pred_field, F("label") != "-1")
    for size_filter in boxes_filter_list
]

In [38]:
# Evaluate every size-bin view and collect the results objects.
# US datasets are evaluated with an explicit list of IoU thresholds; for the
# other datasets iou_threshs is passed as None.
iou_threshs = (
    [0.4, 0.45, 0.5, 0.55, 0.6]
    if dataset_orig.name in us_dataset_list
    else None
)

# NOTE(review): every bin reuses eval_key="eval"; presumably each run
# replaces whatever the previous run stored under that key, so only the
# results objects collected below are kept per bin -- confirm.
eval_kwargs = dict(
    gt_field="ground_truth",
    eval_key="eval",
    compute_mAP=True,
    iou_threshs=iou_threshs,
)

results_list = [
    size_view.evaluate_detections("yolo-resnetv1-fcg_average_vote", **eval_kwargs)
    for size_view in views_list
]

Evaluating detections...
 100% |█████████████████████| 0/0 [9.1ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [11.5ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |█████████████████████| 0/0 [10.5ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [11.4ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |█████████████████████| 0/0 [10.2ms elapsed, ? remaining, ? samples/s] 
Performing IoU sweep...
 100% |█████████████████████| 0/0 [11.8ms elapsed, ? remaining, ? samples/s] 
Evaluating detections...
 100% |█████████████████| 178/178 [1.2s elapsed, 0s remaining, 146.4 samples/s]         
Performing IoU sweep...
 100% |█████████████████| 178/178 [542.8ms elapsed, 0s remaining, 328.0 samples/s]      
Evaluating detections...
 100% |█████████████████| 209/209 [1.4s elapsed, 0s remaining, 146.6 samples/s]         
Performing IoU sweep...
 100% |██████████████

In [39]:
# Summarise mAP (in percent) and F1 per size bin.
# The last bin is open-ended (everything above 156 px), so 186**2 is used
# purely as its position on the plots.
plot_areas = boxes_areas + [186**2]
result_columns = ['area', 'box_size', 'map', 'f1']

# NOTE(review): a negative mAP() is clamped to 0 below, so bins that were
# evaluated on 0 samples show up as 0 rather than as missing values --
# confirm that this is the intended presentation.
rows_df = []
for bin_area, bin_results in zip(plot_areas, results_list):
    map_percent = round(max(bin_results.mAP(), 0) * 100, 1)
    f1_value = round(bin_results.metrics()['fscore'], 3)
    side_px = int(np.sqrt(bin_area))
    rows_df.append([bin_area, side_px, map_percent, f1_value])
    print(f"mAP: {map_percent}, F1: {f1_value}")

df_res = pd.DataFrame(rows_df, columns=result_columns)
print(df_res)

mAP: 0, F1: 0.0
mAP: 0, F1: 0.0
mAP: 0, F1: 0.0
mAP: 94.4, F1: 0.933
mAP: 93.0, F1: 0.967
mAP: 99.7, F1: 1.0
mAP: 90.0, F1: 0.9
mAP: 93.4, F1: 0.994
    area  box_size   map     f1
0     64         8   0.0  0.000
1    256        16   0.0  0.000
2   1024        32   0.0  0.000
3   4096        64  94.4  0.933
4   9216        96  93.0  0.967
5  16384       128  99.7  1.000
6  24336       156  90.0  0.900
7  34596       186  93.4  0.994


In [40]:
# Directory where the PDF figures are written.
# TODO: make this configurable instead of pointing at a user-specific path.
FIGURES_DIR = "/home/agirbau/work/politics/figures"


def plot_metric_per_box_size(results_df, metric_col, metric_label, dataset_name,
                             figures_dir=FIGURES_DIR):
    """Plot one metric against bounding box size, save it as PDF and show it.

    Args:
        results_df: DataFrame with a "box_size" column and a `metric_col` column.
        metric_col: Name of the column to plot ("map" or "f1"); also used in
            the output file name.
        metric_label: Human-readable metric name used in the figure title and
            as the y-axis title.
        dataset_name: Dataset name used in the figure title and file name.
        figures_dir: Directory the PDF is written to.

    Returns:
        The plotly figure that was written and shown.
    """
    fig = px.line(
        results_df,
        x="box_size",
        y=metric_col,
        text=metric_col,
        title=f"{metric_label} per bounding box size for {dataset_name}",
    )
    # Print each value next to its marker.
    fig.update_traces(textposition="bottom right")
    fig.update_xaxes(title="Bounding box size")
    fig.update_yaxes(title=metric_label)

    fig.write_image(f"{figures_dir}/results_{metric_col}_face_size_{dataset_name}.pdf")
    fig.show()
    return fig


# mAP first, then F1 score -- one figure (and one PDF) per metric.
for metric_col, metric_label in [("map", "mAP"), ("f1", "F-score")]:
    fig = plot_metric_per_box_size(df_res, metric_col, metric_label, dataset_orig.name)