In [26]:
import os
import json
import pandas as pd
import numpy as np
from torchvision.ops import nms, box_iou
import torch
import cv2
import matplotlib.pyplot as plt
import ast
#from plot import set_plot_layout

from tools_label_error_detection import construct_original_gt, construct_dont_care_regions, draw_boxes

#set_plot_layout(path_to_latex = '/home/jklees/texlive/bin/x86_64-linux');

In [27]:
# parameters

np.random.seed(0) # this just influences the random selection of images for display (np.random.choice in the visualisation cells)


iou_threshold_misfitting_box = 0.5  # IoU threshold for considering a misfitting box as a label error (Comparison of original and validated GT boxes)
iou_thresh_dont_care = 0.5 # IoU threshold for considering the intersection with dont care regions i.e. determining whether annotations are actual label errors or were simply not annotated on purpose.

# data parameters
# Validated GT rows whose 'score' is below this value are dropped when the validated GT is loaded.
# NOTE: with 0.0 every row with a non-negative score is kept, i.e. no probability filtering takes place.
validated_gt_prob_threshold = 0.0  # Probability threshold for ground truth class pedestrian (0.5 and 0.8 available)
min_bbox_height = 25 # minimal height of a bounding box to be considered as a pedestrian (according to KITTI Benchmark moderate and hard version). Applies to both original and validated GT boxes.
filter_small_boxes = False # Whether to filter out boxes that are too small
filter_dont_care = False # Whether filter_data_for_conditions drops rows whose IoU with a dont care region reaches iou_thresh_dont_care

n_images_to_display = 0 # number of images sampled by the visualisation cells; with 0 nothing is drawn
# NOTE: cells below build file paths by plain string concatenation (e.g. f'{path_to_kitti}image_2/...'),
# so path_to_kitti has to keep its trailing '/'.
path_to_kitti = '/home/datasets_archive/KITTI/training/'
path_to_split = "data/train_val_split.json"
# path_to_kitti_val_csv_files = "data/KITTI/gt/gt_val_csv_files/csv/"
# path_to_kitti_dont_care_regions = "data/KITTI/val_dont_care.json"
path_to_validated_gt = "data/validated_gt.csv"

# 1 Label errors in the original dataset 

Here, we evaluate the quality of the original dataset by comparing it to the validated ground truth we constructed. 
A label error is defined here as an overlooked pedestrian or a pedestrian box that does not fit well (IoU < 0.5 compared to our validated GT).

In [28]:
def filter_data_for_conditions(df):
    """Apply the optional notebook-level row filters to ``df``.

    The filters are switched by the module-level flags ``filter_dont_care`` and
    ``filter_small_boxes``:

    * dont care filter: keep rows with ``iou_with_dont_care`` strictly below
      ``iou_thresh_dont_care``;
    * size filter: keep rows with ``height`` of at least ``min_bbox_height``.

    Returns the filtered frame; if both flags are off, ``df`` itself is returned.
    """
    filtered = df
    if filter_dont_care:
        outside_dont_care = filtered["iou_with_dont_care"] < iou_thresh_dont_care
        filtered = filtered[outside_dont_care]
    if filter_small_boxes:
        tall_enough = filtered["height"] >= min_bbox_height
        filtered = filtered[tall_enough]
    return filtered

In [29]:
# Read the train/val split definition; it is handed unchanged to the two construct_* helpers below.
with open(path_to_split, "r") as f:
    split = json.load(f)  # `split` now holds the parsed JSON content of the split file

# Construct the original annotations as a DataFrame
# (the size filter settings are forwarded, so the helper presumably applies them itself - verify in tools_label_error_detection)
original_gt = construct_original_gt(path_to_kitti, split, filter_small_boxes, min_bbox_height)

# construct the dont care regions from the original dataset as a DataFrame
dont_care_regions = construct_dont_care_regions(path_to_kitti, split)

# Preview the first two original annotations
original_gt.head(2)

Unnamed: 0,filename,xmin,ymin,xmax,ymax,class_name,class,height
0,003206.png,584.97,172.95,590.64,194.6,Pedestrian,2,21.65
1,003206.png,590.29,173.84,596.29,194.88,Pedestrian,2,21.04


In [30]:
# Preview the first two dont care regions.
dont_care_regions.head(2)

Unnamed: 0,filename,xmin,ymin,xmax,ymax,class_name,class
0,007359.png,400.29,158.29,492.82,197.37,DontCare,-1
1,007359.png,875.12,155.21,911.12,195.32,DontCare,-1


In [31]:
# Write the dont care regions to CSV; index=False omits the DataFrame index from the file.
dont_care_regions.to_csv("data/original_gt_dont_care_regions.csv", index=False)

In [32]:
# load the validated ground truth data
validated_gt = pd.read_csv(path_to_validated_gt)
validated_gt = validated_gt[validated_gt["score"] >= validated_gt_prob_threshold] # filter by the probability threshold
validated_gt.reset_index(drop=True, inplace=True)
# Ensure the 'filename' column is in the correct format
validated_gt["filename"] = validated_gt['filename'].astype(str).str.zfill(6) + '.png'
# Convert box format from x_mean, y_mean, width, height to xmin, ymin, xmax, ymax
validated_gt["xmin"] = validated_gt["bbox"].apply(lambda x: ast.literal_eval(x)[0] - ast.literal_eval(x)[2] / 2)
validated_gt["ymin"] = validated_gt["bbox"].apply(lambda x: ast.literal_eval(x)[1] - ast.literal_eval(x)[3] / 2)
validated_gt["xmax"] = validated_gt["bbox"].apply(lambda x: ast.literal_eval(x)[0] + ast.literal_eval(x)[2] / 2 )
validated_gt["ymax"] = validated_gt["bbox"].apply(lambda x: ast.literal_eval(x)[1] + ast.literal_eval(x)[3] / 2)

validated_gt["height"] = validated_gt["ymax"] - validated_gt["ymin"]
if filter_small_boxes:
    print(f"Filtering out {len(validated_gt[validated_gt['height'] < min_bbox_height])} boxes with height < {min_bbox_height} pixels")
    validated_gt = validated_gt[validated_gt["height"] >= min_bbox_height]  # Filter out boxes that are too small
    validated_gt.reset_index(drop=True, inplace=True)  # Reset the index

validated_gt.head(2)

Unnamed: 0,media_object_id,media_id,filename,bbox,score,xmin,ymin,xmax,ymax,height
0,02bbfce0-6cd9-45ef-b21a-082aab0ff1dd,d9051eda-343d-4e4c-839a-3b8f8c24bcd4,002112.png,"[554.18, 189.065, 10.240000000000009, 25.43000...",0.197628,549.06,176.35,559.3,201.78,25.43
1,718cc290-c20a-4e54-980f-eabeabf5dec7,04156827-cee7-4ad0-8329-a9464988ffc8,007256.png,"[556.235, 190.315, 7.690000000000055, 15.37000...",0.083333,552.39,182.63,560.08,198.0,15.37


In [33]:
# Compare the sizes of the two annotation sets.
# The last line reports the net difference in the number of boxes; it is not the number of
# label errors, which is determined box by box further below.
print(f"Number of pedestrian boxes in the original dataset: {len(original_gt)}")
print(f"Number of pedestrian boxes in the validated dataset: {len(validated_gt)}")
print(f"Validated ground truth contains {len(validated_gt) - len(original_gt)} more boxes than the orig. GT.")

Number of pedestrian boxes in the original dataset: 896
Number of pedestrian boxes in the validated dataset: 3078
Validated ground truth contains 2182 more boxes than the orig. GT.


In [34]:
# compare annotations of the original ground truth with the validated ground truth also plotting the dont care regions
image_ids = np.random.choice(validated_gt['filename'].unique(), size = n_images_to_display, replace=False) # Get unique image filenames

for image_id in image_ids: 
    # Read the predicted boxes from the DataFrame
    box_indices = validated_gt[validated_gt['filename'] == image_id].index
    boxes = [validated_gt.iloc[i][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1) for i in box_indices]
    boxes = np.vstack(boxes)  # Stack all predicted boxes for this image

    dont_care_box_indices = dont_care_regions[dont_care_regions['filename'] == image_id].index
    dont_care_boxes = [dont_care_regions.iloc[i][['xmin', 'ymin', 'xmax', 'ymax']].values.reshape(1, -1) for i in dont_care_box_indices]
    dont_care_boxes = np.vstack(dont_care_boxes) if len(dont_care_boxes) > 0 else None # Stack all predicted boxes for this image

    image_id = image_id.split('.')[0]  # Remove file extension if present

    draw_boxes(f'{path_to_kitti}image_2/{image_id}.png', f'{path_to_kitti}label_2/{image_id}.txt', pred_boxes=boxes, add_pred_boxes=dont_care_boxes)

In [35]:
# def determine_label_errors_in_original_gt(original_gt, validated_gt, iou_threshold_misfitting_box):
#     """
#     Determine label errors in the original ground truth through comparison with the validated ground truth.
    
#     Parameters:
#     - original_gt: DataFrame containing the original ground truth boxes.
#     - validated_gt: DataFrame containing the validated ground truth boxes.
#     - iou_threshold_misfitting_box: IoU threshold for considering a misfitting box as a label error.
    
#     Returns:
#     - updated validated_gt DataFrame with evaluation of all boxes with respect to whether they were not part of the original GT.
#       (For each box, we note whether this box marks an actual Label Error of the original GT and store the Label Error Type).
#     """

#     bool_label_errors = [False] * len(validated_gt)
#     label_error_type = ["no"]*len(validated_gt)  # To store the type of label error (no, overlooked pedestrian or misfitting box)
#     label_errors = 0 # not returned because it can be easily determined from the DataFrame
#     for i, row in validated_gt.iterrows():

#         original_boxes = original_gt[(original_gt['filename'] == row['filename']) ]
        
#         if original_boxes.empty: # if there are no boxes with the same filename in the original ground truth we have a label error (pedestrian was overlooked in that image)
#             label_errors += 1
#             bool_label_errors[i] = True
#             label_error_type[i] = "overlooked pedestrian"
#         else:
#             # If there are boxes annotated with pedestrian in the GT for this image, we check if any of them matches the validated ground truth box (IoU >= 0.5)
#             ious = []
#             for _, original_box in original_boxes.iterrows():
#                 # Calculate IoU between the validated ground truth box and the original boxes on this image
#                 # If the IoU is >= 0.5 for one of the boxes, we consider it a match and do not count it as a label error
#                 original_box = original_box.to_frame().T # pd.Series to DataFrame
#                 # Calculate IoU using torchvision.ops.box_iou

#                 validated_box = torch.tensor(np.array(row[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype = float), dtype=torch.float32).unsqueeze(0)
#                 original_box = torch.tensor(np.array([original_box.xmin, original_box.ymin, original_box.xmax, original_box.ymax], dtype=float).flatten(), dtype=torch.float32).unsqueeze(0)

#                 iou = box_iou(validated_box, original_box)[0, 0].item()
#                 ious.append(iou)

#             if max(ious) < iou_threshold_misfitting_box:
#                 label_errors += 1
#                 bool_label_errors[i] = True
#                 if max(ious) == 0:
#                     label_error_type[i] = "overlooked pedestrian"
#                 else:
#                     # If the IoU is below the threshold but not zero, we consider it a misfitting box
#                     label_error_type[i] = "misfitting box"

#     validated_gt['label_error'] = bool_label_errors
#     validated_gt['label_error_type'] = label_error_type

#     return validated_gt

def determine_label_errors_in_original_gt(original_gt, validated_gt, iou_threshold_misfitting_box):
    """Flag validated GT boxes that have no matching box in the original GT.

    The validated boxes are processed in row order. Each one is compared with the
    original boxes of the same image (same 'filename') that have not been matched
    yet, and the original box with the highest IoU is the match candidate
    (greedy one-to-one matching: an original box can explain one validated box only).

    Parameters:
    - original_gt: DataFrame with the columns 'filename', 'xmin', 'ymin', 'xmax', 'ymax'.
    - validated_gt: DataFrame with the same columns. It is modified in place.
    - iou_threshold_misfitting_box: a validated box whose best IoU is below this value
      is a label error.

    Returns:
    - validated_gt (the same object) with two added columns:
      'label_error' (bool) and 'label_error_type', which is "no",
      "overlooked pedestrian" (best IoU == 0) or "misfitting box" (0 < best IoU < threshold).
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']

    bool_label_errors = [False] * len(validated_gt)
    label_error_type = ["no"] * len(validated_gt)

    # All boxes are converted once; rows are addressed by *position* from here on, so the
    # result does not depend on the index labels of either frame (the label returned by
    # iterrows() is not a valid list position after filtering without reset_index).
    original_boxes = torch.tensor(original_gt[box_columns].to_numpy(dtype=float), dtype=torch.float32)
    validated_boxes = torch.tensor(validated_gt[box_columns].to_numpy(dtype=float), dtype=torch.float32)

    # Positions of the original boxes per image, in row order
    original_positions_by_filename = {}
    for original_position, original_filename in enumerate(original_gt['filename'].tolist()):
        original_positions_by_filename.setdefault(original_filename, []).append(original_position)

    matched_original_positions = set()

    for position, filename in enumerate(validated_gt['filename'].tolist()):
        candidates = [
            candidate
            for candidate in original_positions_by_filename.get(filename, ())
            if candidate not in matched_original_positions  # already matched
        ]

        best_iou = 0
        best_match = None
        if candidates:
            ious = box_iou(validated_boxes[position].unsqueeze(0), original_boxes[candidates])[0].tolist()
            for candidate, iou in zip(candidates, ious):
                if iou > best_iou:  # strict: on ties the first candidate in row order wins
                    best_iou = iou
                    best_match = candidate

        if best_iou >= iou_threshold_misfitting_box:
            # best_match is None only if the threshold is <= 0 and nothing overlaps
            if best_match is not None:
                matched_original_positions.add(best_match)  # mark match
        else:
            bool_label_errors[position] = True
            if best_iou == 0:
                label_error_type[position] = "overlooked pedestrian"
            else:
                label_error_type[position] = "misfitting box"

    validated_gt['label_error'] = bool_label_errors
    validated_gt['label_error_type'] = label_error_type
    return validated_gt

def _max_iou_per_box(df, reference_boxes):
    """Return, for each row of ``df``, the largest IoU with the boxes of ``reference_boxes`` on the same image.

    Both frames need the columns 'filename', 'xmin', 'ymin', 'xmax', 'ymax'.
    The entry is 0 for rows whose image has no box in ``reference_boxes``.
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']

    # Build the per-image reference tensors once instead of filtering the frame for every row
    reference_by_filename = {
        filename: torch.tensor(group[box_columns].values, dtype=torch.float32)
        for filename, group in reference_boxes.groupby('filename', sort=False)
    }

    boxes = torch.tensor(df[box_columns].to_numpy(dtype=float), dtype=torch.float32)

    max_ious = []
    for filename, box in zip(df['filename'].tolist(), boxes):
        same_image_boxes = reference_by_filename.get(filename)
        if same_image_boxes is None:
            max_ious.append(0)
        else:
            max_ious.append(box_iou(box.unsqueeze(0), same_image_boxes).squeeze().numpy().max())
    return max_ious


def get_ious_with_orig_and_val_gt(df):
    """
    This function calculates, for each box in the DataFrame, the maximal IoU with the boxes of the same image in
    the original ground truth (GT), in the dont care regions of the original GT and in the validated GT.
    The reference frames are the notebook-level variables `original_gt`, `dont_care_regions` and `validated_gt`.

    It adds three new columns to the DataFrame (in place) and returns it:
    - 'iou_with_original_gt': used to select label error proposals (IoU < 0.5)
    - 'iou_with_dont_care': used to determine whether the object has been flagged as not of interest
    - 'iou_with_val_gt': used to determine whether a box corresponds to a validated pedestrian

    Note: if `df` is one of the reference frames itself, every box is also compared with itself.
    """
    df['iou_with_original_gt'] = _max_iou_per_box(df, original_gt)
    df['iou_with_dont_care'] = _max_iou_per_box(df, dont_care_regions)
    df['iou_with_val_gt'] = _max_iou_per_box(df, validated_gt)

    return df

In [36]:
# Flag every validated GT box that has no sufficiently overlapping box in the original GT.
# The call adds the columns 'label_error' and 'label_error_type' to validated_gt.
validated_gt = determine_label_errors_in_original_gt(original_gt, validated_gt, iou_threshold_misfitting_box)
# Report the total number of label errors and the split into the two error types
print(f"Number of label errors (overlooked pedestrians or misfitting boxes) in the original dataset: {len(validated_gt[validated_gt['label_error'] == True])}")
print(f"Overlooked pedestrians (orig. IoU = 0): {len(validated_gt[validated_gt['label_error_type'] == 'overlooked pedestrian'])}")
print(f"Misfitting boxes (0 < orig. IoU < thresh = {iou_threshold_misfitting_box}): {len(validated_gt[validated_gt['label_error_type'] == 'misfitting box'])}")

Number of label errors (overlooked pedestrians or misfitting boxes) in the original dataset: 2353
Overlooked pedestrians (orig. IoU = 0): 2094
Misfitting boxes (0 < orig. IoU < thresh = 0.5): 259


In [37]:
# visualize the label errors for review purposes
# Display a few images with label errors distinguishing between overlooked pedestrians and misfitting boxes

# Green: orig. GT boxes, Red: validated GT boxes with label errors (overlooked pedestrians), Blue: validated GT boxes with label errors (misfitting boxes)

n = n_images_to_display #10

# Get unique image filenames with label errors (the boolean column is used directly as mask)
has_label_error = validated_gt['label_error'].astype(bool)
image_ids = np.random.choice(validated_gt.loc[has_label_error, 'filename'].unique(), size = n, replace=False)
for image_id in image_ids: 
    # Rows are selected with boolean masks via .loc. The previous version fed index *labels*
    # into the positional .iloc, which is only correct for a freshly reset 0..n-1 index.
    on_image = validated_gt['filename'] == image_id

    overlooked_boxes = validated_gt.loc[on_image & (validated_gt["label_error_type"] == "overlooked pedestrian"), ['xmin', 'ymin', 'xmax', 'ymax', 'score']].to_numpy()
    if len(overlooked_boxes) == 0:
        overlooked_boxes = None  # nothing of this type on the image

    misfitting_boxes = validated_gt.loc[on_image & (validated_gt["label_error_type"] == "misfitting box"), ['xmin', 'ymin', 'xmax', 'ymax', 'score']].to_numpy()
    if len(misfitting_boxes) == 0:
        misfitting_boxes = None  # nothing of this type on the image

    image_id = image_id.split('.')[0]  # Remove file extension if present
    draw_boxes(f'{path_to_kitti}image_2/{image_id}.png', f'{path_to_kitti}label_2/{image_id}.txt', pred_boxes=overlooked_boxes, add_pred_boxes=misfitting_boxes)

In [38]:
# Dont care regions were left unannotated on purpose, so validated boxes that overlap them are
# not counted as actual label errors here.
# For our study however, we consider the entire images and do not account for these regions

validated_gt = get_ious_with_orig_and_val_gt(validated_gt)
original_gt = get_ious_with_orig_and_val_gt(original_gt)

# A box lies outside the dont care regions if its IoU with every such region stays below the threshold
outside_dont_care = validated_gt["iou_with_dont_care"] < iou_thresh_dont_care
overlooked_outside = validated_gt[(validated_gt["label_error_type"] == "overlooked pedestrian") & outside_dont_care]
misfitting_outside = validated_gt[(validated_gt["label_error_type"] == "misfitting box") & outside_dont_care]

actual_overlooked_pedestrians = len(overlooked_outside)
actual_misfitting_annotations = len(misfitting_outside)
print(f"Number of actual label errors (overlooked pedestrians or misfitting boxes) outside of dont care regions: {actual_overlooked_pedestrians + actual_misfitting_annotations}")
print(f"Overlooked pedestrians (orig. IoU = 0): {actual_overlooked_pedestrians}")
print(f"Misfitting boxes (0 < orig. IoU < thresh = {iou_threshold_misfitting_box}): {actual_misfitting_annotations}")

Number of actual label errors (overlooked pedestrians or misfitting boxes) outside of dont care regions: 2217
Overlooked pedestrians (orig. IoU = 0): 1962
Misfitting boxes (0 < orig. IoU < thresh = 0.5): 255


In [39]:
# # plot annotations that are in the original gt but not in the val. GT

# These examples exist due to the fact that we set a threshold for the probability of a box actually containing a pedestrian

# original_gt = get_ious_with_orig_and_val_gt(original_gt)

# image_ids = original_gt['filename'][original_gt["iou_with_val_gt"] == 0]
# for image_id in image_ids: 
#     # Read the predicted boxes from the DataFrame
#     box_indices = validated_gt[validated_gt['filename'] == image_id].index
#     boxes = [validated_gt.iloc[i][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1) for i in box_indices]
#     if boxes:
#         boxes = np.vstack(boxes)  # Stack all predicted boxes for this image
#     image_id = image_id.split('.')[0]  # Remove file extension if present
#     draw_boxes(f'{path_to_kitti}image_2/{image_id}.png', f'{path_to_kitti}label_2/{image_id}.txt', pred_boxes=boxes)

In [40]:
# validated_gt = validated_gt.drop(columns=["iou_with_val_gt"])
# validated_gt.to_csv("data/validated_gt_new.csv", index=False)

# original_gt = original_gt.drop(columns=["iou_with_original_gt", "iou_with_val_gt"])
# original_gt.to_csv("data/original_gt.csv", index=False)

# tmp print results

In [41]:
# Summary of the counts computed above: first for the whole images, ...
print(f"Overlooked pedestrians (orig. IoU = 0): {len(validated_gt[validated_gt['label_error_type'] == 'overlooked pedestrian'])}")
print(f"Misfitting boxes (0 < orig. IoU < thresh = {iou_threshold_misfitting_box}): {len(validated_gt[validated_gt['label_error_type'] == 'misfitting box'])}")

# ... then restricted to boxes outside the dont care regions. The two actual_* variables are
# defined in the cell that accounts for the dont care regions, which has to be run first.
print("OUTSIDE DONT CARE")
print(f"Overlooked pedestrians (orig. IoU = 0): {actual_overlooked_pedestrians}")
print(f"Misfitting boxes (0 < orig. IoU < thresh = {iou_threshold_misfitting_box}): {actual_misfitting_annotations}")

Overlooked pedestrians (orig. IoU = 0): 2094
Misfitting boxes (0 < orig. IoU < thresh = 0.5): 259
OUTSIDE DONT CARE
Overlooked pedestrians (orig. IoU = 0): 1962
Misfitting boxes (0 < orig. IoU < thresh = 0.5): 255


In [42]:
def read_kitti_labels(label_path):
    """Read the class name and 2D bounding box of every object in a KITTI label file.

    Each non-blank line is split on whitespace; the first field is the class name and
    fields 5-8 (indices 4..7) are the 2D box.

    Parameters:
    - label_path: path of the label text file.

    Returns:
    - list of (class_name, [left, top, right, bottom]) tuples in file order,
      with the box coordinates converted to float.

    Raises:
    - ValueError if one of the box fields is not a number.
    """
    boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Blank line (e.g. a trailing empty line): there is no class field to read
                continue
            cls = parts[0]
            # 2D bounding box: [left, top, right, bottom]
            bbox = [float(value) for value in parts[4:8]]
            boxes.append((cls, bbox))
    return boxes

def plot_and_store_label_error(image_id, pred_boxes=None, add_pred_boxes=None, scores_pred_boxes = None, scores_add_pred_boxes = None, filename=None):
    """Show (and optionally save) a crop of a KITTI image around each box in ``pred_boxes``.

    The image and its label file are looked up below the notebook-level ``path_to_kitti``
    (sub-folders 'image_2' and 'label_2'). The 'Pedestrian' boxes of the label file are drawn
    in green, the boxes of ``pred_boxes`` in red. For every box in ``pred_boxes`` a crop of at
    most 200 x 200 pixels centred on the box is displayed.

    Parameters:
    - image_id: image name with or without file extension (everything after the first '.' is dropped).
    - pred_boxes: iterable of boxes whose first four entries are xmin, ymin, xmax, ymax; None draws no crop.
    - add_pred_boxes, scores_add_pred_boxes: accepted for signature compatibility, currently unused.
    - scores_pred_boxes: optional sequence of scores, written above the corresponding box.
    - filename: if given, each crop is saved to this path. NOTE: with more than one box in
      ``pred_boxes`` every crop is written to the same file, so only the last one remains.

    Raises:
    - FileNotFoundError if the image cannot be read.
    """
    image_id = image_id.split('.')[0]  # Remove file extension if present
    # os.path.join copes with path_to_kitti with or without trailing slash (the former
    # f-string produced '...//image_2/...') and builds both paths the same way.
    image_path = os.path.join(path_to_kitti, 'image_2', f'{image_id}.png')
    label_path = os.path.join(path_to_kitti, 'label_2', f'{image_id}.txt')
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread does not raise on a missing/unreadable file; fail with a clear message
        # instead of the opaque cvtColor error that would follow.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Draw ground truth boxes
    gt_boxes = read_kitti_labels(label_path)
    for cls, (left, top, right, bottom) in gt_boxes:
        if cls == "Pedestrian": # we only care about pedestrians
            pt1 = (int(left), int(top))
            pt2 = (int(right), int(bottom))
            cv2.rectangle(image, pt1, pt2, color=(0, 255, 0), thickness=2)

    # Draw predicted boxes (if provided)
    if pred_boxes is not None:
        for i, pred in enumerate(pred_boxes):
            xmin, ymin, xmax, ymax = pred[:4]
            pt1 = (int(xmin), int(ymin))
            pt2 = (int(xmax), int(ymax))
            cv2.rectangle(image, pt1, pt2, color=(255, 0, 0), thickness=2)  # Red
            if scores_pred_boxes is not None:
                score = scores_pred_boxes[i]
                label = f" ({score:.4f})"
                cv2.putText(image, label, (int(xmin), int(ymin) - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (255, 0, 0), 1)

            # Crop the image around the center of the bounding box
            # (boxes are drawn onto the same image, so later crops also show earlier boxes)
            center_x = int((xmin + xmax) / 2)
            center_y = int((ymin + ymax) / 2)
            crop_x_min = max(center_x - 100, 0)
            crop_y_min = max(center_y - 100, 0)
            crop_x_max = min(center_x + 100, image.shape[1])
            crop_y_max = min(center_y + 100, image.shape[0])
            cropped_image = image[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
            if cropped_image.size == 0:
                # Box centre lies outside the image: there is nothing to show
                continue

            # Display the cropped image
            fig, ax = plt.subplots(figsize=(5, 5))
            ax.imshow(cropped_image)
            ax.axis('off')
            if filename:
                fig.savefig(filename, dpi = 100)
            plt.show()
            plt.close(fig)  # release the figure; many crops are produced in loops

In [43]:
# %%capture
# # Small but unambiguous cases. Filter for p > 0.8 and height < 40 
# # plot and store label errors
# np.random.seed(0) # set seed because a random subset is considered

# filename_map = {}

# # --- CONFIGURATION ---
# image_dir = "label_error_imgs/small_but_confident/" 
# if not os.path.exists(image_dir):
#     os.makedirs(image_dir)
# output_file = "label_error_imgs_small_but_confident.tex"

# label_errors = validated_gt.copy()
# label_errors = label_errors[label_errors["label_error_type"] == "overlooked pedestrian"]
# label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# label_errors = label_errors[label_errors["height"] < 40]
# label_errors = label_errors[label_errors["score"] >= 0.8] # score is soft label probability
# label_errors.reset_index(drop=True, inplace=True)

# N = 12

# indices = np.random.choice(label_errors.index, size = N, replace = False)

# for loop_index in indices:
#     index = label_errors.index[loop_index]

#     label_error = label_errors.iloc[index][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
#     image_id = label_errors.iloc[index][['filename']].values[0]

#     filename = f"label_error_{loop_index}.png"
#     filename_map[filename] = image_id

#     plot_and_store_label_error(image_id, pred_boxes=label_error, filename = image_dir + filename)


# # write latex code for inclusion in paper
# latex_lines = []
# images_per_row = 4

# for i, (img_name, original_name) in enumerate(filename_map.items()):
#     if i % images_per_row == 0:
#         latex_lines.append(r"\noindent")  # Start of a new row

#     latex_lines.append(
#         rf"""\begin{{minipage}}[t]{{0.24\linewidth}}
#   \centering
#   \includegraphics[width=\linewidth, , height=\linewidth, keepaspectratio]{{imgs/{image_dir}{img_name}}}
#   \captionsetup{{labelformat=empty, hypcap=false}}
#   \captionof{{figure}}{{{original_name}}}
# \end{{minipage}}"""
#     )

#     if (i + 1) % images_per_row == 0:
#         latex_lines.append(r"\par")  # Space between rows with \smallskip

# # Save LaTeX code to file
# with open(output_file, "w") as f:
#     f.write("\n".join(latex_lines))

In [44]:
# %%capture
# # ambiguous cases. Filter for 0.5 < p < 0.8

# # plot and store label errors
# np.random.seed(0) # set seed because a random subset is considered

# filename_map = {}

# # --- CONFIGURATION ---
# image_dir = "label_error_imgs/ambiguous/" 
# if not os.path.exists(image_dir):
#     os.makedirs(image_dir)
# output_file = "label_error_imgs_ambiguous.tex"

# label_errors = validated_gt.copy()
# label_errors = label_errors[label_errors["label_error_type"] == "overlooked pedestrian"]
# label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# label_errors = label_errors[label_errors["score"] < 0.8] # score is soft label probability filter for 0.5 is set already
# label_errors.reset_index(drop=True, inplace=True)

# N = 12

# indices = np.random.choice(label_errors.index, size = N, replace = False)

# for loop_index in indices:
#     index = label_errors.index[loop_index]

#     label_error = label_errors.iloc[index][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
#     image_id = label_errors.iloc[index][['filename']].values[0]

#     filename = f"label_error_{loop_index}.png"
#     filename_map[filename] = image_id

#     plot_and_store_label_error(image_id, pred_boxes=label_error, filename = image_dir + filename)


# # write latex code for inclusion in paper
# latex_lines = []
# images_per_row = 4

# for i, (img_name, original_name) in enumerate(filename_map.items()):
#     if i % images_per_row == 0:
#         latex_lines.append(r"\noindent")  # Start of a new row

#     latex_lines.append(
#         rf"""\begin{{minipage}}[t]{{0.24\linewidth}}
#   \centering
#   \includegraphics[width=\linewidth, , height=\linewidth, keepaspectratio]{{imgs/{image_dir}{img_name}}}
#   \captionsetup{{labelformat=empty, hypcap=false}}
#   \captionof{{figure}}{{{original_name}}}
# \end{{minipage}}"""
#     )

#     if (i + 1) % images_per_row == 0:
#         latex_lines.append(r"\par")  # Space between rows with \smallskip

# # Save LaTeX code to file
# with open(output_file, "w") as f:
#     f.write("\n".join(latex_lines))

In [45]:
# %%capture
# # This one requires filtering for p > 0.8 and height > 40 which was done before and needs to be added below if left out above
# # plot and store label errors
# np.random.seed(0) # if random subset is considered

# filename_map = {}

# label_errors = validated_gt.copy()

# # --- CONFIGURATION ---
# image_dir = "label_error_imgs/top_overlooked/" 
# if not os.path.exists(image_dir):
#     os.makedirs(image_dir)
# output_file = "label_error_imgs_top_overlooked.tex"

# label_errors = label_errors[label_errors["label_error_type"] == "overlooked pedestrian"]
# label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# label_errors = label_errors.sort_values(by="score", ascending=False)
# label_errors.reset_index(drop=True, inplace=True)

# N = len(label_errors)

# for loop_index in range(N):
#     index = label_errors.index[loop_index]

#     label_error = label_errors.iloc[index][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
#     image_id = label_errors.iloc[index][['filename']].values[0]

#     filename = f"label_error_{loop_index}.png"
#     filename_map[filename] = image_id

#     plot_and_store_label_error(image_id, pred_boxes=label_error, filename = image_dir + filename)


# # write latex code for inclusion in paper
# latex_lines = []
# images_per_row = 4

# for i, (img_name, original_name) in enumerate(filename_map.items()):
#     if i % images_per_row == 0:
#         latex_lines.append(r"\noindent")  # Start of a new row

#     latex_lines.append(
#         rf"""\begin{{minipage}}[t]{{0.24\linewidth}}
#   \centering
#   \includegraphics[width=\linewidth, , height=\linewidth, keepaspectratio]{{imgs/{image_dir}{img_name}}}
#   \captionsetup{{labelformat=empty, hypcap=false}}
#   \captionof{{figure}}{{{original_name}}}
# \end{{minipage}}"""
#     )

#     if (i + 1) % images_per_row == 0:
#         latex_lines.append(r"\par")  # Space between rows with \smallskip

# # Save LaTeX code to file
# with open(output_file, "w") as f:
#     f.write("\n".join(latex_lines))


# ###

# # label_errors = validated_gt.copy()
# # label_errors = label_errors[label_errors["label_error_type"] == "overlooked pedestrian"]
# # label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# # label_errors = label_errors[label_errors["height"] >= 40]
# # label_errors = label_errors[label_errors["score"] >= 0.8] # score is soft label probability
# # label_errors = label_errors.sort_values(by="score", ascending=False)
# # label_errors.reset_index(drop=True, inplace=True)



In [46]:
# %%capture
# # plot and store label errors
# np.random.seed(0) # if random subset is considered

# filename_map = {}

# label_errors = validated_gt.copy()

# # --- CONFIGURATION ---
# image_dir = "label_error_imgs/top_misfitting/" 
# if not os.path.exists(image_dir):
#     os.makedirs(image_dir)
# output_file = "label_error_imgs_top_misfitting.tex"

# label_errors = label_errors[label_errors["label_error_type"] == "misfitting box"]
# label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# label_errors = label_errors.sort_values(by="iou_with_original_gt", ascending=True)
# label_errors.reset_index(drop=True, inplace=True)

# N = 20

# for loop_index in range(N):
#     index = label_errors.index[loop_index]

#     label_error = label_errors.iloc[index][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
#     image_id = label_errors.iloc[index][['filename']].values[0]

#     filename = f"label_error_{loop_index}.png"
#     filename_map[filename] = image_id

#     plot_and_store_label_error(image_id, pred_boxes=label_error, filename = image_dir + filename)


# # write latex code for inclusion in paper
# latex_lines = []
# images_per_row = 4

# for i, (img_name, original_name) in enumerate(filename_map.items()):
#     if i % images_per_row == 0:
#         latex_lines.append(r"\noindent")  # Start of a new row

#     latex_lines.append(
#         rf"""\begin{{minipage}}[t]{{0.24\linewidth}}
#   \centering
#   \includegraphics[width=\linewidth, , height=\linewidth, keepaspectratio]{{imgs/{image_dir}{img_name}}}
#   \captionsetup{{labelformat=empty, hypcap=false}}
#   \captionof{{figure}}{{{original_name}}}
# \end{{minipage}}"""
#     )

#     if (i + 1) % images_per_row == 0:
#         latex_lines.append(r"\par")  # Space between rows with \smallskip

# # Save LaTeX code to file
# with open(output_file, "w") as f:
#     f.write("\n".join(latex_lines))

# # label_errors = validated_gt.copy()
# # label_errors = label_errors[label_errors["label_error_type"] == "misfitting box"]
# # label_errors = label_errors[label_errors["iou_with_dont_care"] < 0.5]
# # label_errors = label_errors[label_errors["height"] >= 40]
# # label_errors = label_errors[label_errors["score"] >= 0.8] # score is soft label probability
# # label_errors = label_errors.sort_values(by="iou_with_original_gt", ascending=True)
# # label_errors.reset_index(drop=True, inplace=True)

In [47]:
# # manually generate larger crops for three of those images.

# def plot_and_store_label_error(image_id, pred_boxes=None, add_pred_boxes=None, scores_pred_boxes = None, scores_add_pred_boxes = None, filename=None):
#     image_id = image_id.split('.')[0]  # Remove file extension if present
#     image_path = f'{path_to_kitti}/image_2/{image_id}.png'
#     label_path = f'{path_to_kitti}label_2/{image_id}.txt'
#     image = cv2.imread(image_path)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

#     # Draw ground truth boxes
#     gt_boxes = read_kitti_labels(label_path)
#     for cls, (left, top, right, bottom) in gt_boxes:
#         if cls == "Pedestrian": # we only care about pedestrians
#             pt1 = (int(left), int(top))
#             pt2 = (int(right), int(bottom))
#             cv2.rectangle(image, pt1, pt2, color=(0, 255, 0), thickness=2)

#     # Draw predicted boxes (if provided)
#     if pred_boxes is not None:
#         for i, pred in enumerate(pred_boxes):
#             xmin, ymin, xmax, ymax = pred[:4]
#             pt1 = (int(xmin), int(ymin))
#             pt2 = (int(xmax), int(ymax))
#             cv2.rectangle(image, pt1, pt2, color=(255, 0, 0), thickness=2)  # Red
#             if scores_pred_boxes is not None:
#                 score = scores_pred_boxes[i]
#                 label = f" ({score:.4f})"
#                 cv2.putText(image, label, (int(xmin), int(ymin) - 5), cv2.FONT_HERSHEY_SIMPLEX,
#                         0.5, (255, 0, 0), 1)

#             # Crop the image around the center of the bounding box
#             center_x = int((xmin + xmax) / 2)
#             center_y = int((ymin + ymax) / 2)
#             crop_x_min = max(center_x - 200, 0)
#             crop_y_min = max(center_y - 200, 0)
#             crop_x_max = min(center_x + 200, image.shape[1])
#             crop_y_max = min(center_y + 200, image.shape[0])
#             cropped_image = image[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

#             # Display the cropped image
#             plt.figure(figsize=(5, 5))
#             plt.imshow(cropped_image)
#             plt.axis('off')
#             if filename:
#                 plt.savefig(filename, dpi = 100)
#             plt.show()


# indices = [11, 16, 19]

# for loop_index in indices:
#     index = label_errors.index[loop_index]

#     label_error = label_errors.iloc[index][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
#     image_id = label_errors.iloc[index][['filename']].values[0]

#     filename = f"label_error_{loop_index}.png"
#     filename_map[filename] = image_id

#     plot_and_store_label_error(image_id, pred_boxes=label_error, filename = image_dir + filename)

# 2 Evaluate proposed label errors of label error detection methods

In [48]:
def predict_label_errors(df, top_n, threshold_label_error, target_col = "target", prediction_col = "prediction"):
    # this function returns n predictions for label errors based on the meta-regression performed by MetaDetect
    # for this, the bounding boxes with true IoU < thresh are ordered in descending order according to the predicted IoU

    df = df[ df[target_col] < threshold_label_error] # those predicted boxes that have a true IoU below the threshold are considered label error proposals
    
    # Note that for a metaclassification model the target col is binary and the threshold was set in Metadetect 
    df = df.sort_values(by = prediction_col, ascending=False)
    if top_n == "all":
        return df
    else:
        return df[:top_n]

## 2.1 MetaDetect

The MetaDetect model predicts IoU values for all detected boxes with scores above a certain threshold

In [54]:
# Benchmarking: all evaluation steps are applied from here on.
# Start from the exported Cascade R-CNN predictions.
md_casc = pd.read_csv("data/predictions/cascade_rcnn.csv")

In [None]:
# NOTE: iou_threshold_TP and nms_threshold are assigned again, with the same values, in the parameter cell of section 2.1.1.
iou_threshold_TP = 0.1  # IoU threshold for considering a box as a true positive (Comparison of predicted boxes and validated GT boxes)

nms_threshold = 0.5  # We can further remove overlapping boxes from the predictions via NMS. Set to None to disable NMS

In [57]:
# NOTE(review): in the recorded run this call failed with KeyError: 'target'. The recorded md_casc.head() output
# shows that the loaded CSV only has the columns filename, xmin, ymin, xmax, ymax and score, i.e. neither the
# default target column ("target") nor the default prediction column ("prediction") that predict_label_errors reads.
md_casc = predict_label_errors(md_casc, "all", iou_threshold_misfitting_box)

KeyError: 'target'

In [55]:
md_casc.head()

Unnamed: 0,filename,xmin,ymin,xmax,ymax,score
0,000002.png,308.440491,0.0,1242.0,375.0,0.060403
1,000002.png,6.365904,179.167618,200.370529,370.994568,0.038095
2,000002.png,933.774231,166.599594,972.888855,264.797424,0.026021
3,000002.png,688.459717,169.650024,716.337524,224.204132,0.025676
4,000002.png,922.277832,165.763916,1009.1875,324.516968,0.015925


### 2.1.1. MetaDetect on CascadeRCNN Predictions

In [49]:
# parameters
object_detector_score_threshold = 0.01 # Fixed at the moment: changing it would require re-running MetaDetect
object_detector = "cascadercnn"

iou_threshold_label_error = iou_threshold_misfitting_box  # IoU threshold for considering a predicted box as a label error proposal (Comparison of predicted boxes and original GT boxes)
#####
# NOTE: In MetaDetect, for a meta-classification model the iou_threshold_label_error is set internally (0.5) and, accordingly, Boolean values are returned -> changing it requires re-running MetaDetect
#####

iou_threshold_TP = 0.1  # IoU threshold for considering a box as a true positive (Comparison of predicted boxes and validated GT boxes)

nms_threshold = 0.5  # We can further remove overlapping boxes from the predictions via NMS. Set to None to disable NMS

n = "all" # number of top-ranked label error proposals to keep; "all" keeps every proposal (a fixed n such as 100 is not used anymore)

from tools_label_error_detection import prepare_data, analysis_plots

In [52]:
# NOTE(review): prepare_data is bypassed here and the predictions are read from a CSV instead. The recorded
# md_casc.head() output of this CSV shows only filename, xmin, ymin, xmax, ymax and score, whereas later cells
# read md_casc["prediction"] and (via predict_label_errors) the "target" column — confirm the CSV provides them.
# md_casc = prepare_data(object_detector = object_detector, object_detector_score_threshold = object_detector_score_threshold,
#                         filter_small_boxes = filter_small_boxes, min_bbox_height = min_bbox_height, nms_threshold = nms_threshold)

md_casc = pd.read_csv(f"data/predictions/cascade_rcnn.csv")

# X = md_casc["target"].values
# Y = md_casc["prediction"].values
# fig = plt.figure()
# ax = plt.gca()
# ax.scatter(X,Y, s = 3)
# plt.xlabel("Target")
# plt.ylabel("Prediction")
# plt.show()

In [53]:
md_casc.head()

Unnamed: 0,filename,xmin,ymin,xmax,ymax,score
0,000002.png,308.440491,0.0,1242.0,375.0,0.060403
1,000002.png,6.365904,179.167618,200.370529,370.994568,0.038095
2,000002.png,933.774231,166.599594,972.888855,264.797424,0.026021
3,000002.png,688.459717,169.650024,716.337524,224.204132,0.025676
4,000002.png,922.277832,165.763916,1009.1875,324.516968,0.015925


In [None]:
# # review that NMS has been applied succesfully indeed

# orig_ious = []
# for i, row in md_casc.iterrows():
#     # Get the corresponding predicted box
#     pred_box = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']], dtype=torch.float32).unsqueeze(0)
    
#     # Get the ground truth boxes for the same image
#     other_boxes = md_casc[(md_casc['filename'] == row['filename']) & (md_casc.index != i)][['xmin', 'ymin', 'xmax', 'ymax']].values
    
#     if len(other_boxes) > 0:
#         other_boxes_tensor = torch.tensor(other_boxes, dtype=torch.float32)
#         iou = box_iou(pred_box, other_boxes_tensor).squeeze().numpy()
#         orig_ious.append(iou.max())
#     else:
#         orig_ious.append(0)

# np.array(orig_ious).max()

In [None]:
# Attach the MetaDetect scores to the object detector's prediction file and export the result.
json_file = pd.read_json(f"data/KITTI/predictions/results_{object_detector}_val_pedestrians_score_thresh_{object_detector_score_threshold}.json")

# one array of MetaDetect predictions per image, in the order of the images in the prediction file
md_casc_scores = [
    md_casc.loc[md_casc["filename"] == filename, "prediction"].values
    for filename in json_file["filename"]
]

json_file["metadetect_scores"] = md_casc_scores
json_file.to_json("cascade_rcnn_predictions_with_exported_scores.json", orient="index", indent=4)

In [None]:
# keep the label error proposals (ranked by the meta model) and renumber them 0..len-1
md_casc = predict_label_errors(md_casc, n, iou_threshold_label_error).reset_index(drop=True)
md_casc.head(2)

In [None]:
# add the IoUs with the original and the validated GT, then apply the configured filters
md_casc = filter_data_for_conditions(get_ious_with_orig_and_val_gt(md_casc))

In [None]:
# display the first few proposed label errors
for i in range(min(n_images_to_display, len(md_casc))):  # never ask for more proposals than exist
    # Positional access for both the filename and the box, so that the two always refer to the same row
    # even if the index has gaps (e.g. after filtering without a subsequent reset_index).
    proposal = md_casc.iloc[i]
    image_id = proposal['filename'].split('.')[0]  # Remove file extension if present
    # Read the predicted box (coordinates and score) of this proposal as an array of shape (1, 5)
    pred_boxes = proposal[['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)

    # os.path.join avoids the doubled '/' that string concatenation produced for the image path
    draw_boxes(os.path.join(path_to_kitti, 'image_2', f'{image_id}.png'),
               os.path.join(path_to_kitti, 'label_2', f'{image_id}.txt'),
               pred_boxes=pred_boxes)

In [None]:
# # evaluate the proposed label errors
# def evaluate_proposed_label_errors(df, iou_threshold):
#     # this function evaluates the proposed label errors by checking if there is a box in the val. GT for which the IoU with the predicted box exceeds the threshold
#     # df is assumed to contain the columns 'filename', 'xmin', 'ymin', 'xmax', 'ymax' and 'score' for the predicted boxes and filtered according to no IoU > thresh with original GT boxes
#     # the column 'TP' will be added to indicate whether the predicted box is a true positive (TP) or not i.e. an actual label error


#     df['TP'] = False  # Initialize a column for true positives
#     for i, row in df.iterrows(): # over all proposed label errors
#         # Get the predicted box
#         pred_box = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']], dtype=torch.float32).unsqueeze(0)
#         # Get the validated ground truth boxes for the same image
#         gt_boxes = validated_gt[validated_gt['filename'] == row['filename']][['xmin', 'ymin', 'xmax', 'ymax']].values
        
#         if len(gt_boxes) > 0: # only if there are validated ground truth boxes for this image the prediction might be a true positive, else TP[i] simply remains False.
#             gt_boxes_tensor = torch.tensor(gt_boxes, dtype=torch.float32)
#             ious = box_iou(pred_box, gt_boxes_tensor).squeeze().numpy()
#             if np.any(ious >= iou_threshold):
#                 df.at[i, 'TP'] = True  # Mark as true positive if IoU exceeds the threshold for any of the val. GT boxes
#     return df

def evaluate_proposed_label_errors(proposal_df, validated_gt):
    """Flag every label error proposal as true positive (TP) or not by greedy matching to the validated GT.

    The proposals are processed in the row order of `proposal_df` (assumed to be ordered according to a
    meta model score). A proposal is a TP if its best-overlapping validated GT box in the same image has
    an IoU above the global `iou_threshold_TP` and that box has not been matched before, neither by the
    original annotations (`iou_with_original_gt` above the global `iou_threshold_misfitting_box`) nor by
    an earlier proposal. Every validated GT box can therefore make at most one proposal a TP.

    Parameters
    ----------
    proposal_df : pandas.DataFrame
        Proposals with the columns 'filename', 'xmin', 'ymin', 'xmax', 'ymax'.
    validated_gt : pandas.DataFrame
        Validated GT boxes with the same box columns plus 'iou_with_original_gt'.

    Returns
    -------
    pandas.DataFrame
        `proposal_df` with the added boolean column 'TP'.

    Side effects
    ------------
    Both inputs are modified in place: 'TP' is written to `proposal_df` and the boolean column
    'matched' is (re-)initialised and updated in `validated_gt`.
    """
    box_cols = ['xmin', 'ymin', 'xmax', 'ymax']

    proposal_df["TP"] = np.zeros(len(proposal_df), dtype=bool)
    # validated GT boxes that are already covered by an original annotation are not label errors
    validated_gt["matched"] = validated_gt["iou_with_original_gt"] > iou_threshold_misfitting_box

    # loop over label error proposals and match them to val GT to simulate refinement of annotations
    for i, row in proposal_df.iterrows():
        # validated GT boxes of the same image (looked up once; the index labels are needed for flagging)
        gt_in_image = validated_gt.loc[validated_gt['filename'] == row['filename'], box_cols]
        if len(gt_in_image) == 0:
            continue  # no val GT annotations in this image -> the proposal cannot be a TP

        proposal = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']], dtype=torch.float32).unsqueeze(0)
        val_gt_boxes_tensor = torch.tensor(gt_in_image.values, dtype=torch.float32)
        # box_iou returns a (1, K) matrix; flatten it to the K IoUs of this proposal
        iou = box_iou(proposal, val_gt_boxes_tensor).reshape(-1).numpy()

        best = iou.argmax()
        if iou[best] > iou_threshold_TP:  # there is a matching box
            index = gt_in_image.index[best]
            if not validated_gt.at[index, "matched"]:  # only a not yet matched box yields a TP
                validated_gt.at[index, "matched"] = True
                proposal_df.at[i, "TP"] = True
                # here we would add IoU computation with orig. GT to distinguish overlooked and misfitting
    return proposal_df

# label_errors =  evaluate_proposed_label_errors(md_casc, validated_gt, iou_threshold_TP)
# print(f"Number of proposed label errors: {len(md_casc)}")
# print(f"Number of true positives: {label_errors}", f"TP rate: {label_errors / len(md_casc)}")

md_casc = evaluate_proposed_label_errors(md_casc, validated_gt)
md_casc.to_json(f"{object_detector}_proposed_label_errors_MetaDetect_kitti.json", orient = "index")

# summary of the evaluation of the Cascade R-CNN + MetaDetect proposals
n_proposals = len(md_casc)
n_true_positives = int((md_casc['TP'] == True).sum())
n_false_positives = int((md_casc['TP'] == False).sum())
print(f"Number of proposed label errors: {n_proposals}")
print(f"Number of true positives: {n_true_positives}", f"TP rate: {n_true_positives / n_proposals}")
print(f"Number of false positives: {n_false_positives}")

In [None]:
analysis_plots(md_casc, score_col = "prediction", method="MetaDetect", object_detector = "cascadercnn")

In [None]:
md_casc.head()

In [None]:
def get_costs_and_fnr(proposal_df, validated_gt, cost_factor, iou_threshold_TP):
    """Simulate the review of label error proposals and track the costs and the false negative rate (FNR).

    The proposals are reviewed in the row order of `proposal_df` (assumed to be ordered by likelihood).
    Each reviewed proposal costs `cost_factor`. A proposal whose best-overlapping validated GT box in the
    same image has an IoU above `iou_threshold_TP` marks that box as matched; a box that was matched
    already does not count twice. The FNR is the fraction of validated GT boxes that are still unmatched.

    Both DataFrames should be filtered according to the imposed conditions (dont care regions, object
    height); `proposal_df` should additionally be restricted to proposals with IoU < 0.5 with the original
    annotations.

    Parameters
    ----------
    proposal_df : pandas.DataFrame
        Proposals with the columns 'filename', 'xmin', 'ymin', 'xmax', 'ymax'.
    validated_gt : pandas.DataFrame
        Validated GT boxes with the same box columns plus 'iou_with_original_gt'. The boolean column
        'matched' is (re-)initialised and updated in place.
    cost_factor : float
        Cost of reviewing a single proposal.
    iou_threshold_TP : float
        IoU threshold for matching, used both for the proposals and for the initial matches.

    Returns
    -------
    (costs, FNRs) : tuple of lists
        Both have len(proposal_df) + 1 entries; entry k holds the cumulative costs and the FNR after the
        first k proposals have been reviewed (entry 0 is the initial state). The FNR is NaN if
        `validated_gt` is empty.
    """
    box_cols = ['xmin', 'ymin', 'xmax', 'ymax']
    n_gt = len(validated_gt)

    # initialize boolean column with matches of original annotations
    # NOTE(review): evaluate_proposed_label_errors initialises this column with iou_threshold_misfitting_box
    # instead of iou_threshold_TP — confirm which threshold is intended here.
    validated_gt["matched"] = validated_gt["iou_with_original_gt"] > iou_threshold_TP
    n_unmatched = int((validated_gt["matched"] == False).sum())

    def _fnr(unmatched):
        # fraction of validated GT boxes without a match; undefined (NaN) for an empty validated GT
        return unmatched / n_gt if n_gt else float("nan")

    # initial values
    costs = [0]
    FNRs = [_fnr(n_unmatched)]

    # loop over label error proposals and match them to val GT to simulate refinement of annotations
    # The costs depend on the NUMBER of reviewed proposals, hence the positional counter: the index
    # labels of proposal_df need not be 0..n-1 (e.g. after filtering without reset_index).
    for n_reviewed, (_, row) in enumerate(proposal_df.iterrows(), start=1):
        costs.append(n_reviewed * cost_factor)

        # Get the val ground truth boxes for the same image
        gt_in_image = validated_gt.loc[validated_gt['filename'] == row['filename'], box_cols]

        if len(gt_in_image) > 0:  # only then there is something to match against
            proposal = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']], dtype=torch.float32).unsqueeze(0)
            val_gt_boxes_tensor = torch.tensor(gt_in_image.values, dtype=torch.float32)
            # box_iou returns a (1, K) matrix; flatten it to the K IoUs of this proposal
            iou = box_iou(proposal, val_gt_boxes_tensor).reshape(-1).numpy()

            best = iou.argmax()
            if iou[best] > iou_threshold_TP:  # there is a matching box
                index = gt_in_image.index[best]
                if not validated_gt.at[index, "matched"]:
                    validated_gt.at[index, "matched"] = True  # flag this box as matched
                    n_unmatched -= 1  # keep the running count instead of re-scanning validated_gt

        FNRs.append(_fnr(n_unmatched))

    return costs, FNRs

In [None]:
print(validated_gt["height"].min(), validated_gt["iou_with_dont_care"].max())
print(original_gt["height"].min(), original_gt["iou_with_dont_care"].max())

In [None]:
cost_factor = 15.59 # cost per reviewed label error proposal (bounding box); the cost curves are plotted with the label "Costs in cents"

# cumulative review costs and resulting FNR after each reviewed Cascade R-CNN + MetaDetect proposal
md_casc_costs, md_casc_FNRs = get_costs_and_fnr(md_casc, validated_gt, cost_factor, iou_threshold_TP)

In [None]:
# FNR as a function of the cumulative review costs (Cascade R-CNN + MetaDetect)
fig, ax = plt.subplots()
ax.plot(md_casc_costs, md_casc_FNRs)

ax.set_xlabel("Costs in cents")
ax.set_ylabel("FNR")

plt.show()

### 2.1.2 MetaDetect on YOLOX Predictions

In [None]:
object_detector = "yolox"

md_yolox = prepare_data(
    object_detector=object_detector,
    object_detector_score_threshold=object_detector_score_threshold,
    filter_small_boxes=filter_small_boxes,
    min_bbox_height=min_bbox_height,
    nms_threshold=nms_threshold,
)

# scatter plot of the "target" column against the "prediction" column of the YOLOX boxes
fig, ax = plt.subplots()
ax.scatter(md_yolox["target"].values, md_yolox["prediction"].values, s=3)
ax.set_xlabel("Target")
ax.set_ylabel("Prediction")
plt.show()

In [None]:
# Sanity check for NMS: for every predicted box, compute its largest IoU with any other predicted box
# of the same image; the maximum over all boxes is displayed at the end.
box_cols = ['xmin', 'ymin', 'xmax', 'ymax']
orig_ious = []
for i, row in md_yolox.iterrows():
    # all other predicted boxes of the same image (the box itself is excluded via its index label)
    is_other_box = (md_yolox['filename'] == row['filename']) & (md_yolox.index != i)
    other_boxes = md_yolox.loc[is_other_box, box_cols].values

    if len(other_boxes) == 0:
        orig_ious.append(0)  # the only box in its image overlaps with nothing
        continue

    pred_box = torch.tensor([row['xmin'], row['ymin'], row['xmax'], row['ymax']], dtype=torch.float32).unsqueeze(0)
    ious = box_iou(pred_box, torch.tensor(other_boxes, dtype=torch.float32))
    orig_ious.append(ious.numpy().max())

np.array(orig_ious).max()

In [None]:
# Attach the MetaDetect scores to the object detector's prediction file and export the result.
json_file = pd.read_json(f"data/KITTI/predictions/results_{object_detector}_val_pedestrians_score_thresh_{object_detector_score_threshold}.json")

# one array of MetaDetect predictions per image, in the order of the images in the prediction file
md_yolox_scores = [
    md_yolox.loc[md_yolox["filename"] == filename, "prediction"].values
    for filename in json_file["filename"]
]

json_file["metadetect_scores"] = md_yolox_scores
json_file.to_json("yolox_predictions_with_exported_scores.json", orient="index", indent=4)

In [None]:
# keep the label error proposals (ranked by the meta model) and renumber them 0..len-1
md_yolox = predict_label_errors(md_yolox, n, iou_threshold_label_error)
md_yolox.reset_index(drop=True, inplace=True)
# Add the IoUs with the original/validated GT and apply the configured filters to the YOLOX proposals,
# mirroring the Cascade R-CNN pipeline. (These two steps were applied to md_casc here, a copy-paste
# slip that re-processed the Cascade R-CNN proposals and left md_yolox untouched.)
md_yolox = get_ious_with_orig_and_val_gt(md_yolox)
md_yolox = filter_data_for_conditions(md_yolox)
md_yolox.head(2)

In [None]:
# # compare annotations of the original ground truth with the predictions of YOLOX
# image_ids = np.random.choice(md_yolox['filename'].unique(), size = n_images_to_display, replace=False) # Get unique image filenames
# for image_id in image_ids: 
#     # Read the predicted boxes from the DataFrame
#     box_indices = md_yolox[md_yolox['filename'] == image_id].index
#     boxes = [md_yolox.iloc[i][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1) for i in box_indices]
#     boxes = np.vstack(boxes)  # Stack all predicted boxes for this image
#     image_id = image_id.split('.')[0]  # Remove file extension if present
#     draw_boxes(f'{path_to_kitti}image_2/{image_id}.png', f'{path_to_kitti}label_2/{image_id}.txt', pred_boxes=boxes)

In [None]:
# display the first few proposed label errors
for i in range(min(n_images_to_display, len(md_yolox))):  # never ask for more proposals than exist
    # Positional access for both the filename and the box, so that the two always refer to the same row
    # even if the index has gaps (e.g. after filtering without a subsequent reset_index).
    proposal = md_yolox.iloc[i]
    image_id = proposal['filename'].split('.')[0]  # Remove file extension if present
    # Read the predicted box (coordinates and score) of this proposal as an array of shape (1, 5)
    pred_boxes = proposal[['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)

    # os.path.join avoids the doubled '/' that string concatenation produced for the image path
    draw_boxes(os.path.join(path_to_kitti, 'image_2', f'{image_id}.png'),
               os.path.join(path_to_kitti, 'label_2', f'{image_id}.txt'),
               pred_boxes=pred_boxes)

In [None]:
md_yolox = evaluate_proposed_label_errors(md_yolox, validated_gt)
md_yolox.to_json(f"{object_detector}_proposed_label_errors_MetaDetect_kitti.json", orient = "index")

# summary of the evaluation of the YOLOX + MetaDetect proposals
n_proposals = len(md_yolox)
n_true_positives = int((md_yolox['TP'] == True).sum())
n_false_positives = int((md_yolox['TP'] == False).sum())
print(f"Number of proposed label errors: {n_proposals}")
print(f"Number of true positives: {n_true_positives}")
print(f"Number of false positives: {n_false_positives}")
print(f"TP rate: {n_true_positives / n_proposals}")

In [None]:
analysis_plots(md_yolox, score_col = "prediction", method="MetaDetect", object_detector = "yolox")

In [None]:
# add the IoUs with the original and the validated GT to the YOLOX proposals and apply the configured filters
md_yolox = get_ious_with_orig_and_val_gt(md_yolox)
md_yolox = filter_data_for_conditions(md_yolox)

# cumulative review costs and resulting FNR after each reviewed YOLOX + MetaDetect proposal
# (cost_factor and iou_threshold_TP come from earlier cells)
md_yolox_costs, md_yolox_FNRs = get_costs_and_fnr(md_yolox, validated_gt, cost_factor, iou_threshold_TP)

In [None]:
# FNR as a function of the cumulative review costs for both object detectors
fig, ax = plt.subplots()
ax.plot(md_casc_costs, md_casc_FNRs, label="Cascade R-CNN + MetaDetect")
ax.plot(md_yolox_costs, md_yolox_FNRs, label="YOLOX + MetaDetect")

ax.set_xlabel("Costs in cents")
ax.set_ylabel("FNR")
ax.legend()

plt.savefig("plots/FNRS.png")
plt.show()

## 2.2 Cleanlab

**TODO:** This section needs to be updated so that label errors are determined correctly.

In [None]:
from cleanlab.object_detection.filter import find_label_issues, compute_scores_boxwise
from cleanlab.object_detection.rank import (
    _separate_label,
    _separate_prediction,
    get_label_quality_scores,
    issues_from_scores,
)
from cleanlab.object_detection.summary import visualize

from tools_label_error_detection import construct_cleanlab_input

### 2.2.1 Cleanlab on Cascade RCNN Predictions

In [None]:
n_classes = 8
thresh = 0.01
object_detector = "cascadercnn"

# The prediction file is the one the MetaDetect export cells read; build its name from the shared
# object_detector_score_threshold parameter instead of a literal 0.01 embedded in the f-string, so that the
# path follows the configured score threshold.
path_to_predictions = f"data/KITTI/predictions/results_{object_detector}_val_pedestrians_score_thresh_{object_detector_score_threshold}.json"

# labels and predictions are per-image lists (same image order) in the format expected by cleanlab
labels, predictions = construct_cleanlab_input(path_to_kitti, split, path_to_predictions, n_classes, thresh)

In [None]:
# NOTE: only the last expression of this cell is displayed; the two bare slice expressions in the middle
# of the cell produce no output.

# NOTE(review): with return_indices_ranked_by_score=True, find_label_issues presumably returns the indices of
# the images with label issues ranked by score rather than a boolean vector — TODO confirm in the cleanlab docs.
has_label_issue = find_label_issues(labels, predictions, return_indices_ranked_by_score=True)
num_examples_to_show = n_images_to_display # view this many images flagged with the most severe label issues
has_label_issue[:num_examples_to_show]

# to get label quality scores for all images
label_quality_scores = get_label_quality_scores(labels, predictions)
label_quality_scores[:num_examples_to_show]

issue_idx = issues_from_scores(label_quality_scores, threshold=0.5)  # lower threshold will return fewer (but more confident) label issues
issue_idx[:num_examples_to_show], label_quality_scores[issue_idx][:num_examples_to_show]

In [None]:
# visualize the first flagged images together with their labels and predictions
for issue_to_visualize in issue_idx[:num_examples_to_show]:
    class_names = {"2": "pedestrian"}

    label = labels[issue_to_visualize]
    prediction = predictions[issue_to_visualize]
    score = label_quality_scores[issue_to_visualize]
    # Assuming images are in 'image_2' folder
    image_path = os.path.join(path_to_kitti, 'image_2', label['filename'])

    print(image_path, '| idx', issue_to_visualize , '| label quality score:', score, '| is issue: True')
    visualize(
        image_path,
        label=label,
        prediction=prediction,
        class_names=class_names,
        overlay=False,
        save_path=None,
    )

In [None]:
# bad_loc_scores and swapped_scores are computed for every bbox in the GT
# overlooked_scores are computed for every bbox in the predictions

# The internal high-probability threshold is 0.95, which is way too high for our purposes -> changed it to 0 so that all predictions are considered

# Now it only computes scores if the predicted prob. is above the threshold AND there is no GT bbox with a non-zero IoU
# One might consider introducing a lower threshold for the IoU by means of which a bbox is considered as being "overlooked"?

overlooked_scores, bad_loc_scores, swapped_scores = compute_scores_boxwise(labels, predictions)

In [None]:
# Replace NaN scores by 1 (per image) ...
overlooked_scores = [np.nan_to_num(arr, nan=1.0) for arr in overlooked_scores]
bad_loc_scores = [np.nan_to_num(arr, nan=1.0) for arr in bad_loc_scores]
swapped_scores = [np.nan_to_num(arr, nan=1.0) for arr in swapped_scores]

# ... and use 1 - score as the probability of the respective issue, flattened over all non-empty images
overlooked_probs = np.concatenate([1 - arr for arr in overlooked_scores if arr.size > 0])
bad_loc_probs = np.concatenate([1 - arr for arr in bad_loc_scores if arr.size > 0])
swapped_probs = np.concatenate([1 - arr for arr in swapped_scores if arr.size > 0])

# one histogram per issue type
fig, ax = plt.subplots(1, 3, figsize=(15, 4))
panels = zip(
    ax,
    (overlooked_probs, bad_loc_probs, swapped_probs),
    ('Overlooked', 'Bad Location', 'Swapped Class'),
)
for axis, probs, title in panels:
    axis.hist(probs, bins=50, alpha=0.7)
    axis.set_title(title)
    axis.set_xlabel('Probability')
    axis.set_ylabel('Frequency')

plt.savefig(f"plots/cleanlab_scores_{object_detector}.png", bbox_inches='tight')
plt.show()

In [None]:
# bad location score -> label error proposals
# One row per box in label['bboxes'] of every entry of `labels`: the image filename is repeated for each box and
# the box coordinates are taken from positions 0-3 of the box.
# NOTE(review): the per-image arrays in bad_loc_scores are flattened in the same way, which assumes that they hold
# exactly one score per box in the same order — confirm.
bad_loc_df = pd.DataFrame({
    'filename': [label['filename'] for label in labels for _ in range(len(label['bboxes']))],
    'xmin': [bbox[0] for label in labels for bbox in label['bboxes']],
    'ymin': [bbox[1] for label in labels for bbox in label['bboxes']],
    'xmax': [bbox[2] for label in labels for bbox in label['bboxes']],
    'ymax': [bbox[3] for label in labels for bbox in label['bboxes']],
    'score': [score for label_scores in bad_loc_scores for score in label_scores],
    'prob': [1 - score for label_scores in bad_loc_scores for score in label_scores],  # 1 - score is used as the probability of a badly located box
    })

In [None]:
# get IoU with validated GT for each GT box in the bad location DataFrame to determine label errors of this type
bad_loc_df = get_ious_with_orig_and_val_gt(bad_loc_df)

In [None]:
# rank the boxes by their probability of being badly located
bad_loc_df = bad_loc_df.sort_values(by="prob", ascending=False)

# a box counts as TP (actual label error of this type) if its IoU with the val. GT is below the threshold
bad_loc_df["TP"] = bad_loc_df["iou_with_val_gt"].lt(iou_threshold_misfitting_box)
n_bad_loc_errors = int(bad_loc_df["TP"].sum())

print(f"Number of GT Boxes: {len(bad_loc_df)}")
print(f"Out of which label errors due to bad location: {n_bad_loc_errors}")

print(bad_loc_df["iou_with_val_gt"].min()) # 0 -> there are boxes in the original GT that are not in the val. GT

bad_loc_df.head()

In [None]:
# predicted probability of a bad location vs. the IoU with the validated GT
X = bad_loc_df["prob"].values
Y = bad_loc_df["iou_with_val_gt"].values

fig, ax = plt.subplots()
ax.scatter(X, Y, s=3)
ax.set_xlabel("Pred. Probability of Bad Location")
ax.set_ylabel("IoU of GT box with Val. GT")

plt.show()

In [None]:
analysis_plots(bad_loc_df, score_col = "prob", method="Cleanlab_bad_loc", object_detector = "cascadercnn")

In [None]:
# overlooked score -> prediction error proposals
# One row per box in prediction[2] of every entry of `predictions`; the filename is taken from `labels` at the
# same position and the box coordinates from positions 0-3 of the box.
# NOTE(review): the per-image arrays in overlooked_scores are flattened in the same way, which assumes that they
# hold exactly one score per predicted box in the same order — confirm.
overlooked_df = pd.DataFrame({
    'filename': [labels[i]['filename'] for i in range(len(predictions)) for _ in range(len(predictions[i][2]))], # labels and predictions are per image in the same order of images
    'xmin': [bbox[0] for prediction in predictions for bbox in prediction[2]],
    'ymin': [bbox[1] for prediction in predictions for bbox in prediction[2]],
    'xmax': [bbox[2] for prediction in predictions for bbox in prediction[2]],
    'ymax': [bbox[3] for prediction in predictions for bbox in prediction[2]],
    'cleanlab_overlooked_score': [score for prediction_scores in overlooked_scores for score in prediction_scores],
    'prob': [float(1.0 - score) for prediction_scores in overlooked_scores for score in prediction_scores],  # 1 - score is used as the probability of an overlooked object
    })

# The IoU with the original GT is used later to select the label error proposals (IoU < 0.5);
# the IoU with the validated GT is used to determine the actual label errors of this type.
overlooked_df = get_ious_with_orig_and_val_gt(overlooked_df)

In [None]:
# Attach the cleanlab "overlooked" probabilities to the previously exported prediction file and export it again.
json_file = pd.read_json("cascade_rcnn_predictions_with_exported_scores.json").T # transpose here

# one array of probabilities per image, in the order of the images in the exported file
cleanlab_scores = [
    overlooked_df.loc[overlooked_df["filename"] == filename, "prob"].values
    for filename in json_file["filename"]
]

json_file["cleanlab_overlooked_scores"] = cleanlab_scores
json_file.to_json("cascade_rcnn_predictions_with_exported_scores.json", orient="index", indent=4)

In [None]:
# Label error proposals are the predicted boxes with orig. IoU < threshold, ranked by their probability.
overlooked_df = (
    overlooked_df[overlooked_df['iou_with_original_gt'] < iou_threshold_label_error]
    .sort_values(by="prob", ascending=False)
)

# a proposal counts as TP if it overlaps sufficiently with the validated GT
overlooked_df["TP"] = overlooked_df["iou_with_val_gt"] >= iou_threshold_TP

is_tp = overlooked_df['TP'] == True
orig_iou = overlooked_df['iou_with_original_gt']

print(f"Number of predicted Boxes: {len(overlooked_df)}")
print(f"Number of Label Errors detected through predicted boxes: {int(is_tp.sum())}")
print(f"Out of which label errors due to the box being overlooked previously (orig. IoU = 0): {int((is_tp & (orig_iou == 0)).sum())}")
print(f"Out of which label errors due to a misfitting bbox (0 < orig. IoU < thresh = {iou_threshold_label_error}): {int((is_tp & (orig_iou > 0)).sum())}")
overlooked_df.head()

In [None]:
# predicted probability of an overlooked object vs. the IoU of the predicted box with the validated GT
X = overlooked_df["prob"].values
Y = overlooked_df["iou_with_val_gt"].values

fig, ax = plt.subplots()
ax.scatter(X, Y, s=3)
ax.set_xlabel("Pred. Probability of Overlooked Object")
# the rows of overlooked_df are predicted boxes, not GT boxes (the old label was copied from the bad-location plot)
ax.set_ylabel("IoU of pred. box with Val. GT")

plt.show()

In [None]:
analysis_plots(overlooked_df, score_col = "prob", method="Cleanlab_overlooked", object_detector = "cascadercnn")

### 2.2.2 Cleanlab on YOLOX Predictions

In [None]:
object_detector = "yolox"

# Build the file name from the shared object_detector_score_threshold parameter (as the MetaDetect export
# cells do) instead of a literal 0.01 embedded in the f-string.
path_to_predictions = f"data/KITTI/predictions/results_{object_detector}_val_pedestrians_score_thresh_{object_detector_score_threshold}.json"

# n_classes and thresh are defined in the Cascade R-CNN cell of section 2.2.1
labels, predictions = construct_cleanlab_input(path_to_kitti, split, path_to_predictions, n_classes, thresh)

# NOTE: only the last expression of this cell is displayed; the bare slice expressions in between produce no output.
# NOTE(review): with return_indices_ranked_by_score=True, find_label_issues presumably returns the indices of
# the images with label issues ranked by score rather than a boolean vector — TODO confirm in the cleanlab docs.
has_label_issue = find_label_issues(labels, predictions, return_indices_ranked_by_score=True)
num_examples_to_show = n_images_to_display # view this many images flagged with the most severe label issues
has_label_issue[:num_examples_to_show]

# to get label quality scores for all images
label_quality_scores = get_label_quality_scores(labels, predictions)
label_quality_scores[:num_examples_to_show]

issue_idx = issues_from_scores(label_quality_scores, threshold=0.5)  # lower threshold will return fewer (but more confident) label issues
issue_idx[:num_examples_to_show], label_quality_scores[issue_idx][:num_examples_to_show]

In [None]:
# visualize the first flagged images together with their labels and predictions
for issue_to_visualize in issue_idx[:num_examples_to_show]:
    class_names = {"2": "pedestrian"}

    label = labels[issue_to_visualize]
    prediction = predictions[issue_to_visualize]
    score = label_quality_scores[issue_to_visualize]
    # Assuming images are in 'image_2' folder
    image_path = os.path.join(path_to_kitti, 'image_2', label['filename'])

    print(image_path, '| idx', issue_to_visualize , '| label quality score:', score, '| is issue: True')
    visualize(
        image_path,
        label=label,
        prediction=prediction,
        class_names=class_names,
        overlay=False,
        save_path=None,
    )

In [None]:
overlooked_scores, bad_loc_scores, swapped_scores = compute_scores_boxwise(labels, predictions)

# Replace NaN scores by 1 (per image) ...
overlooked_scores = [np.nan_to_num(arr, nan=1.0) for arr in overlooked_scores]
bad_loc_scores = [np.nan_to_num(arr, nan=1.0) for arr in bad_loc_scores]
swapped_scores = [np.nan_to_num(arr, nan=1.0) for arr in swapped_scores]

# ... and use 1 - score as the probability of the respective issue, flattened over all non-empty images
overlooked_probs = np.concatenate([1 - arr for arr in overlooked_scores if arr.size > 0])
bad_loc_probs = np.concatenate([1 - arr for arr in bad_loc_scores if arr.size > 0])
swapped_probs = np.concatenate([1 - arr for arr in swapped_scores if arr.size > 0])

# one histogram per issue type
fig, ax = plt.subplots(1, 3, figsize=(15, 4))
panels = zip(
    ax,
    (overlooked_probs, bad_loc_probs, swapped_probs),
    ('Overlooked', 'Bad Location', 'Swapped Class'),
)
for axis, probs, title in panels:
    axis.hist(probs, bins=50, alpha=0.7)
    axis.set_title(title)
    axis.set_xlabel('Probability')
    axis.set_ylabel('Frequency')

plt.savefig(f"plots/cleanlab_scores_{object_detector}.png", bbox_inches='tight')
plt.show()

In [None]:
# bad location score -> label error proposals
# One row per box in label['bboxes'] of every entry of `labels`: the image filename is repeated for each box and
# the box coordinates are taken from positions 0-3 of the box.
# NOTE(review): the per-image arrays in bad_loc_scores are flattened in the same way, which assumes that they hold
# exactly one score per box in the same order — confirm.
bad_loc_df = pd.DataFrame({
    'filename': [label['filename'] for label in labels for _ in range(len(label['bboxes']))],
    'xmin': [bbox[0] for label in labels for bbox in label['bboxes']],
    'ymin': [bbox[1] for label in labels for bbox in label['bboxes']],
    'xmax': [bbox[2] for label in labels for bbox in label['bboxes']],
    'ymax': [bbox[3] for label in labels for bbox in label['bboxes']],
    'score': [score for label_scores in bad_loc_scores for score in label_scores],
    'prob': [1 - score for label_scores in bad_loc_scores for score in label_scores],  # 1 - score is used as the probability of a badly located box
    })

# get IoU with validated GT for each GT box in the bad location DataFrame to determine label errors of this type
bad_loc_df = get_ious_with_orig_and_val_gt(bad_loc_df)

# rank the boxes by their probability of being badly located
bad_loc_df = bad_loc_df.sort_values(by = "prob", ascending=False)

# a box counts as TP (actual label error of this type) if its IoU with the val. GT is below the threshold
bad_loc_df["TP"] = bad_loc_df["iou_with_val_gt"] < iou_threshold_misfitting_box

print(f"Number of GT Boxes: {len(bad_loc_df)}")
print(f"Out of which label errors due to bad location: {len(bad_loc_df[bad_loc_df['TP'] == True])}")

# predicted probability of a bad location vs. the IoU with the validated GT
X = bad_loc_df["prob"].values
Y = bad_loc_df["iou_with_val_gt"].values

fig = plt.figure()
ax = plt.gca()
ax.scatter(X,Y, s = 3)
plt.xlabel("Pred. Probability of Bad Location")
plt.ylabel("IoU of GT box with Val. GT")

plt.show()

In [None]:
# This is the YOLOX section: bad_loc_df was computed from the YOLOX predictions, so the plots must be attributed
# to "yolox" (the call previously passed "cascadercnn", a copy-paste leftover from section 2.2.1).
analysis_plots(bad_loc_df, score_col = "prob", method="Cleanlab_bad_loc", object_detector = "yolox")

In [None]:
# overlooked score -> prediction error proposals
# One row per box in prediction[2] of every entry of `predictions`; the filename is taken from `labels` at the
# same position and the box coordinates from positions 0-3 of the box.
# NOTE(review): the per-image arrays in overlooked_scores are flattened in the same way, which assumes that they
# hold exactly one score per predicted box in the same order — confirm.
overlooked_df = pd.DataFrame({
    'filename': [labels[i]['filename'] for i in range(len(predictions)) for _ in range(len(predictions[i][2]))], # labels and predictions are per image in the same order of images
    'xmin': [bbox[0] for prediction in predictions for bbox in prediction[2]],
    'ymin': [bbox[1] for prediction in predictions for bbox in prediction[2]],
    'xmax': [bbox[2] for prediction in predictions for bbox in prediction[2]],
    'ymax': [bbox[3] for prediction in predictions for bbox in prediction[2]],
    'cleanlab_overlooked_score': [score for prediction_scores in overlooked_scores for score in prediction_scores],
    'prob': [float(1.0 - score) for prediction_scores in overlooked_scores for score in prediction_scores],  # 1 - score is used as the probability of an overlooked object
    })

# The IoU with the original GT can be used to select the label error proposals (IoU < 0.5);
# the IoU with the validated GT can be used to determine the actual label errors of this type.
overlooked_df = get_ious_with_orig_and_val_gt(overlooked_df)

# merge with object detector predictions and export
# (the file read here is the one written by the YOLOX MetaDetect export cell; it is overwritten below)
json_file = pd.read_json("yolox_predictions_with_exported_scores.json").T # transpose here
cleanlab_scores = []
for filename in json_file["filename"]:
    # probabilities of all predicted boxes of this image
    scores = overlooked_df[overlooked_df["filename"] == filename]["prob"].values
    cleanlab_scores.append(scores)
    
json_file["cleanlab_overlooked_scores"] = cleanlab_scores
json_file.to_json("yolox_predictions_with_exported_scores.json", orient="index", indent=4)

## 2.3 Loss-based Method

In [None]:
object_detector = "cascadercnn"

# load the results of the loss-based method (the stored index column is dropped)
loss_based = pd.read_csv("data/KITTI/predictions/results_loss_based_method.csv").drop(columns=["Unnamed: 0"])
loss_based["filename"] = [path.split("/")[-1] for path in loss_based["img_path"]]  # get the image name from the path
loss_based = loss_based[loss_based['class_id'] == 2]  # Filter for pedestrian predictions 
loss_based = loss_based.rename(columns={"s": "score"})
# .copy() makes the filtered frame independent of its parent, so that adding the "height" column below
# does not write to a slice (SettingWithCopyWarning)
loss_based = loss_based[loss_based["score"] > object_detector_score_threshold].copy()
loss_based["height"] = loss_based["ymax"] - loss_based["ymin"]
if filter_small_boxes:
    print(f"Filtering out {len(loss_based[loss_based['height'] < min_bbox_height])} boxes with height < {min_bbox_height} pixels")
    loss_based = loss_based[loss_based["height"] >= min_bbox_height]  # Filter out boxes that are too small
    loss_based.reset_index(drop=True, inplace=True)  # Reset the index

# proposed label errors do not overlap much due to NMS being applied in the object detector already.
#  Here, we could further reduce this threshold if we wanted to.
# nms_df = perform_nms_on_dataframe(df, nms_threshold)
# print(f"Number of Boxes: {len(df)}", f", After NMS: {len(nms_df)}" )

loss_based = get_ious_with_orig_and_val_gt(loss_based)
print(len(loss_based))
# label error proposals: boxes with IoU with the original GT below the threshold, ranked by the "rpn_s" column
loss_based = predict_label_errors(loss_based, n, iou_threshold_label_error, target_col = "iou_with_original_gt", prediction_col = "rpn_s")
loss_based.reset_index(drop=True, inplace=True)
print(len(loss_based))
loss_based.head(2)

In [None]:
loss_based["ymax"].max()

In [None]:
md_casc["ymax"].max()

In [None]:
# IoU with the original GT vs. IoU with the validated GT for the loss-based proposals
X = loss_based["iou_with_original_gt"].values
Y = loss_based["iou_with_val_gt"].values

fig, ax = plt.subplots()
ax.scatter(X, Y, s=3)
ax.set_xlabel("IoU original GT")
ax.set_ylabel("IoU val. GT")
plt.show()

In [None]:
# Debugging cell: inspect a single proposal of the loss-based method.
i = 0
image_id = loss_based['filename'][i]
image_id = image_id.split('.')[0]  # Remove file extension if present
# Read the predicted box from the DataFrame

# NOTE(review): the box is hard-coded below (xmin, ymin, xmax, ymax, score) instead of being read from loss_based
# via the commented-out line — presumably for debugging; confirm before relying on the figure.
# pred_boxes = loss_based.iloc[i][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)
pred_boxes = [np.array([348, 358, 373, 460, 0.33])]

def read_kitti_labels(label_path):
    """Read class names and 2D bounding boxes from a KITTI label file.

    Every non-empty line is split on whitespace: the first field is taken as the
    class name and fields 4-7 as the 2D box ``[left, top, right, bottom]``.
    Blank or whitespace-only lines (e.g. a trailing empty line) are skipped.

    Args:
        label_path: Path to the label ``.txt`` file.

    Returns:
        A list of ``(cls, bbox)`` tuples, where ``cls`` is a string and ``bbox``
        is a list of floats.
    """
    boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                # Nothing to parse; indexing parts[0] would raise IndexError.
                continue
            cls = parts[0]
            # 2D bounding box: [left, top, right, bottom]
            bbox = list(map(float, parts[4:8]))
            boxes.append((cls, bbox))
    return boxes


# Draw the original KITTI pedestrian boxes (green) and the predicted boxes (red)
# on the selected image.
image_path = f'{path_to_kitti}/image_2/{image_id}.png'
label_path = f'{path_to_kitti}label_2/{image_id}.txt'
scores_pred_boxes = None  # optional per-box scores; a text label is drawn only when set

# Predicted y coordinates are divided by this factor before drawing.
# NOTE(review): the factor 2 is taken over from the original cell; presumably the
# loss-based boxes are stored in a vertically stretched coordinate frame — confirm.
pred_y_divisor = 2

image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Draw ground truth boxes
gt_boxes = read_kitti_labels(label_path)
for cls, (left, top, right, bottom) in gt_boxes:
    if cls == "Pedestrian": # we only care about pedestrians
        pt1 = (int(left), int(top))
        pt2 = (int(right), int(bottom))
        print(left, top, right, bottom)
        cv2.rectangle(image, pt1, pt2, color=(0, 255, 0), thickness=2)  # Green
        # cv2.putText(image, "GT", (int(left), int(top) - 5), cv2.FONT_HERSHEY_SIMPLEX,
        #     0.5, (0, 255, 0), 1)

# Draw predicted boxes (if provided)
if pred_boxes is not None:
    for box_idx, pred in enumerate(pred_boxes):
        xmin, ymin, xmax, ymax = pred[:4]

        print(xmin, ymin, xmax, ymax)
        box_top = int(ymin / pred_y_divisor)
        box_bottom = int(ymax / pred_y_divisor)
        cv2.rectangle(image, (int(xmin), box_top), (int(xmax), box_bottom), color=(255, 0, 0), thickness=2)  # Red
        if scores_pred_boxes is not None:
            score = scores_pred_boxes[box_idx]
            label = f" ({score:.4f})"
            # Anchor the text just above the rectangle that was actually drawn.
            cv2.putText(image, label, (int(xmin), box_top - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (255, 0, 0), 1)


plt.figure(figsize=(15, 6))
plt.imshow(image)
plt.axis('off')
plt.show()

In [None]:
n_images_to_display = 5
# Display the first few proposed label errors; never request more rows than exist,
# otherwise the positional lookups below fail on a short DataFrame.
for i in range(min(n_images_to_display, len(loss_based))):
    image_id = loss_based['filename'][i]
    image_id = image_id.split('.')[0]  # Remove file extension if present
    # Read the predicted box from the DataFrame as a single row (xmin, ymin, xmax, ymax, score)
    pred_boxes = loss_based.iloc[i][['xmin', 'ymin', 'xmax', 'ymax', 'score']].values.reshape(1, -1)

    draw_boxes(f'{path_to_kitti}/image_2/{image_id}.png', f'{path_to_kitti}label_2/{image_id}.txt', pred_boxes=pred_boxes)

# 3 Analysis of bbox sizes

In [None]:
# Load the validated ground truth data.
# path_to_validated_gt may either point directly at a CSV file or be a directory-style
# prefix containing one file per probability threshold; appending a file name to a
# path that already ends in ".csv" would produce a non-existent path.
if path_to_validated_gt.endswith(".csv"):
    # NOTE(review): in the single-file case no filtering by validated_gt_prob_threshold
    # is applied here — confirm whether rows should be filtered by their score.
    validated_gt_file = path_to_validated_gt
else:
    validated_gt_file = path_to_validated_gt + f"validated_gt_{validated_gt_prob_threshold}.csv"
validated_gt = pd.read_csv(validated_gt_file)
# Ensure the 'filename' column is in the correct format
validated_gt["filename"] = validated_gt['filename'].astype(str).str.zfill(6) + '.png'

# Convert box format from x_mean, y_mean, width, height to xmin, ymin, xmax, ymax.
# The "bbox" strings are parsed only once instead of twice per derived column.
validated_bboxes = validated_gt["bbox"].apply(ast.literal_eval)
bbox_x_mean = validated_bboxes.apply(lambda box: box[0])
bbox_y_mean = validated_bboxes.apply(lambda box: box[1])
bbox_width = validated_bboxes.apply(lambda box: box[2])
bbox_height = validated_bboxes.apply(lambda box: box[3])
validated_gt["xmin"] = bbox_x_mean - bbox_width / 2
validated_gt["ymin"] = bbox_y_mean - bbox_height / 2
validated_gt["xmax"] = bbox_x_mean + bbox_width / 2
validated_gt["ymax"] = bbox_y_mean + bbox_height / 2

validated_gt["height"] = validated_gt["ymax"] - validated_gt["ymin"]

original_gt = construct_original_gt(path_to_kitti, split, False, 0) # No filtering applied


In [None]:
small_fontsize = 9
very_small_fontsize = 6

# Box areas (width * height) of the original and the validated ground truth.
original_gt['area'] = (original_gt['xmax'] - original_gt['xmin']) * (original_gt['ymax'] - original_gt['ymin'])
validated_gt['area'] = (validated_gt['xmax'] - validated_gt['xmin']) * (validated_gt['ymax'] - validated_gt['ymin'])

# 50 logarithmically spaced bin edges per dataset, spanning that dataset's own area range.
bins_original_gt = np.logspace(np.log10(original_gt['area'].min()), np.log10(original_gt['area'].max()), 50)
bins_validated_gt = np.logspace(np.log10(validated_gt['area'].min()), np.log10(validated_gt['area'].max()), 50)

# Compare the histograms of the bbox sizes of the original and validated ground truth.
fig, ax = plt.subplots(figsize=(1.7, 1.25))
ax.hist(original_gt['area'], bins=bins_original_gt, alpha=0.5, label='Original GT')
ax.hist(validated_gt['area'], bins=bins_validated_gt, alpha=0.5, label='Validated GT')
ax.set_xlabel('Bounding Box Area', fontsize=small_fontsize)
ax.set_ylabel('Frequency', fontsize=small_fontsize)

ax.set_xscale('log')  # Log scale for better visibility of small areas
ax.grid()

plt.yticks([0, 20, 40, 60], fontsize=small_fontsize)
plt.xticks([10, 1000, 100000], fontsize=small_fontsize)

ax.legend(fontsize=very_small_fontsize, ncols=2, bbox_to_anchor=(0.34, 1.1), loc='center', columnspacing=0.75)

plt.savefig("plots/bbox_area_histogram_kitti_orig_and_val_gt.png", bbox_inches='tight')
plt.show()

In [None]:
# Both detectors are prepared with the same settings: no size filtering
# (min_bbox_height = 0) and an NMS threshold of 0.5.
shared_prepare_kwargs = dict(
    object_detector_score_threshold = object_detector_score_threshold,
    filter_small_boxes = False,
    min_bbox_height = 0,
    nms_threshold = 0.5,
)

md_casc = prepare_data(object_detector = "cascadercnn", **shared_prepare_kwargs)

md_yolox = prepare_data(object_detector = "yolox", **shared_prepare_kwargs)

In [None]:
# Box areas (width * height) of the predictions of both detectors.
md_casc["area"] = (md_casc['xmax'] - md_casc['xmin']) * (md_casc['ymax'] - md_casc['ymin'])
md_yolox["area"] = (md_yolox['xmax'] - md_yolox['xmin']) * (md_yolox['ymax'] - md_yolox['ymin'])

alpha = 0.7

# The detector histograms share bin edges derived from the original GT area range,
# while the validated GT histogram uses its own area range.
bins_from_original_gt = np.logspace(np.log10(original_gt['area'].min()), np.log10(original_gt['area'].max()), 50)
bins_from_validated_gt = np.logspace(np.log10(validated_gt['area'].min()), np.log10(validated_gt['area'].max()), 50)

fig, ax = plt.subplots()
ax.hist(md_casc['area'], bins=bins_from_original_gt, alpha=alpha, label='Predictions Cascade R-CNN')
ax.hist(md_yolox['area'], bins=bins_from_original_gt, alpha=alpha, label='Predictions YOLOX')
ax.hist(validated_gt['area'], bins=bins_from_validated_gt, alpha=alpha, label='Validated GT')

ax.set_xlabel('Bounding Box Area')
ax.set_ylabel('Frequency')
ax.legend()
#plt.title('Histogram of Bounding Box Areas in Original and Validated GT')
ax.set_xscale('log')  # Log scale for better visibility of small areas
ax.grid()

plt.savefig("plots/bbox_area_histogram_object_detectors.png", bbox_inches='tight')
plt.show()

## 3.2 Distribution of ambiguity and correlation with object size

In [None]:
# Load the validated ground truth data for probability threshold 0.0.
# path_to_validated_gt may either point directly at a CSV file or be a directory-style
# prefix containing one file per probability threshold; appending a file name to a
# path that already ends in ".csv" would produce a non-existent path.
if path_to_validated_gt.endswith(".csv"):
    val_gt_zero_file = path_to_validated_gt
else:
    val_gt_zero_file = path_to_validated_gt + f"validated_gt_{0.0}.csv"
val_gt_zero = pd.read_csv(val_gt_zero_file)
# Ensure the 'filename' column is in the correct format
val_gt_zero["filename"] = val_gt_zero['filename'].astype(str).str.zfill(6) + '.png'

# Convert box format from x_mean, y_mean, width, height to xmin, ymin, xmax, ymax.
# The "bbox" strings are parsed only once instead of twice per derived column.
val_gt_zero_bboxes = val_gt_zero["bbox"].apply(ast.literal_eval)
zero_x_mean = val_gt_zero_bboxes.apply(lambda box: box[0])
zero_y_mean = val_gt_zero_bboxes.apply(lambda box: box[1])
zero_width = val_gt_zero_bboxes.apply(lambda box: box[2])
zero_height = val_gt_zero_bboxes.apply(lambda box: box[3])
val_gt_zero["xmin"] = zero_x_mean - zero_width / 2
val_gt_zero["ymin"] = zero_y_mean - zero_height / 2
val_gt_zero["xmax"] = zero_x_mean + zero_width / 2
val_gt_zero["ymax"] = zero_y_mean + zero_height / 2

val_gt_zero["area"] = (val_gt_zero['xmax'] - val_gt_zero['xmin']) * (val_gt_zero['ymax'] - val_gt_zero['ymin'])

# plot distribution of probability for the bounding box containing a pedestrian

fig = plt.figure(figsize=(1.7,1.25))
ax = plt.gca()
ax.hist(val_gt_zero['score'], 25, ec = "darkslategrey")
plt.xlabel('Probability', fontsize = small_fontsize)
plt.ylabel('Frequency', fontsize = small_fontsize)
plt.yticks([0,100,200,300], fontsize = small_fontsize)
plt.xticks([0,0.5, 1], fontsize = small_fontsize)
plt.grid(zorder=0)
ax.set_axisbelow(True)  # keep the grid lines behind the bars
plt.savefig("plots/soft_label_distribution.png", bbox_inches='tight')
plt.show()

In [None]:
# Scatter plot: probability of the box containing a pedestrian (x) against the
# bounding box area (y, logarithmic axis).
fig, ax = plt.subplots()
ax.scatter(val_gt_zero['score'], val_gt_zero['area'], s=3)
ax.set_ylabel('Bounding Box Area')
ax.set_xlabel('Probability')
#plt.yticks([0,100,200,300])
ax.grid(zorder=0)
ax.set_axisbelow(True)  # keep the grid lines behind the markers

ax.set_yscale("log")

# plt.savefig(f"plots/correlation_ambiguity_object_size.png", bbox_inches='tight')
plt.show()

# 4 Analysis of Detected Label Errors

## 4.1 Cascade R-CNN Predictions

In [None]:
# The difference between the IoU with the original GT and the IoU with the validated GT
# indicates to which extent a box was misfitting or not annotated at all:
#   - small difference: the box was misfitting,
#   - large difference: the box was not annotated at all in the original GT,
#   - very small difference: may indicate a non-severe label error.

md_casc = get_ious_with_orig_and_val_gt(md_casc)

# Only the rows flagged as true positives are plotted.
md_casc_tp = md_casc.query("TP == True")

fig, ax = plt.subplots()
ax.scatter(md_casc_tp['iou_with_original_gt'], md_casc_tp['iou_with_val_gt'], s=4)

ax.set_xlabel('IoU with original GT')
ax.set_ylabel('IoU with validated GT')
ax.grid(zorder=0)
ax.set_axisbelow(True)
plt.show()