# Tuning thresholds based on validated detections

This notebook assumes detections from multiple `{date}` runs have been validated and grouped (e.g. by using the validation script) and saved as follows:

```
data_folder/{date}          # contains TP detections
data_folder/{date}/fp_cat1  # contains FP detections 
data_folder/{date}/fp_cat2  # contains TP detections that are too far away
```

In [None]:
import json
import os

import pandas as pd


def json_dir_to_df(dir: str) -> pd.DataFrame:
    """Load the detections from every ``*.json`` file in a folder into one DataFrame.

    Each JSON file is expected to hold a top-level ``"detections"`` list; its
    entries are flattened with ``pd.json_normalize``. The ``tracking_id``
    column is dropped (when present), the columns are renamed according to the
    module-level ``col_map``, and an ``area`` column (``width * height``) is
    added.

    Args:
        dir: Path of the folder to read. Only files directly inside it whose
            name ends with ``.json`` are considered; subfolders are not
            searched. (The name shadows the ``dir`` builtin but is kept for
            backward compatibility with keyword callers.)

    Returns:
        One row per detection. An empty DataFrame is returned when the folder
        contains no JSON files or none of them contains any detection.

    Raises:
        FileNotFoundError: If ``dir`` does not exist.
    """
    # Sort so the row order does not depend on the file system's listing order.
    json_files = sorted(file for file in os.listdir(dir) if file.endswith(".json"))

    dfs = []
    for file in json_files:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(os.path.join(dir, file), encoding="utf-8") as f:
            json_data = json.load(f)
        frame = pd.json_normalize(json_data, "detections")
        # Files without detections yield a column-less frame; skip them.
        if not frame.empty:
            dfs.append(frame)

    # pd.concat raises ValueError on an empty list, so bail out early.
    if not dfs:
        return pd.DataFrame()

    df = (
        pd.concat(dfs, ignore_index=True)
        .drop(columns="tracking_id", errors="ignore")
        .rename(columns=col_map)
    )
    df["area"] = df["width"] * df["height"]
    return df


# Root folder holding one subfolder per validated {date} run.
data_folder = "../datasets/oor/val/detection_metadata/"

# Subfolder name inside a {date} folder -> short category label.
cat_name_map = {
    "": "tp",  # the {date} folder itself
    "fp_cat1": "fp",
    "fp_cat2": "dist",
}

# Flattened bounding-box column names -> short column names.
col_map = {
    f"boundingBox.{field}": field
    for field in ("x_center", "y_center", "width", "height")
}

In [None]:
# Every immediate subdirectory of data_folder is treated as one {date} run.
date_subfolders = [
    name
    for name in os.listdir(data_folder)
    if os.path.isdir(os.path.join(data_folder, name))
]

# Load each (run, category) folder and tag its rows with the category label.
dfs = []
for date in date_subfolders:
    for cat, cat_name in cat_name_map.items():
        cat_dir = os.path.join(data_folder, date, cat)
        dfs.append(json_dir_to_df(cat_dir).assign(category=cat_name))

# Single table with all detections of all runs and categories.
detections_df = pd.concat(dfs, ignore_index=True)

In [None]:
# Preview the first five rows of the combined detections table.
detections_df.head(5)

In [None]:
# Print per-category summary statistics of one attribute for one object class.
# The output is a markdown table that can be pasted as-is.

target_class = 2  # 2, 3, 4
attribute = "confidence"  # e.g. "confidence", "area", "height", "width"

# Keep only the detections of the selected object class.
is_target = detections_df["object_class"] == target_class
object_df = detections_df[is_target]

# count / mean / std / min / max plus the requested percentiles, per category.
stats = object_df.groupby(by="category")[attribute].describe(
    percentiles=[0.1, 0.2, 0.8, 0.9]
)

# Reverse the row order produced by groupby before rendering.
md = stats.iloc[::-1].to_markdown(floatfmt=".4f")

print(md)

In [None]:
# Draw one boxplot per attribute for a chosen object class, grouped by
# category, and write the figure to data_folder as a PNG.

import matplotlib.pyplot as plt

target_class = 2  # 2, 3, 4

# Object class id -> display name, used for the figure title and file name.
class_map = {
    2: "Container",
    3: "Mobile toilet",
    4: "Scaffolding",
}

object_df = detections_df[detections_df["object_class"] == target_class]

# 2x2 grid; axes.flat walks it row by row, so the panels are filled in the
# order confidence, area (top row), width, height (bottom row).
fig, axes = plt.subplots(2, 2, figsize=[10, 10], constrained_layout=True)
for panel, column in zip(axes.flat, ["confidence", "area", "width", "height"]):
    object_df.boxplot(column=[column], by="category", ax=panel)

title = class_map[target_class]
fig.suptitle(title)

plt.savefig(
    os.path.join(data_folder, f"{title} - boxplots.png"),
    dpi=150,
)