In [1]:
# This cell makes Python use the local checkout of CVToolkit instead of the installed package.
# For testing purposes only, until branch feature/BCV-970-oor-metrics is merged.
import sys
import os

# Absolute location of the local CVToolkit checkout, resolved against the current working directory.
module_path = os.path.abspath("../../CVToolkit")
# Register it at the front of the import path, but only once, so re-running the cell adds no duplicates.
if sys.path.count(module_path) == 0:
    sys.path.insert(0, module_path)

In [2]:
import sys
import pandas as pd

# Add the directory two levels above the working directory to the import path.
# NOTE(review): presumably this is the repository root that holds the
# `objectherkenning_openbare_ruimte` package imported below — confirm.
sys.path.append("../..")

from objectherkenning_openbare_ruimte.performance_evaluation_pipeline.source.oor_evaluation import (
    tba_evaluation, per_image_evaluation, coco_evaluation
)
from objectherkenning_openbare_ruimte.performance_evaluation_pipeline.metrics.metrics_utils import (
    ObjectClass, predictions_to_coco_json
)

In [3]:
# Image size as (width, height); passed on to every evaluation call.
img_shape = (1280, 720)

# Root folder of the ground-truth dataset and root folder of the stored predictions.
gt_base_dir = "../../datasets/oor/processed-merged-batches-first-official-training-dataset-oor"
pred_base_dir = "../../datasets/oor/inference/processed_merged_v2"

# Model runs to evaluate; each name is used as a sub-folder of `pred_base_dir`.
models = [
    "yolov8m_1280_oor_v2_best",
    "yolov8_1280_oor_v2_noble_sweep_15",
    "yolov8_1280_oor_v2_cerulean_sweep_25",
]

# Dataset splits to evaluate.
splits = [
    "train",
    "val",
    "test",
]

## Detailed look into one specific model / run

In [None]:
# Choose the single run inspected in this section: second model, third split.
model = models[1]
split = splits[2]

# Ground-truth label folder of the chosen split.
gt_annotations_folder = "/".join((gt_base_dir, "labels", split))

In [4]:
# Load frame metadata

import pathlib
import geopandas as gpd
from typing import List, Union

from cvtoolkit.datasets.yolo_labels_dataset import YoloLabelsDataset

# Folder that holds the per-video frame metadata CSV files.
metadata_folder = "../../datasets/oor/metadata"

# CRS code for the Dutch Rijksdriehoek coordinate system.
RD_CRS = "EPSG:28992"
# CRS code for the WGS84 latitude/longitude coordinate system.
LAT_LON_CRS = "EPSG:4326"

def metadata_to_video_name(metadata_name: str) -> str:
    """
    Build a video name from a metadata file name by inserting ``-0-`` at the first hyphen.

    The name is split once at its first ``-``; the two parts are re-joined with ``-0-``
    in between (e.g. ``"a-b-c"`` becomes ``"a-0-b-c"``).

    Raises
    ------
    IndexError
        If ``metadata_name`` contains no hyphen.
    """
    pieces = metadata_name.split("-", 1)
    head = pieces[0]
    tail = pieces[1]
    return "-0-".join((head, tail))

def load_metadata_csv(metadata_file: str) -> pd.DataFrame:
    """
    Read one metadata CSV and index its rows by frame name.

    The frame name is ``<video name>_<new_frame_id>``, where the video name is derived
    from the file stem via ``metadata_to_video_name`` and the frame id is formatted with
    the ``04`` format spec (zero-padded to four characters for integer ids).

    Parameters
    ----------
    metadata_file: path of the CSV file; it must contain a ``new_frame_id`` column.

    Returns
    -------
    The CSV contents with ``frame_name`` as index.
    """
    frames = pd.read_csv(metadata_file)
    file_stem = pathlib.Path(metadata_file).stem
    name_prefix = metadata_to_video_name(file_stem)

    frame_names = []
    for frame_id in frames["new_frame_id"]:
        frame_names.append(f"{name_prefix}_{frame_id:04}")

    frames["frame_name"] = frame_names
    return frames.set_index("frame_name")

def get_target_cls_file_names(yolo_annotations_folder: str, target_cls: Union[ObjectClass, None] = None) -> List[str]:
    """
    Return the names of label files that contain at least one annotation.

    Parameters
    ----------
    yolo_annotations_folder: folder with YOLO label files, handed to ``YoloLabelsDataset``.
    target_cls: when given, the dataset is first filtered by ``target_cls.value``;
        ``None`` (the default) applies no class filter.

    Returns
    -------
    The keys of the dataset's label mapping whose entry is non-empty.
    NOTE(review): presumably these keys are file names without extension, since callers
    compare them with frame names — confirm against ``YoloLabelsDataset``.
    """
    yolo_dataset = YoloLabelsDataset(
        folder_path=yolo_annotations_folder,
        image_area=img_shape[0] * img_shape[1],
    )
    # Compare with None explicitly: a truthiness test would skip the filter for any class
    # whose enum member evaluates as falsy (e.g. value 0 on an IntEnum).
    if target_cls is not None:
        yolo_dataset.filter_by_class(target_cls.value)
    # NOTE(review): this reads a private attribute of YoloLabelsDataset; it is assumed to be
    # populated even when no filter was applied — confirm, or switch to a public accessor.
    target_labels = yolo_dataset._filtered_labels
    return [name for name, labels in target_labels.items() if len(labels) > 0]

# Collect the metadata CSVs. Sorting makes the row order independent of the order in which
# the filesystem happens to list the files.
metadata_files = sorted(pathlib.Path(metadata_folder).glob("*.csv"))
if not metadata_files:
    # Fail with a message that names the folder instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(f"No metadata CSV files found in '{metadata_folder}'")

# One frame-indexed table over all videos.
metadata_df = pd.concat(
    [load_metadata_csv(metadata_file) for metadata_file in metadata_files]
)

# Build point geometries from the GPS longitude/latitude columns, then project them to RD_CRS.
metadata_gdf = gpd.GeoDataFrame(
    metadata_df,
    geometry=gpd.points_from_xy(
        x=metadata_df.gps_lon,
        y=metadata_df.gps_lat,
        crs=LAT_LON_CRS,
    ),
).to_crs(RD_CRS)

# Only `metadata_gdf` is needed from here on.
del metadata_df, metadata_files

In [None]:
# Select the frames that contain at least one container, for ground truth and for predictions.

# Ground truth
gt_container_names = get_target_cls_file_names(gt_annotations_folder, ObjectClass.container)
# Index.isin hashes the names once instead of scanning the whole list for every frame.
keep_index = metadata_gdf.index.isin(gt_container_names)
# .copy() makes this an independent frame, so adding columns later does not write into a slice.
gt_gdf = metadata_gdf.loc[keep_index, ["gps_state", "geometry"]].copy()

# Predictions
pred_folder = f"{pred_base_dir}/{model}/labels/{split}"

pred_container_names = get_target_cls_file_names(pred_folder, ObjectClass.container)
keep_index = metadata_gdf.index.isin(pred_container_names)
pred_gdf = metadata_gdf.loc[keep_index, ["gps_state", "geometry"]].copy()

In [None]:
# For every ground-truth frame, the distance to the nearest predicted frame, and vice versa.
# Distances are in the units of the frames' CRS (RD_CRS; presumably metres — confirm).
# NOTE(review): `unary_union` is deprecated in recent GeoPandas releases in favour of
# `union_all()`; check the installed version before switching.
pred_union = pred_gdf["geometry"].unary_union
gt_union = gt_gdf["geometry"].unary_union

gt_gdf["distance"] = gt_gdf["geometry"].distance(pred_union)
pred_gdf["distance"] = pred_gdf["geometry"].distance(gt_union)

In [None]:
# Compute distance statistics
import numpy as np

# Rates are evaluated at the distance thresholds 0, 5, ..., 25.
# "fnr": share of ground-truth frames with no predicted frame within the threshold.
# "fpr": share of predicted frames with no ground-truth frame within the threshold.
# NOTE(review): "fpr" is normalised by the number of predictions, not by true negatives.
stats = {
    "distance": np.arange(0, 26, 5),
    "fnr": [],
    "fpr": [],
}

gt_total = len(gt_gdf)
pred_total = len(pred_gdf)

for dst in stats["distance"]:
    fn = np.count_nonzero(gt_gdf["distance"] > dst)
    fp = np.count_nonzero(pred_gdf["distance"] > dst)
    # An empty set has no defined rate; report NaN instead of raising ZeroDivisionError.
    stats["fnr"].append(fn / gt_total if gt_total else float("nan"))
    stats["fpr"].append(fp / pred_total if pred_total else float("nan"))

stats

In [None]:
# Plot results on a map

import matplotlib.pyplot as plt

# The outer join keeps frames that occur in only one of the two sets; overlapping column
# names get the `_gt` / `_pred` suffixes.
joined_gdf = gt_gdf.join(pred_gdf, how="outer", lsuffix="_gt", rsuffix="_pred")

fig, ax = plt.subplots()
# Ground truth is drawn with larger markers so the smaller red prediction markers stay visible on top.
joined_gdf.set_geometry("geometry_gt").plot(ax=ax, markersize=20, label="ground truth")
joined_gdf.set_geometry("geometry_pred").plot(ax=ax, color="red", markersize=5, label="prediction")
ax.set_title(f"Frames with containers: {model} / {split}")
ax.set_xlabel("x (RD)")
ax.set_ylabel("y (RD)")
ax.legend()

# Name the file after the split that was actually evaluated instead of a fixed "val" label.
fig.savefig(f"{split}_map.png")
plt.show()

## Compute Total Blurred Area for sensitive object classes

In [None]:
# Evaluate Total Blurred Area for every model / split combination.
# `tba_names` and `tba_results` are filled in the same order, one entry per run.

tba_names = []
tba_results = []

for model in models:
    for split in splits:
        print(f"Computing TBA for {model}/{split}...")
        tba_names.append(f"{model}_{split}")

        # Input folders and the per-run results file for this combination.
        gt_annotations_folder = f"{gt_base_dir}/labels/{split}"
        pred_folder = f"{pred_base_dir}/{model}/labels/{split}"
        tba_results_file = f"{pred_base_dir}/{model}/tba_results_{split}.md"

        tba_result = tba_evaluation(
            ground_truth_folder=gt_annotations_folder,
            prediction_folder=pred_folder,
            image_shape=img_shape,
            save_results=True,
            results_file=tba_results_file,
            hide_progress=True,
            upper_half=False,
        )
        tba_results.append(tba_result)

In [None]:
from objectherkenning_openbare_ruimte.performance_evaluation_pipeline.metrics.per_pixel_stats import EvaluatePixelWise

# Store all collected TBA results together, labelled with their run names.
# NOTE(review): presumably written as markdown to the given path — confirm in EvaluatePixelWise.
EvaluatePixelWise.store_tba_results(
    tba_results,
    model_name=tba_names,
    markdown_output_path="tba_results.md",
)

## Image statistics

In [4]:
# Column layout intended for the per-image statistics table.
data_labels = [
    "split",
    "target_class",
    "Precision",
    "Recall",
    "FPR",
    "FNR",
    "TNR",
]

# Object classes for which per-image statistics are computed.
target_classes = [
    ObjectClass.container,
    ObjectClass.mobile_toilet,
    ObjectClass.scaffolding,
]

# Created with the columns above only; this cell does not add any rows to it.
img_stat_df = pd.DataFrame(columns=data_labels)

# One entry per model / split run, appended in matching order.
names = []
results = []

for model in models:
    for split in splits:
        gt_annotations_folder = f"{gt_base_dir}/labels/{split}"
        pred_annotations_folder = f"{pred_base_dir}/{model}/labels/{split}"

        names.append(f"{model}_{split}")
        run_result = per_image_evaluation(
            ground_truth_folder=gt_annotations_folder,
            prediction_folder=pred_annotations_folder,
            image_shape=img_shape,
            object_classes=target_classes,
        )
        results.append(run_result)

In [None]:
# TODO create df out of results

In [None]:
# Show the per-image statistics for the container class.
# NOTE(review): `img_stat_df` is created with columns only and is not filled by any visible cell,
# so this displays an empty table until `results` is converted into the frame.
img_stat_df[img_stat_df["target_class"] == "container"]

## Run custom COCO evaluation

In [None]:
# Compute COCO evaluation for every model / split / class-group combination.

# Columns of the results table: four descriptive columns followed by the evaluation stats.
data_labels = [
    "model_size",
    "img_size",
    "split",
    "target_class",
    "AP@50-95_all",
    "AP@75_all",
    "AP@50_all",
    "AP@50_small",
    "AP@50_medium",
    "AP@50_large",
    "AR@50-95_all",
    "AR@75_all",
    "AR@50_all",
    "AR@50_small",
    "AR@50_medium",
    "AR@50_large",
]

# Class-id groups to evaluate and their display names, paired by position.
# NOTE(review): "all" covers ids 0-2 only, although ids 3 and 4 are evaluated separately
# below — confirm that this is intended.
target_classes = [[0, 1, 2], [0], [1], [2], [3], [4]]
target_class_names = ["all", "person", "license_plate", "container", "mobile_toilet", "scaffolding"]

# Rows are collected in plain lists and turned into a DataFrame once at the end; growing a
# DataFrame row by row is slow and leaves every column with object dtype.
coco_row_names = []
coco_rows = []

for model in models:
    # Model names are expected to look like "<architecture>_<image size>_...".
    name_parts = model.split(sep="_")
    # The size letter is the trailing character of the architecture part ("m" in "yolov8m").
    # Names without a letter ("yolov8_...") would otherwise report the version digit as the size.
    size_suffix = name_parts[0][-1]
    model_size = size_suffix if size_suffix.isalpha() else None
    img_size = int(name_parts[1])

    for split in splits:
        pred_folder = f"{pred_base_dir}/{model}/labels/{split}"
        pred_coco_json = f"{pred_base_dir}/{model}/coco_predictions_{split}.json"
        gt_coco_json = f"{gt_base_dir}/coco_gt_{split}.json"

        # The predictions JSON is regenerated on every run (an existing file is not reused).
        predictions_to_coco_json(predictions_folder=pred_folder, image_shape=img_shape, json_file=pred_coco_json)

        for target_cls_name, target_cls in zip(target_class_names, target_classes):
            print(f"EVALUATING {model} / {split}, TARGET CLASS {target_cls_name}")
            # Named `coco_eval` so the builtin `eval` is not shadowed for the rest of the session.
            coco_eval = coco_evaluation(
                coco_annotations_json=gt_coco_json,
                coco_predictions_json=pred_coco_json,
                predicted_img_shape=img_shape,
                class_ids=target_cls,
                print_summary=True,
            )
            coco_row_names.append(f"{model}_{split}_{target_cls_name}")
            coco_rows.append([model_size, img_size, split, target_cls_name, *coco_eval.stats])

coco_df = pd.DataFrame(coco_rows, columns=data_labels, index=coco_row_names)

In [None]:
# Persist the full COCO results table next to the notebook.
coco_df.to_csv(path_or_buf="coco_results.csv")

In [None]:
# Demo view: show floats with two decimals and keep only the columns of interest.
pd.set_option("display.float_format", "{:.2f}".format)

cols_to_show = [
    "target_class",
    "AP@50-95_all",
    "AP@50_all",
    "AP@50_small",
    "AP@50_medium",
    "AP@50_large",
    "AR@50-95_all",
    "AR@50_all",
    "AR@50_small",
    "AR@50_medium",
    "AR@50_large",
]

# All rows are kept; a row filter on "model_size" / "img_size" could be applied to `coco_df`
# here to narrow the demo down to specific runs.
demo_df = coco_df[cols_to_show]

In [None]:
# Demo results for the combined "all" class group.
demo_df.loc[demo_df["target_class"] == "all"]

In [None]:
# Demo results for the container class.
demo_df.loc[demo_df["target_class"] == "container"]

In [None]:
# Demo results for the person class.
demo_df.loc[demo_df["target_class"] == "person"]

In [None]:
# Full (all columns) COCO results for the container class.
coco_df.loc[coco_df["target_class"] == "container"]