## Perform data minimisation

In [None]:
import os
import sys

# Extend the import search path two directories up from the working directory.
# NOTE(review): presumably this is the repository root, so that the
# `objectherkenning_openbare_ruimte` imports below resolve when the notebook
# is started from its own folder — confirm.
sys.path.append("../..")

from objectherkenning_openbare_ruimte.performance_evaluation_pipeline.metrics import tba_calculator
from objectherkenning_openbare_ruimte.data_minimisation.data_minimisation import DataMinimisation, Scenarios

In [None]:
# Name of the sub-folder of `data_folder` that holds the annotation files.
annotations: str = "predictions"

# Root folder of the data set; images are read from its "images" sub-folder.
data_folder: str = "../../datasets/oor/data-minimalisation"

# Folder handed to `process_folder` as its output location.
output_folder: str = "../../datasets/oor/data-minimalisation/blurred"

In [None]:
# Resolve the input locations first; `labels_folder` is reused by the
# single-image cell further down.
images_folder = os.path.join(data_folder, "images")
labels_folder = os.path.join(data_folder, annotations)

# Process every image of the folder in one go.
data_minimisation = DataMinimisation()
data_minimisation.process_folder(
    images_folder,
    labels_folder,
    output_folder,
    image_format="png",
)

In [None]:
# Single image: run one scenario on one hand-picked file.
scenario = Scenarios.A
image_path = "../../datasets/oor/data-minimalisation/images/TMX7316010203-001666_pano_0000_003484_left.png"

data_minimisation = DataMinimisation()

# The loader is invoked on the class; it gets the image path and the folder
# with the annotation files and yields the image plus its annotations.
image, yolo_annotations = DataMinimisation.load_image_and_annotations(
    image_path,
    labels_folder,
)
image = data_minimisation.process_image(
    image,
    yolo_annotations,
    scenario,
)

In [None]:
from PIL import Image
from IPython.display import display

# Reverse the order of the last (channel) axis before handing the array to PIL.
# NOTE(review): presumably the processed image is in BGR order and PIL expects
# RGB — confirm.
channel_reversed = image[:, :, ::-1]
display(Image.fromarray(channel_reversed))

## Evaluate performance

In [None]:
# This cell makes Python use the local checkout of CVToolkit instead of the installed package.
# Use it for testing purposes only, until branch feature/BCV-970-oor-metrics is merged.
import sys
import os

# Absolute location of the local CVToolkit checkout, relative to the current
# working directory.
module_path = os.path.abspath("../../CVToolkit")

# Put it at the front of the search path (only once) so that it takes
# precedence over an installed copy.
if not (module_path in sys.path):
    sys.path.insert(0, module_path)

## Collect sample data

In [None]:
# Find all images with containers: first count the labels per class for every validation image

import os
import pandas as pd
import pathlib

from typing import List

def get_labels(label_file: str) -> List[int]:
    """Count the annotations per class id in a label file.

    Every non-blank line of the file is expected to start with an integer
    class id, followed by further whitespace-separated fields. Only the
    class id is read.

    Args:
        label_file: Path of the label file.

    Returns:
        A list of five counters, indexed by class id. All counters are zero
        when ``label_file`` does not exist.

    Raises:
        ValueError: If the first field of a line is not an integer.
        IndexError: If a class id is outside the range ``0..4``.
    """
    labels: List[int] = [0] * 5
    if not os.path.isfile(label_file):
        return labels
    with open(label_file, "r") as file:
        for line in file:
            # split() without a separator tolerates tabs, repeated spaces and
            # the trailing newline, and yields [] for blank lines.
            fields = line.split()
            if not fields:
                continue
            label = int(fields[0])
            # Reject negative ids explicitly: a negative list index would
            # silently be counted for a class at the end of the list.
            if not 0 <= label < len(labels):
                raise IndexError(
                    f"class id {label} out of range in {label_file}"
                )
            labels[label] += 1
    return labels

data_folder = "../../datasets/oor/first-train-oor-nc5"

# All PNG images of the validation split.
images = list(pathlib.Path(os.path.join(data_folder, "images", "val")).glob("*.png"))

# A label file shares the stem of its image and lives under labels/val.
stems = [image_path.stem for image_path in images]
counts_per_image = [
    get_labels(os.path.join(data_folder, "labels", "val", stem + ".txt"))
    for stem in stems
]

# One row per image; class ids 0, 1 and 2 feed the person, license plate and
# container columns respectively.
data = {
    "filename": stems,
    "n_person": [counts[0] for counts in counts_per_image],
    "n_license_plate": [counts[1] for counts in counts_per_image],
    "n_container": [counts[2] for counts in counts_per_image],
}

df = pd.DataFrame(data=data)

In [None]:
import shutil

# Keep the images that show a container together with at least one license
# plate or person.
selection = df.query("n_container > 0 & (n_license_plate > 0 | n_person > 0)")
filenames = selection["filename"].to_list()

target_dir = "../../datasets/oor/data-minimalisation"

# Make sure both destination folders exist before copying.
for subfolder in ("images", "labels"):
    os.makedirs(os.path.join(target_dir, subfolder), exist_ok=True)

# Copy each selected image followed by its label file out of the validation
# split.
for stem in filenames:
    for subfolder, extension in (("images", ".png"), ("labels", ".txt")):
        source = os.path.join(data_folder, subfolder, "val", stem + extension)
        destination = os.path.join(target_dir, subfolder, stem + extension)
        shutil.copyfile(source, destination)