In [None]:
import yaml
from pathlib import Path

def remap_yolo_annotations(yaml_path: str):
    """
    Remaps YOLO dataset annotations according to specified categories:
      - Keeps:
          "mitóza"
          "referenčné bunky - erytrocyt"
          "referenčná bunky - lymfocyt"
          "hyperchrómne jadro"
      - Merges all others into "podozrivé jadro"
    Updates both YAML and label files.
    """

    yaml_path = Path(yaml_path)
    with open(yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    keep_labels = {
        "mitóza": "mitóza",
        "referenčné bunky - erytrocyt": "referenčné bunky - erytrocyt",
        "referenčná bunky - lymfocyt": "referenčná bunky - lymfocyt",
        "hyperchrómne jadro": "hyperchrómne jadro",
    }

    new_label_names = [
        "podozrivé jadro",  # merged class
        *keep_labels.values(),
    ]
    new_label_to_id = {name: i for i, name in enumerate(new_label_names)}

    # build a mapping from old_id -> new_id
    print(data["names"])
    old_to_new = {}
    for old_id, old_name in data["names"].items():
        old_name = bytes(old_name, "utf-8").decode("utf-8")
        if old_name in keep_labels:
            new_id = new_label_to_id[keep_labels[old_name]]
        else:
            new_id = new_label_to_id["podozrivé jadro"]
        old_to_new[int(old_id)] = new_id

    # # update YAML
    # data["names"] = {i: name for i, name in enumerate(new_label_names)}
    # with open(yaml_path, "w", encoding="utf-8") as f:
    #     yaml.dump(data, f, allow_unicode=True)

    # # update label files
    # base_path = yaml_path.parent
    # for split in ["train", "val"]:
    #     label_dir = base_path / data[split].replace("images", "labels")
    #     for txt_file in label_dir.rglob("*.txt"):
    #         lines = txt_file.read_text(encoding="utf-8").strip().splitlines()
    #         new_lines = []
    #         for line in lines:
    #             if not line.strip():
    #                 continue
    #             parts = line.split()
    #             old_id = int(parts[0])
    #             new_id = old_to_new[old_id]
    #             parts[0] = str(new_id)
    #             new_lines.append(" ".join(parts))
    #         txt_file.write_text("\n".join(new_lines), encoding="utf-8")

    print("Remapping completed successfully.")
    print("New class mapping:")
    for i, name in data["names"].items():
        print(f"{i}: {name}")
    return old_to_new


In [21]:
old_to_new = remap_yolo_annotations('/Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/yolo-initial-640/yolo_dataset/data.yaml')

{0: 'veľké nepravidelné jadro', 1: 'veľké jadro', 2: 'nepravidelné jadro', 3: 'viacpočetné jadierka', 4: 'veľké jadierko', 5: 'mitóza', 6: 'hyperchrómne jadro', 7: 'referenčné bunky - erytrocyt', 8: 'referenčná bunky - lymfocyt', 9: 'viacjadrová bunka'}
{'path': '/Users/simon/Documents/000_fiit/09_semester/DP/notebooks/pleomorphy-analysis/dp-pleomorphy-analysis/data/processed/initial-640/yolo_dataset', 'train': 'images/train', 'val': 'images/val', 'names': {0: 'podozrivé jadro', 1: 'mitóza', 2: 'referenčné bunky - erytrocyt', 3: 'referenčná bunky - lymfocyt', 4: 'hyperchrómne jadro'}}
✅ Remapping completed successfully.
New class mapping:
0: podozrivé jadro
1: mitóza
2: referenčné bunky - erytrocyt
3: referenčná bunky - lymfocyt
4: hyperchrómne jadro


In [23]:
old_to_new

{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 4, 7: 2, 8: 3, 9: 0}