<p style='
  color: var(--vscode-foreground, #3b4045); 
  text-align: center;
  font-weight: bold;
  font-family: -apple-system,BlinkMacSystemFont, "Segoe UI Adjusted","Segoe UI","Liberation Sans",sans-serif;     font-size: 2.07692308rem; '> 
    Clean and Export MAPLES-DR Biomarkers Maps
</p>


This notebook cleans the biomarker maps labelled by clinicians and packages them into the MAPLES-DR archives.

#### Imports


In [1]:
import tempfile
from pathlib import Path
from shutil import rmtree
from zipfile import ZipFile

import cv2
import numpy as np
import pandas as pd
import skimage.morphology as skmorph
import yaml
from maples_dr.utilities import Point, Rect
from skimage import measure as skmeasure
from tqdm.auto import tqdm

tqdm.pandas()

## Paths


Path to the source folder of MAPLES-DR segmentation maps


In [2]:
# Source folder of the raw annotation maps; read below as ANNOTATIONS_PATH / <biomarker> / <image>.png.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
ANNOTATIONS_PATH = Path("/home/gaby/These/Data/Fundus/MESSIDOR1500/MAPLES-DR/AnnotationsWIP/10_merged/")

Path to MAPLES-DR fundus images (to compute the mask).


In [3]:
# Folder of the fundus images (<image>.png) from which the field-of-view masks are computed.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
MAPLES_IMAGES = Path("/home/gaby/These/Data/Fundus/MESSIDOR1500/0-images")

Output folder for cleaned maps


In [4]:
# NOTE(review): machine-specific absolute paths -- adjust before running elsewhere.
OUTPUT_PATH = Path("/home/gaby/These/Data/Fundus/MESSIDOR1500/MAPLES-DR/")

# Working folder: holds dataset_record.yaml, the .xls/.csv metadata files and the
# pre-annotations, and receives the cleaned maps under annotations/<biomarker>/.
MAPLES_DR_ADDITIONAL = OUTPUT_PATH / "AdditionalData/"

# Folder in which the two final archives are written.
MAPLES_DR_OUTPUT_PATH = Path("/home/gaby/These/Data/Fundus/MAPLES-DR/")

# Archive of the cleaned maps, pre-annotations and metadata files.
MAPLES_DR_ADDITIONAL_ZIP = MAPLES_DR_OUTPUT_PATH / "AdditionalData.zip"
# Archive of the train/test biomarker maps and their diagnosis CSV files.
MAPLES_DR_ZIP = MAPLES_DR_OUTPUT_PATH / "MAPLES-DR.zip"

## Load images


Load the image names (with their train / test / duplicates split) and the biomarker names from the dataset record.


In [5]:
with open(MAPLES_DR_ADDITIONAL / "dataset_record.yaml", "r") as f:
    # NOTE(review): yaml.Loader can instantiate arbitrary Python objects; this is only
    # acceptable because the record is a trusted, locally produced file.
    record = yaml.load(f, yaml.Loader)

biomarkers = record["biomarkers"]
duplicates = list(record["duplicates"].values())

# Map every image name to its split. Should a name appear in several splits,
# the later entry wins (train < test < duplicates).
maples_imgs = {
    **dict.fromkeys(record["train"], "train"),
    **dict.fromkeys(record["test"], "test"),
    **dict.fromkeys(duplicates, "duplicates"),
}

## Clean Biomarkers


In [6]:
def compute_mask(img, blur_radius=5, morphological_clean=True, smoothing_radius=4):
    """Compute a boolean field-of-view mask from a fundus image.

    Channel 1 of ``img`` (the green channel for an image loaded with ``cv2.imread``)
    is median-blurred and every pixel brighter than 10 is considered inside the mask.

    Parameters
    ----------
    img : array
        Multi-channel image indexed as ``img[..., 1]``.
    blur_radius : int
        The median filter aperture is ``2 * blur_radius - 1``.
    morphological_clean : bool
        When true, foreground components smaller than 5000 pixels are removed and
        every background component that does not touch the image border is filled.
    smoothing_radius : int
        Standard deviation of the Gaussian used to smooth the mask contour (kernel
        size ``6 * smoothing_radius + 1``, zero padding). The blurred 0/255 mask is
        re-thresholded at ``> 200``. Values ``<= 0`` disable the smoothing.

    Returns
    -------
    Boolean array with the same height and width as ``img``.
    """
    blurred = cv2.medianBlur(img[..., 1], blur_radius * 2 - 1)
    mask = blurred > 10

    if morphological_clean:
        # Remove small objects
        mask = skmorph.remove_small_objects(mask, 5000)

        # Fill the holes that are not connected to the image border. The labels seen on
        # the four edges are collected once and every other background component is
        # filled in a single pass, instead of scanning the full image once per hole.
        labelled_holes = skmeasure.label(mask == 0)
        border_labels = np.unique(
            np.concatenate(
                [
                    labelled_holes[0, :],
                    labelled_holes[-1, :],
                    labelled_holes[:, 0],
                    labelled_holes[:, -1],
                ]
            )
        )
        # Label 0 marks the pixels already inside the mask, so it is excluded explicitly.
        enclosed_holes = (labelled_holes > 0) & ~np.isin(labelled_holes, border_labels)
        mask[enclosed_holes] = True

    if smoothing_radius > 0:
        kernel_size = smoothing_radius * 6 + 1
        smoothed = cv2.GaussianBlur(
            mask.astype(np.uint8) * 255,
            (kernel_size, kernel_size),
            smoothing_radius,
            borderType=cv2.BORDER_CONSTANT,
        )
        mask = smoothed > 200
    return mask

In [7]:
def clean_biomarkers(img, mask, remove_small_objects=0, fill_small_holes=0):
    """Restrict a biomarker map to the field-of-view mask and clean it up.

    Parameters
    ----------
    img : array
        Biomarker map; it is copied, never modified in place.
    mask : boolean array
        Pixels where ``mask`` is false are set to 0 in the result.
    remove_small_objects : int
        If non-zero, forwarded to ``skimage.morphology.remove_small_objects``
        as the size threshold.
    fill_small_holes : int
        If non-zero, forwarded to ``skimage.morphology.remove_small_holes``
        as the area threshold.

    Returns
    -------
    The cleaned copy of ``img``.
    """
    cleaned = img.copy()
    outside_mask = ~mask
    cleaned[outside_mask] = 0

    if remove_small_objects:
        cleaned = skmorph.remove_small_objects(cleaned, remove_small_objects)

    if fill_small_holes:
        cleaned = skmorph.remove_small_holes(cleaned, fill_small_holes)

    return cleaned


# Per-biomarker cleaning thresholds, stored as (remove_small_objects, fill_small_holes)
# and forwarded to clean_biomarkers(); a value of 0 disables the corresponding step.
# Biomarkers missing from this table are looked up with a (0, 0) default, i.e. they
# are only restricted to the field-of-view mask.
biomarkers_remove_small = {
    "Vessels": (100, 50),
    "Hemorrhages": (100, 0),
    "Exudates": (20, 0),
    "Microaneurysms": (20, 0),
    "CottonWoolSpots": (100, 40),
    "BrightUncertains": (20, 0),
    "RedUncertains": (20, 0),
    "Drusens": (20, 0),
    "Neovascularization": (70, 50),
    "OpticDisc": (200, 300),
    "OpticCup": (100, 100),
}

## Generate Additional Data

In [8]:
# Paths of every cleaned map written by this cell (consumed when building the archive).
output_images = []

# Start from empty per-biomarker folders so that maps left over from a previous run
# cannot survive next to the freshly generated ones.
for biomarker in biomarkers:
    biomarker_output_path = MAPLES_DR_ADDITIONAL / "annotations" / biomarker
    rmtree(biomarker_output_path, ignore_errors=True)
    biomarker_output_path.mkdir(parents=True)

for img_name in tqdm(maples_imgs):
    # The field-of-view mask is computed once per fundus image and shared by all biomarkers.
    fundus_path = MAPLES_IMAGES / (img_name + ".png")
    fundus = cv2.imread(str(fundus_path))
    if fundus is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable.
        raise FileNotFoundError(f"Unable to read fundus image: {fundus_path}")
    mask = compute_mask(fundus)

    for biomarker in biomarkers:
        annotation_path = ANNOTATIONS_PATH / biomarker / (img_name + ".png")
        annotation = cv2.imread(str(annotation_path), cv2.IMREAD_GRAYSCALE)
        if annotation is None:
            raise FileNotFoundError(f"Unable to read annotation map: {annotation_path}")
        biomarker_mask = annotation > 0

        remove_small_objects, fill_small_holes = biomarkers_remove_small.get(biomarker, (0, 0))
        biomarker_mask = clean_biomarkers(biomarker_mask, mask, remove_small_objects, fill_small_holes)

        image_output_path = MAPLES_DR_ADDITIONAL / "annotations" / biomarker / (img_name + ".png")
        # Hand OpenCV an explicit 8-bit image (values 0/1) rather than a platform-dependent
        # `int` array, and check the result: cv2.imwrite reports failures by returning False.
        written = cv2.imwrite(
            str(image_output_path),
            biomarker_mask.astype(np.uint8),
            [cv2.IMWRITE_PNG_BILEVEL, 1],
        )
        if not written:
            raise OSError(f"Unable to write cleaned map: {image_output_path}")
        output_images.append(image_output_path)

  0%|          | 0/200 [00:00<?, ?it/s]

Create the AdditionalData ZIP archive containing the cleaned maps, the pre-annotations and the metadata files.

In [9]:
# Folder inside the archive under which everything is stored (empty = archive root).
root = Path("")

# Metadata files stored at the top level of the archive, in writing order.
metadata_files = (
    "dataset_record.yaml",
    "biomarkers_annotation_infos.xls",
    "diagnosis_infos.xls",
    "MESSIDOR-ROIs.csv",
)

with ZipFile(MAPLES_DR_ADDITIONAL_ZIP, "w") as zipf:
    # Cleaned maps, stored with their path relative to the AdditionalData folder.
    for img in output_images:
        zipf.write(img, root / img.relative_to(MAPLES_DR_ADDITIONAL))

    # Pre-annotation maps already present in the AdditionalData folder.
    for pre_img in MAPLES_DR_ADDITIONAL.glob("preannotations/*/*.png"):
        zipf.write(pre_img, root / pre_img.relative_to(MAPLES_DR_ADDITIONAL))

    for metadata_file in metadata_files:
        zipf.write(MAPLES_DR_ADDITIONAL / metadata_file, root / metadata_file)

## Generate biomarkers map

In [18]:
# Regions of interest, indexed by image name (columns H, W, y0, x0, y1, x1 are read below).
rois_csv = MAPLES_DR_ADDITIONAL / "MESSIDOR-ROIs.csv"
with rois_csv.open() as f:
    rois = pd.read_csv(f).set_index("name")

biomarkers = record["biomarkers"]

# Consensus DR and ME grades, one row per image name.
# NOTE(review): the two sheets are combined row by row (default index alignment), so the
# "ME" sheet is assumed to list the images in the same order as the "DR" sheet -- confirm.
diagnosis_xls = MAPLES_DR_ADDITIONAL / "diagnosis_infos.xls"
diagnosis_DR = pd.read_excel(diagnosis_xls, sheet_name="DR")
diagnosis_ME = pd.read_excel(diagnosis_xls, sheet_name="ME")
diagnosis = pd.DataFrame(
    data={
        "DR": diagnosis_DR["Consensus"],
        "ME": diagnosis_ME["Consensus"],
        "name": diagnosis_DR["name"],
    }
).set_index("name")

with tempfile.TemporaryDirectory() as tmpdir:
    TEMP_DIR = Path(tmpdir)

    # The diagnosis files are written into these folders below; create them up front so
    # that the export does not depend on the map loop having created them.
    for split_dir in ("train", "test"):
        (TEMP_DIR / split_dir).mkdir(parents=True, exist_ok=True)

    # Generate one binary map per biomarker, at MESSIDOR resolution, in a temporary directory
    for imgname, split in tqdm(maples_imgs.items()):
        imgfile = imgname + ".png"

        # Duplicated images are not exported in the MAPLES-DR archive
        if split == "duplicates":
            continue

        # Full image size and region of interest of this image
        roi_row = rois.loc[imgname]
        messidor_shape = Point(int(roi_row["H"]), int(roi_row["W"]))
        roi: Rect = Rect.from_points(
            int(roi_row["y0"]), int(roi_row["x0"]), int(roi_row["y1"]), int(roi_row["x1"])
        )

        # NOTE(review): filled for every image but never read in this cell.
        biomarkers_map = {}

        for biomarker in biomarkers:
            # Read MAPLES segmentation map
            src_path = MAPLES_DR_ADDITIONAL / "annotations" / biomarker / imgfile
            src = cv2.imread(str(src_path), cv2.IMREAD_GRAYSCALE)
            if src is None:
                # cv2.imread returns None instead of raising when the file cannot be read.
                raise FileNotFoundError(f"Unable to read cleaned map: {src_path}")

            # Resize to the ROI size and binarize again
            # NOTE(review): cv2.resize expects dsize as (width, height); confirm that
            # roi.shape yields that order (it makes no difference for square ROIs).
            src_resized = cv2.resize(src, roi.shape) > 20

            # Pad according to MESSIDOR ROI
            dest_img = np.zeros(messidor_shape, dtype=bool)
            dest_img[roi.slice()] = src_resized

            # Store result in RAM
            biomarkers_map[biomarker] = dest_img

            # Export map
            # - Ensure the path exists
            biomarker_path = TEMP_DIR / split / biomarker
            biomarker_path.mkdir(exist_ok=True, parents=True)

            # - Write image (cv2.imwrite reports failures by returning False)
            dest_path = biomarker_path / imgfile
            if not cv2.imwrite(str(dest_path), dest_img * np.uint8(255), [cv2.IMWRITE_PNG_BILEVEL, 1]):
                raise OSError(f"Unable to write biomarker map: {dest_path}")

    # Write diagnosis files
    diagnosis.loc[record["train"]].to_csv(TEMP_DIR / "train" / "diagnosis.csv")
    diagnosis.loc[record["test"]].to_csv(TEMP_DIR / "test" / "diagnosis.csv")

    # Zip the temporary directory (maps are stored as <split>/<biomarker>/<image>.png)
    with ZipFile(MAPLES_DR_ZIP, "w") as zipf:
        for img in TEMP_DIR.glob("*/*/*.png"):
            zipf.write(img, img.relative_to(TEMP_DIR))
        zipf.write(TEMP_DIR / "train" / "diagnosis.csv", "train/diagnosis.csv")
        zipf.write(TEMP_DIR / "test" / "diagnosis.csv", "test/diagnosis.csv")

  0%|          | 0/200 [00:00<?, ?it/s]