MAPLES-DR Intervariability Study
================================

In [1]:
%load_ext autoreload
%autoreload 2

# Import maples-dr
import maples_dr
import numpy as np
import pandas as pd

# Import visualization tools
from ipywidgets import HTML, Dropdown, GridBox, Layout
from jppype import imshow, sync_views, vscode_theme
from maples_dr.dataset import BiomarkerField as Bio
from maples_dr.dataset import FundusField as Fundus
from maples_dr.quick_api import GLOBAL_LOADER
from sklearn.metrics import cohen_kappa_score
from coloraide import Color

# Import utilities
from variability_study_utils import (
    centroid,
    load_new_annotations,
    multi_annotator_regions_diff,
    regions_f1,
)

vscode_theme()

HTML(value="<style>\n        .cell-output-ipywidget-background {\n                background: transparent !imp…

In [2]:
maples_dr.configure(
    maples_dr_path="../PATH/TO/MAPLES-DR/AdditionalData.zip",
    messidor_path="../PATH/TO/MESSIDOR/",
    image_format="bgr",
    preprocessing="clahe",
)
maples_dataset = GLOBAL_LOADER.load_dataset("all_with_duplicates")

Output()

# Variability study on the duplicated images

In [3]:
duplicates = list(GLOBAL_LOADER.dataset_record["duplicates"].items())

#### Qualitative study

In [4]:
ID = 0
sample1, sample2 = [maples_dataset[_] for _ in duplicates[ID]]

In [5]:
selectors = [
    Dropdown(
        options=[field.value for field in Bio],
        description="Biomarker:",
        layout=Layout(width="auto"),
        value=("brightLesions", "redLesions", "vessels")[i],
    )
    for i in range(3)
]
views = [imshow(sample1["fundus"]) for i in range(3)]
for i in range(1, 3):
    views[i]._left_ruler = False

for i in range(3):

    def set_label(biomarker, i=i):
        if isinstance(biomarker, dict):
            biomarker = biomarker["new"]
        views[i].add_label(
            sample1[biomarker] + 2 * sample2[biomarker],
            colormap={1: "#a56fb9", 2: "#7aa8ba", 3: "white"},
            name="biomarker",
        )

    selectors[i].observe(set_label, "value")

    set_label(selectors[i].value)


sync_views(*views)

GridBox(
    selectors + views,
    layout=Layout(grid_template_columns="repeat(3, 1fr)", grid_template_rows="auto 600px"),
)

GridBox(children=(Dropdown(description='Biomarker:', index=4, layout=Layout(width='auto'), options=('opticCup'…

In [6]:
selector = Dropdown(
    options=[field.value for field in Bio],
    description="Biomarker:",
    layout=Layout(width="auto"),
    value=("brightLesions", "redLesions", "vessels")[i],
)

duplicates_samples = [[maples_dataset[_[0]], maples_dataset[_[1]]] for _ in duplicates]
(s1a, s1b), (s2a, s2b) = duplicates_samples
view1 = imshow(s1a["fundus"])
view2 = imshow(s1a["fundus"])
view2._left_ruler = False


def set_label(biomarker):
    if isinstance(biomarker, dict):
        biomarker = biomarker["new"]
    view1.add_label(
        s1a[biomarker] + 2 * s1b[biomarker],
        colormap={1: "#a56fb9", 2: "#7aa8ba", 3: "white"},
        name="biomarker",
    )
    view2.add_label(
        s1a.read_biomarker(biomarker, pre_annotation=True) + 2 * s1b.read_biomarker(biomarker, pre_annotation=True),
        colormap={1: "#a56fb9", 2: "#7aa8ba", 3: "white"},
        name="biomarker",
    )


selector.observe(set_label, "value")

set_label(selectors[i].value)

sync_views(view1, view2)

GridBox(
    [selector, HTML(), view1, view2],
    layout=Layout(grid_template_columns="repeat(2, 1fr)", grid_template_rows="auto 600px"),
)

GridBox(children=(Dropdown(description='Biomarker:', index=3, layout=Layout(width='auto'), options=('opticCup'…

In [7]:
selector = Dropdown(
    options=[field.value for field in Bio],
    description="Biomarker:",
    layout=Layout(width="auto"),
    value=("brightLesions", "redLesions", "vessels")[i],
)

duplicates_samples = [[maples_dataset[_[0]], maples_dataset[_[1]]] for _ in duplicates]
(s1a, s1b), (s2a, s2b) = duplicates_samples
view1b = imshow(s2a["fundus"])
view2b = imshow(s2a["fundus"])
view2b._left_ruler = False


def set_label(biomarker):
    if isinstance(biomarker, dict):
        biomarker = biomarker["new"]
    view1b.add_label(
        s2a[biomarker] + 2 * s2b[biomarker],
        colormap={1: "#a56fb9", 2: "#7aa8ba", 3: "white"},
        name="biomarker",
    )
    view2b.add_label(
        s2a.read_biomarker(biomarker, pre_annotation=True) + 2 * s2b.read_biomarker(biomarker, pre_annotation=True),
        colormap={1: "#a56fb9", 2: "#7aa8ba", 3: "white"},
        name="biomarker",
    )


selector.observe(set_label, "value")

set_label(selectors[i].value)

sync_views(view1b, view2b)

GridBox(
    [selector, HTML(), view1b, view2b],
    layout=Layout(grid_template_columns="repeat(2, 1fr)", grid_template_rows="auto 600px"),
)

GridBox(children=(Dropdown(description='Biomarker:', index=3, layout=Layout(width='auto'), options=('opticCup'…

### Quantitative

In [8]:
biomarkers = [
    field.value
    for field in (
        Bio.VESSELS,
        Bio.OPTIC_CUP,
        Bio.OPTIC_DISC,
        Bio.MACULA,
        Bio.RED_LESIONS,
        Bio.BRIGHT_LESIONS,
    )
]

data = {}
for bio in biomarkers:
    bio1 = np.array([s1a[bio], s2a[bio]], dtype=bool)
    bio2 = np.array([s1b[bio], s2b[bio]], dtype=bool)
    kappa = cohen_kappa_score(bio1.flatten(), bio2.flatten(), labels=[0, 1])
    accuracy = np.mean(bio1 == bio2)
    dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
    data[bio] = {"kappa": kappa, "accuracy": accuracy, "dice": dice}

for bio in [Bio.MACULA, Bio.OPTIC_CUP, Bio.OPTIC_DISC]:
    d = np.mean([centroid(sa[bio]).distance(centroid(sb[bio])) for sa, sb in duplicates_samples])
    data[bio.value]["distance"] = d

for bio in [Bio.RED_LESIONS, Bio.BRIGHT_LESIONS]:
    data[bio.value]["mean detection f1"] = np.nanmean([regions_f1(sa[bio], sb[bio]) for sa, sb in duplicates_samples])


pd.DataFrame(data).round(3).fillna("")

Unnamed: 0,vessels,opticCup,opticDisc,macula,redLesions,brightLesions
kappa,0.795,0.847,0.958,0.045,0.48,0.176
accuracy,0.961,0.999,0.999,0.999,0.999,0.998
dice,0.817,0.848,0.958,0.045,0.48,0.177
distance,,5.581,2.365,24.316,,
mean detection f1,,,,,0.616,0.083


# Variability Study on reannotated Images

### Load datasets

In [9]:
new_annotations = load_new_annotations()
published_dataset = GLOBAL_LOADER.load_dataset()

### Visualize the data

In [10]:
images = new_annotations[0].keys()
current_img = [images[0], 0]

In [11]:
img_selector = Dropdown(
    options=images,
    description="Biomarker:",
    layout=Layout(width="auto"),
    value=current_img[0],
)

retinologist_selector = Dropdown(
    options=[i + 1 for i in range(len(new_annotations))],
    description="Retinologist:",
    layout=Layout(width="auto"),
    value=current_img[1] + 1,
)

s = published_dataset[images[0]]

view1 = imshow(s["fundus"])
view2 = imshow(s["fundus"])
view2._left_ruler = False
view3 = imshow(s["fundus"])
view3._top_ruler = False
view4 = imshow(s["fundus"])
view4._top_ruler = False
view4._left_ruler = False

cmap = {
    # --- Pre-annotations Edited---
    2: "#d41616",  # Not in published, removed from pre-annotation
    3: "#860e0e",  # In published, removed from pre-annotation
    4: "#154f00",  # Not in published nor pre-annotation, added by ret
    5: "#248600",  # Published not in pre-annotation, added by ret
    # --- Pre-annotations not edited ---
    1: "#a8394988",  # In published, not in refined
    6: "#999bc9",  # Not in published
    7: "#cccccc",  # In published and refined
}


def setup_views():
    img, ret = current_img

    s_published = published_dataset[img]
    s_refined = new_annotations[ret][img]

    for v, l in zip(
        [view1, view2, view3, view4],
        [Bio.MICROANEURYSMS, Bio.HEMORRHAGES, Bio.EXUDATES, Bio.COTTON_WOOL_SPOTS],
        strict=True,
    ):
        diff = (
            s_published[l]
            + 2 * s_refined.read_biomarker(l, pre_annotation=True)
            + 4 * s_refined.read_biomarker(l, pre_annotation=False)
        )
        v["background"] = s_published["fundus"]
        v.add_label(
            diff,
            name="diff",
            colormap=cmap,
        )


def set_img(img):
    if isinstance(img, dict):
        img = img["new"]
    current_img[0] = img
    setup_views()


def set_ret(ret):
    if isinstance(ret, dict):
        ret = ret["new"]
    current_img[1] = ret - 1
    setup_views()


img_selector.observe(set_img, "value")
retinologist_selector.observe(set_ret, "value")

setup_views()
sync_views(view1, view2, view3, view4)

legend1 = HTML(
    f"""
<h3 style="color: var(--jppype-foreground-color);"> Pre-annotations edition </h3>
<p style="color: var(--jppype-foreground-color); font-size: 14px; text-align: center;">
    Removed by retinologist:
    <span style="display: inline-block"><span style="background-color: {cmap[2]}; width: 14px; height: 14px; 
    border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Not published; </span>
    <span style="display: inline-block"><span style="background-color: {cmap[3]}; width: 14px; height: 14px; 
    border-radius: 7px; display: inline-block; margin-left: 20px"> </span> in published </span>
</p>
<p style="color: var(--jppype-foreground-color); font-size: 14px; text-align: center;">    
    Added by retinologist:
    <span style="display: inline-block"><span style="background-color: {cmap[4]}; width: 14px; height: 14px;
     border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Not in published; </span>
    <span style="display: inline-block"><span style="background-color: {cmap[5]}; width: 14px; height: 14px; 
    border-radius: 7px; display: inline-block; margin-left: 20px"> </span> In published. </span>
</p>
"""
)
legend2 = HTML(
    f"""
<h3 style="color: var(--jppype-foreground-color);"> Pre-annotations kept </h3>
<p  style="color: var(--jppype-foreground-color); font-size: 14px; text-align: center;">
    <span style="display: inline-block"><span style="background-color: {cmap[1]}; width: 14px; height: 14px; border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Only in published; </span>

    <span style="display: inline-block"><span style="background-color: {cmap[6]}; width: 14px; height: 14px; border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Absent in published; </span>

    <span style="display: inline-block"><span style="background-color: {cmap[7]}; width: 14px; height: 14px; border-radius: 7px; display: inline-block; margin-left: 20px"> </span> In all 3. </span>
</p>
"""
)

GridBox(
    [img_selector, retinologist_selector, legend1, legend2, view1, view2, view3, view4],
    layout=Layout(
        grid_template_columns="repeat(2, 1fr)",
        grid_template_rows="auto auto 415px 400px",
    ),
)

GridBox(children=(Dropdown(description='Biomarker:', layout=Layout(width='auto'), options=('20060522_46266_010…

## Qualitative analysis

In [12]:
new_annotations = load_new_annotations()
images = new_annotations[0].keys()
current_img2 = [images[0], Bio.RED_LESIONS.value]

In [13]:
img_selector2 = Dropdown(
    options=images,
    description="Biomarker:",
    layout=Layout(width="auto"),
    value=current_img2[0],
)

biomarker_selector = Dropdown(
    options=[
        _.value
        for _ in [
            Bio.MICROANEURYSMS,
            Bio.HEMORRHAGES,
            Bio.EXUDATES,
            Bio.COTTON_WOOL_SPOTS,
            Bio.RED_LESIONS,
            Bio.BRIGHT_LESIONS,
        ]
    ],
    description="Biomarker:",
    layout=Layout(width="auto"),
    value=current_img2[1],
)

s = published_dataset[images[0]]

view6 = imshow(s["fundus"])
view7 = imshow(s["fundus"])
view7._left_ruler = False

cmap = {
    # --- Pre-annotations Edited---
    1: "red",  # Removed
    2: "green",  # Added
    # --- Pre-annotations not edited ---
    3: "white",
}


def setup_views2():
    img, bio = current_img2

    s1 = new_annotations[0][img]
    s2 = new_annotations[1][img]

    for s, v in zip([s1, s2], [view6, view7]):
        v["background"] = s["fundus"]
        diff = s.read_biomarker(bio, pre_annotation=True) + 2 * s.read_biomarker(bio, pre_annotation=False)
        v.add_label(
            diff,
            name="diff",
            colormap=cmap,
        )


def set_img2(img):
    if isinstance(img, dict):
        img = img["new"]
    current_img2[0] = img
    setup_views2()


def set_bio2(bio):
    if isinstance(bio, dict):
        bio = bio["new"]
    current_img2[1] = bio
    setup_views2()


img_selector2.observe(set_img2, "value")
biomarker_selector.observe(set_bio2, "value")

setup_views2()
sync_views(view6, view7)

legend1 = HTML(
    f"""
<p style="color: var(--jppype-foreground-color); font-size: 14px; text-align: center;">
    <span style="display: inline-block"><span style="background-color: {cmap[2]}; width: 14px; height: 14px; 
    border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Added </span>
    <span style="display: inline-block"><span style="background-color: {cmap[1]}; width: 14px; height: 14px; 
    border-radius: 7px; display: inline-block; margin-left: 20px"> </span> Removed </span>
</p>
<h3 style="color: var(--jppype-foreground-color);"> Daniel </h3>
"""
)
legend2 = HTML(
    """
    <h3 style="color: var(--jppype-foreground-color);"> Fares </h3>
    """
)

GridBox(
    [img_selector2, biomarker_selector, legend1, legend2, view6, view7],
    layout=Layout(
        grid_template_columns="repeat(2, 1fr)",
        grid_template_rows="auto auto 500px",
    ),
)

GridBox(children=(Dropdown(description='Biomarker:', layout=Layout(width='auto'), options=('20060522_46266_010…

## Quantitative analysis

In [32]:
biomarkers = [
    field.value
    for field in (
        Bio.MICROANEURYSMS,
        Bio.HEMORRHAGES,
        Bio.EXUDATES,
        Bio.COTTON_WOOL_SPOTS,
        Bio.RED_LESIONS,
        Bio.BRIGHT_LESIONS,
    )
]

data = {}
data_per_image = {}
for bio in biomarkers:
    all_kappa = []
    all_accuracy = []
    all_dice = []
    all_f1_detection = []

    for s1, s2 in zip(*new_annotations):
        bio1 = s1[bio]
        bio2 = s2[bio]

        kappa = cohen_kappa_score(bio1.flatten(), bio2.flatten(), labels=[0, 1])
        all_kappa.append(kappa)

        accuracy = np.mean(bio1 == bio2)
        all_accuracy.append(accuracy)

        dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
        all_dice.append(dice)

        f1_det = regions_f1(bio1, bio2)
        all_f1_detection.append(f1_det)
        d = data_per_image.get(s1.name, {})
        d[str(bio)] = {
            "kappa": kappa,
            "accuracy": accuracy,
            "dice": dice,
            "f1_det": f1_det,
        }
        data_per_image[s1.name] = d
    data[bio] = {
        "kappa": f"{np.nanmean(all_kappa):.3f} ± {np.nanstd(all_kappa):.3f}",
        "accuracy": f"{np.nanmean(all_accuracy):.3f} ± {np.nanstd(all_accuracy):.3f}",
        "dice": f"{np.nanmean(all_dice):.3f} ± {np.nanstd(all_dice):.3f}",
        "F1 Detection": f"{np.nanmean(all_f1_detection):.3f} ± {np.nanstd(all_f1_detection):.3f}",
    }

  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  dice = 2 * np.sum(bio1 * bio2) / (np.sum(bio1) + np.sum(bio2))
  k = np

In [33]:
pd.DataFrame(data).round(3).fillna("")

Unnamed: 0,microaneurysms,hemorrhages,exudates,cottonWoolSpots,redLesions,brightLesions
kappa,0.946 ± 0.083,0.731 ± 0.392,0.590 ± 0.371,0.006 ± 0.010,0.951 ± 0.068,0.518 ± 0.357
accuracy,1.000 ± 0.000,1.000 ± 0.000,1.000 ± 0.000,1.000 ± 0.001,1.000 ± 0.000,1.000 ± 0.001
dice,0.946 ± 0.083,0.731 ± 0.392,0.590 ± 0.371,0.006 ± 0.010,0.951 ± 0.068,0.518 ± 0.357
F1 Detection,0.955 ± 0.060,0.942 ± 0.106,0.791 ± 0.214,0.286 ± 0.000,0.966 ± 0.054,0.800 ± 0.219


In [34]:
pd.DataFrame({img: data_per_image[img]["hemorrhages"] for img in data_per_image}).round(
    3
).fillna("")

Unnamed: 0,20060522_46266_0100_PP,20060412_52668_0200_PP,20060523_49010_0100_PP,20060411_58221_0200_PP,20051020_62461_0100_PP,20051202_51677_0400_PP,20051116_44816_0400_PP,20060412_59400_0200_PP,20060412_61000_0200_PP,20060412_60895_0200_PP,20051202_51488_0400_PP,20060412_59037_0200_PP,20060410_43675_0200_PP,20060410_40146_0200_PP,20060412_52978_0200_PP,20051021_40074_0100_PP
kappa,,0.004,1.0,0.946,,1.0,,,,0.993,0.998,0.502,0.0,0.884,,0.985
accuracy,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
dice,,0.004,1.0,0.946,,1.0,,,,0.993,0.998,0.502,0.0,0.884,,0.985
f1_det,,0.667,1.0,0.933,,1.0,,,,1.0,0.875,1.0,,1.0,,1.0


In [17]:
def multi_diff(sample_ID, bio):
    s1 = new_annotations[0][sample_ID]
    s2 = new_annotations[1][sample_ID]
    labels, diffs = multi_annotator_regions_diff(s1.read_biomarker(bio, pre_annotation=True), s1[bio], s2[bio])
    v = imshow(s1["fundus"])

    def diff_to_color(diff):
        if "A" in diff:
            return (
                Color.interpolate(["yellow", "green"])(sum(_ == "A" for _ in diff) / len(diff))
                .convert("srgb")
                .to_string(hex=True)
            )
        return (
            Color.average(
                [
                    Color.interpolate(["white", "blue"])(sum(_ == "C" for _ in diff) / len(diff)),
                    Color.interpolate(["white", "red"])(sum(_ == "R" for _ in diff) / len(diff)),
                ]
            )
            .convert("srgb")
            .to_string(hex=True)
        )

    v.add_label(
        labels,
        name="diff",
        colormap={int(k): diff_to_color(v) for k, v in diffs.items()},
    )
    return diffs, v


diffs, v = multi_diff("20051116_44816_0400_PP", Bio.RED_LESIONS)
v

View2D()

In [29]:
def diff_conf_mat(bio, annotator1=0, annotator2=1):
    cm = pd.DataFrame(
        np.zeros((5, 5)),
        index=["K", "C", "R", "A", ""],
        columns=["K", "C", "R", "A", ""],
    )
    for img in new_annotations[0].keys():
        diffs, _ = multi_diff(img, bio)
        for diff in diffs.values():
            ann1 = diff[annotator1]
            ann2 = diff[annotator2]
            cm.loc[ann1, ann2] += 1
    return cm


for bio in [
    Bio.MICROANEURYSMS,
    Bio.HEMORRHAGES,
    Bio.EXUDATES,
    Bio.COTTON_WOOL_SPOTS,
    Bio.RED_LESIONS,
    Bio.BRIGHT_LESIONS,
]:
    print(bio)
    print(diff_conf_mat(bio).astype(int))
    print("\n")

BiomarkerField.MICROANEURYSMS
     K   C  R  A   
K  326  43  7  0  0
C    0   0  0  0  0
R    3   0  2  0  0
A    0   0  0  0  1
     0   0  0  6  0


BiomarkerField.HEMORRHAGES
    K  C  R  A   
K  31  6  1  0  0
C   2  1  0  0  0
R   4  0  2  0  0
A   0  0  0  0  0
    0  0  0  1  0


BiomarkerField.EXUDATES
     K   C   R  A    
K  143  28  10  0   0
C    0   0   0  0   0
R    0   0   1  0   0
A    0   0   0  0  24
     0   0   0  0   0


BiomarkerField.COTTON_WOOL_SPOTS
   K  C  R  A   
K  1  0  3  0  0
C  0  0  0  0  0
R  3  2  5  0  0
A  0  0  0  0  0
   0  0  0  0  0


BiomarkerField.RED_LESIONS
     K   C  R  A   
K  335  44  4  0  0
C    3   1  2  0  0
R    3   0  2  0  0
A    0   0  0  0  1
     0   0  0  6  0


BiomarkerField.BRIGHT_LESIONS
     K   C   R  A    
K  144  28  13  0   0
C    0   0   0  0   0
R    3   2   6  0   0
A    0   0   0  1  23
     0   0   0  0   0




In [43]:
2 / (144 + 28 + 13 + 3 + 2 + 6)

0.01020408163265306