In [1]:
import pandas as pd

from typing import Optional, List, Union
import os

from utils.config import get_path_of_directory_with_id

# Helpers

In [2]:
def get_segmentation_metrics_from_experiment(
    experiment_id: Union[str, int],
    model_version: str,
    csv_name: str,
    metrics: List[str]
) -> pd.Series:
    """Load the aggregated segmentation metrics of a single experiment.

    Reads ``<experiment_dir>/evaluation/<model_version>/<csv_name>``, where
    ``experiment_dir`` is resolved with ``get_path_of_directory_with_id``.
    The CSV must contain a ``patient`` column with rows labelled ``"mean"``
    and ``"std"``; only those two rows are used.

    Parameters
    ----------
    experiment_id:
        Identifier passed to ``get_path_of_directory_with_id``; it also
        becomes the name of the returned series.
    model_version:
        Sub-directory of ``evaluation`` to read from (e.g. ``"best"``).
    csv_name:
        Path of the metrics CSV relative to the evaluation directory.
    metrics:
        Names of the metric columns to extract.

    Returns
    -------
    pd.Series
        Series named ``experiment_id`` holding ``<metric>_mean`` and
        ``<metric>_std`` for every requested metric, sorted by label.

    Raises
    ------
    KeyError
        If the ``mean``/``std`` rows or a requested metric column is missing.
    """
    experiment_dir = get_path_of_directory_with_id(experiment_id)
    metrics_path = os.path.join(experiment_dir, "evaluation", model_version, csv_name)

    summary = (
        pd.read_csv(metrics_path)
        .set_index("patient")
        .loc[["mean", "std"], metrics]
    )

    # Suffix the labels explicitly instead of string-replacing "mean" with
    # "std": a metric whose own name contains "mean" would otherwise end up
    # with a corrupted label for its standard deviation.
    means = summary.loc["mean"].add_suffix("_mean")
    stds = summary.loc["std"].add_suffix("_std")

    return pd.concat([means, stds]).rename(experiment_id).sort_index()

def get_metrics_summary_for_experiments(
    experiment_ids: List[Union[str, int]],
    model_version: str,
    csv_names: Union[str, List[str]],
    metrics: List[str]
) -> pd.DataFrame:
    """Build a table with one row of aggregated metrics per experiment.

    Parameters
    ----------
    experiment_ids:
        Experiments to summarise. An id may appear more than once (for
        example with a different CSV each time); every entry yields a row.
    model_version:
        Evaluation sub-directory forwarded to
        ``get_segmentation_metrics_from_experiment``.
    csv_names:
        Either a single CSV name used for every experiment, or one CSV name
        per entry of ``experiment_ids``.
    metrics:
        Metric columns to extract from each CSV.

    Returns
    -------
    pd.DataFrame
        One row per entry of ``experiment_ids`` (indexed by the id) and one
        column per ``<metric>_mean`` / ``<metric>_std`` value.

    Raises
    ------
    ValueError
        If ``csv_names`` is a list whose length differs from
        ``experiment_ids``.
    """
    if isinstance(csv_names, str):
        csv_names = [csv_names] * len(experiment_ids)
    elif len(csv_names) != len(experiment_ids):
        # zip() would silently drop the unmatched experiments otherwise.
        raise ValueError(
            f"Got {len(experiment_ids)} experiment ids but {len(csv_names)} csv names"
        )

    per_experiment = [
        get_segmentation_metrics_from_experiment(eid, model_version, csv_name, metrics)
        for eid, csv_name in zip(experiment_ids, csv_names)
    ]
    # Each series becomes a column; transpose so experiments are rows.
    return pd.concat(per_experiment, axis=1).transpose()

# Get the summary for single modality experiments

In [None]:
model_version: str = "best"
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]
# The ids are integers (the previous annotation claimed List[str]).
experiment_ids: List[int] = list(range(1, 2))
csv_names: List[str] = ["patient_metrics.csv"] * len(experiment_ids)

df = get_metrics_summary_for_experiments(experiment_ids, model_version, csv_names, metrics)
df.index.name = "experiment_id"
df = df.reset_index()

# Only the "*mean" columns are highlighted: the minimum for metrics whose
# name starts with a distance-metric prefix, the maximum for all the others.
distance_metrics = ["hd", "avc"]
mean_cols = [c for c in df.columns if c.endswith("mean")]
highlight_min_cols = [c for c in mean_cols if c.startswith(tuple(distance_metrics))]
highlight_max_cols = [c for c in mean_cols if c not in highlight_min_cols]

# Styled display, left disabled as in the original cell.
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# Styler.hide(axis="index") before re-enabling this on a recent pandas.
# (
#     df
#     .style
#     .hide_index()
#     .highlight_max(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_max_cols]
#     )
#     .highlight_min(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_min_cols]
#     )
# )

# Summary for the FOSCAL experiments with the joint embedding model

In [None]:
radiologist_metrics = {}
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]

# One entry per table row. A dual experiment is listed twice in a row: the
# first entry reads its ADC metrics CSV, the second one its DWI metrics CSV.
experiment_ids = [1, 1, 4, 4, 2, 3, 5, 5, 8, 8, 6, 7, 9, 9, 10, 10]
fold = [0] * len(experiment_ids)
dual = [True, True, True, True, False, False, True, True, True, True, False, False, True, True, True, True]
# Alternative configuration kept from the original cell:
# experiment_ids = [0, 1, 10, 10, 2, 3, 11, 11, 4, 5, 12, 12, 6, 7, 13, 13, 8, 9, 14, 14]
# fold = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4]

for radiologist in ["Daniel", "Andres"]:
    csv_names, modalities = [], []
    next_dual_is_adc = True
    for i, experiment_id in enumerate(experiment_ids):
        if not dual[i]:
            csv_names.append(f"test/{radiologist}/patient_metrics.csv")
            if experiment_id in [0, 2, 4, 6, 8]:
                modalities.append("ADC")
            elif experiment_id in [1, 3, 5, 7, 9]:
                modalities.append("DWI")
            else:
                # Without this the modality list ends up shorter than the
                # table and the failure only shows up later in df.insert().
                raise ValueError(
                    f"No modality known for single-modality experiment {experiment_id}"
                )
        elif next_dual_is_adc:
            csv_names.append(f"test/{radiologist}/adc_patient_metrics.csv")
            modalities.append("ADC (ADC,DWI)")
            next_dual_is_adc = False
        else:
            csv_names.append(f"test/{radiologist}/dwi_patient_metrics.csv")
            modalities.append("DWI (ADC,DWI)")
            next_dual_is_adc = True

    df = get_metrics_summary_for_experiments(experiment_ids, "best", csv_names, metrics)
    df.index.name = "experiment_id"
    df = df.reset_index()
    df.insert(1, "fold", fold)
    df.insert(2, "modality", modalities)
    radiologist_metrics[radiologist] = df

# Only the "*mean" columns are highlighted: the minimum for metrics whose
# name starts with a distance-metric prefix, the maximum for all the others.
# Both radiologists' tables are built from the same metrics, so the column
# names of the last table apply to both.
distance_metrics = ["hd", "avc"]
highlight_max_cols = [
    c for c in df.columns
    if c.endswith("mean")
    and not any(c.startswith(m) for m in distance_metrics)
]
highlight_min_cols = [
    c for c in df.columns
    if c.endswith("mean")
    and any(c.startswith(m) for m in distance_metrics)
]

for name in ["Andres", "Daniel"]:
    styler = radiologist_metrics[name].style
    # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
    # fall back to it only when Styler.hide() is not available.
    styler = styler.hide(axis="index") if hasattr(styler, "hide") else styler.hide_index()
    display(
        styler
        # axis=0 compares the values within each column (same as 'rows').
        .highlight_max(axis=0, subset=pd.IndexSlice[:, highlight_max_cols])
        .highlight_min(axis=0, subset=pd.IndexSlice[:, highlight_min_cols])
    )

In [4]:
def _hide_index_compat(styler):
    """Hide the index of a Styler on both old and new pandas versions."""
    # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
    # favour of Styler.hide(axis="index").
    if hasattr(styler, "hide"):
        return styler.hide(axis="index")
    return styler.hide_index()


def display_experiments_summary(metrics, experiment_ids, dual):
    """Display one highlighted metrics table per radiologist.

    Parameters
    ----------
    metrics:
        Metric names forwarded to ``get_metrics_summary_for_experiments``.
    experiment_ids:
        One entry per table row. A dual experiment is expected to be listed
        twice in a row: consecutive dual entries alternate between the ADC
        and the DWI metrics CSV, starting with ADC.
    dual:
        Flags aligned with ``experiment_ids``; a true value marks the entry
        as a dual (ADC, DWI) model, a false value as a single-modality model.

    Raises
    ------
    ValueError
        If a single-modality experiment id has no known modality.

    Notes
    -----
    The tables are shown with ``display`` ("Andres" first, then "Daniel");
    nothing is returned. In every ``*_mean`` column the maximum is
    highlighted, except for columns starting with a distance-metric prefix
    (``hd``, ``avc``), where the minimum is highlighted.
    """
    adc_only_ids = (0, 2, 4, 6, 8)
    dwi_only_ids = (1, 3, 5, 7, 9)
    distance_metrics = ("hd", "avc")

    radiologist_metrics = {}
    for radiologist in ["Daniel", "Andres"]:
        csv_names, modalities = [], []
        next_dual_is_adc = True
        for i, experiment_id in enumerate(experiment_ids):
            if not dual[i]:
                csv_names.append(f"test/{radiologist}/patient_metrics.csv")
                if experiment_id in adc_only_ids:
                    modalities.append("ADC")
                elif experiment_id in dwi_only_ids:
                    modalities.append("DWI")
                else:
                    # Without this the modality list ends up shorter than
                    # the table and df.insert() fails with an unrelated
                    # length error.
                    raise ValueError(
                        f"No modality known for single-modality experiment {experiment_id}"
                    )
            elif next_dual_is_adc:
                csv_names.append(f"test/{radiologist}/adc_patient_metrics.csv")
                modalities.append("ADC (ADC,DWI)")
                next_dual_is_adc = False
            else:
                csv_names.append(f"test/{radiologist}/dwi_patient_metrics.csv")
                modalities.append("DWI (ADC,DWI)")
                next_dual_is_adc = True

        df = get_metrics_summary_for_experiments(experiment_ids, "best", csv_names, metrics)
        df.index.name = "experiment_id"
        df = df.reset_index()
        df.insert(1, "modality", modalities)
        radiologist_metrics[radiologist] = df

    for radiologist in ["Andres", "Daniel"]:
        table = radiologist_metrics[radiologist]
        mean_cols = [c for c in table.columns if c.endswith("mean")]
        highlight_min_cols = [c for c in mean_cols if c.startswith(distance_metrics)]
        highlight_max_cols = [c for c in mean_cols if c not in highlight_min_cols]

        display(
            _hide_index_compat(table.style)
            # axis=0 compares the values within each column (same as 'rows').
            .highlight_max(axis=0, subset=pd.IndexSlice[:, highlight_max_cols])
            .highlight_min(axis=0, subset=pd.IndexSlice[:, highlight_min_cols])
        )

# MICCAI results

## Model versions

Our models come in two versions. The first version is intended to take a single medical image, either ADC or DWI, as input and generate the corresponding segmentation mask. Meanwhile, the second version is designed to process both images as separate inputs, using a different autoencoder for each, to produce both segmentations. Additionally, the dual models have two versions.

Furthermore, we used the validation Dice score to save the weights of the most effective segmentation model in our experiments. However, we considered two variants of the Dice score. The first variant returns a score of 1.0 ($\text{Dice}_{1}$) when the masks do not contain annotations, while the second variant returns a score of 0.0 ($\text{Dice}_{0}$). The following table describes the configuration of each model.

| Experiment ID | Model type | Input modalities | Dice version      |
|---------------|------------|------------------|-------------------|
| 1             | Dual       | ADC (ADC, DWI)   | $\text{Dice}_{0}$ |
| 2             | Single     | ADC              | $\text{Dice}_{0}$ |
| 3             | Single     | DWI              | $\text{Dice}_{0}$ |
| 4             | Dual       | DWI (ADC, DWI)   | $\text{Dice}_{0}$ |
| 5             | Dual       | ADC (ADC, DWI)   | $\text{Dice}_{1}$ |
| 6             | Single     | ADC              | $\text{Dice}_{1}$ |
| 7             | Single     | DWI              | $\text{Dice}_{1}$ |
| 8             | Dual       | DWI (ADC, DWI)   | $\text{Dice}_{1}$ |
| 9             | Dual       | DWI (ADC, DWI)   | $\text{Dice}_{1}$ |
| 10            | Dual       | ADC (ADC, DWI)   | $\text{Dice}_{1}$ |
| 11            | Dual       | ADC (ADC, DWI)   | $\text{Dice}_{1}$ |
| 12            | Dual       | DWI (ADC, DWI)   | $\text{Dice}_{1}$ |

## Comparison of using $\text{Dice}_{0}$ and $\text{Dice}_{1}$

### Single models

We noticed that the single models benefit from training with $\text{Dice}_{0}$

In [5]:
# Single-modality models: 2 and 3 were selected with Dice_0, 6 and 7 with
# Dice_1 (see the experiment table above).
experiment_ids = [2, 3, 6, 7]
dual = [False] * len(experiment_ids)
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]
display_experiments_summary(metrics, experiment_ids, dual)

  radiologist_metrics["Andres"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
2,ADC,0.459082,0.264369,52.671588,30.25718,0.640536,0.310065,0.40572,0.248928
3,DWI,0.421925,0.33936,46.761536,33.404472,0.570016,0.377753,0.401261,0.378909
6,ADC,0.280966,0.302756,46.132255,38.676306,0.663091,0.433127,0.147931,0.180114
7,DWI,0.452838,0.336543,54.617336,32.685463,0.569633,0.366656,0.44548,0.372175


  radiologist_metrics["Daniel"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
2,ADC,0.476205,0.287759,50.118567,27.824417,0.616208,0.354307,0.457968,0.265757
3,DWI,0.438817,0.338502,42.12101,28.476261,0.612175,0.390498,0.366615,0.330547
6,ADC,0.316953,0.303326,36.683323,41.648335,0.603937,0.460374,0.174488,0.186074
7,DWI,0.446615,0.335028,55.538734,31.66486,0.591813,0.37958,0.403888,0.337107


### Dual models

We noticed that the dual models benefit from training with $\text{Dice}_{1}$

In [6]:
# Dual models: each id is listed twice, once for its ADC metrics and once
# for its DWI metrics.
experiment_ids = [1, 1, 4, 4, 5, 5, 8, 8]
dual = [True for _ in experiment_ids]
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]
display_experiments_summary(metrics, experiment_ids, dual)

  radiologist_metrics["Andres"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
1,"ADC (ADC,DWI)",0.492271,0.264756,63.5886,26.54353,0.624558,0.29118,0.42387,0.279829
1,"DWI (ADC,DWI)",0.397015,0.316324,70.289388,25.593844,0.525778,0.3478,0.40333,0.347325
4,"ADC (ADC,DWI)",0.476037,0.300792,44.724001,31.425763,0.608074,0.339085,0.377072,0.295
4,"DWI (ADC,DWI)",0.452918,0.317758,60.841842,29.587984,0.563451,0.340544,0.464356,0.359223
5,"ADC (ADC,DWI)",0.556649,0.275111,33.506694,34.728872,0.707669,0.282134,0.439417,0.30063
5,"DWI (ADC,DWI)",0.577215,0.303159,36.44768,36.633253,0.614566,0.318229,0.537159,0.35825
8,"ADC (ADC,DWI)",0.522942,0.263527,56.543609,33.10526,0.552678,0.2999,0.558317,0.281477
8,"DWI (ADC,DWI)",0.481837,0.306426,53.798249,28.894996,0.654586,0.325465,0.488636,0.342675


  radiologist_metrics["Daniel"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
1,"ADC (ADC,DWI)",0.520846,0.280229,66.725861,21.551162,0.607877,0.336551,0.482611,0.303707
1,"DWI (ADC,DWI)",0.385102,0.303,71.356217,24.315728,0.547522,0.356124,0.353332,0.297381
4,"ADC (ADC,DWI)",0.516272,0.321968,37.85977,28.767407,0.605085,0.365749,0.4323,0.329305
4,"DWI (ADC,DWI)",0.476516,0.313465,51.432888,24.523623,0.614159,0.360268,0.423381,0.302465
5,"ADC (ADC,DWI)",0.604849,0.287848,32.191056,31.645309,0.686341,0.327118,0.508079,0.315374
5,"DWI (ADC,DWI)",0.610469,0.287898,29.660239,23.95201,0.668145,0.329565,0.512996,0.310846
8,"ADC (ADC,DWI)",0.524704,0.305144,51.400063,29.274546,0.513443,0.343062,0.614901,0.320374
8,"DWI (ADC,DWI)",0.468802,0.297616,51.965884,26.97052,0.663654,0.348078,0.426396,0.285126


## Dual: Cross modality attention vs 

Models optimized for ADC

In [7]:
# Dual models listed as "ADC (ADC, DWI)" in the experiment table; each id
# appears twice, once for its ADC metrics and once for its DWI metrics.
experiment_ids = [5, 5, 10, 10, 11, 11]
dual = [True] * len(experiment_ids)
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]
display_experiments_summary(metrics, experiment_ids, dual)

  radiologist_metrics["Andres"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
5,"ADC (ADC,DWI)",0.556649,0.275111,33.506694,34.728872,0.707669,0.282134,0.439417,0.30063
5,"DWI (ADC,DWI)",0.577215,0.303159,36.44768,36.633253,0.614566,0.318229,0.537159,0.35825
10,"ADC (ADC,DWI)",0.508867,0.278616,37.220265,35.560975,0.712127,0.29039,0.427337,0.286575
10,"DWI (ADC,DWI)",0.617605,0.262384,46.688321,37.890868,0.670174,0.285491,0.553247,0.343588
11,"ADC (ADC,DWI)",0.491041,0.288256,55.166986,34.903071,0.679152,0.32097,0.361053,0.269974
11,"DWI (ADC,DWI)",0.518853,0.288332,57.762625,29.252111,0.619414,0.301568,0.557903,0.335698


  radiologist_metrics["Daniel"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
5,"ADC (ADC,DWI)",0.604849,0.287848,32.191056,31.645309,0.686341,0.327118,0.508079,0.315374
5,"DWI (ADC,DWI)",0.610469,0.287898,29.660239,23.95201,0.668145,0.329565,0.512996,0.310846
10,"ADC (ADC,DWI)",0.549001,0.28887,32.321417,28.400737,0.691749,0.333452,0.487531,0.300598
10,"DWI (ADC,DWI)",0.612911,0.278136,40.395655,31.46511,0.711493,0.306295,0.501798,0.311921
11,"ADC (ADC,DWI)",0.525905,0.292803,50.562662,29.158365,0.663806,0.373156,0.408318,0.28216
11,"DWI (ADC,DWI)",0.533641,0.28083,58.413246,26.217845,0.668624,0.303115,0.522324,0.281066


Models optimized for DWI

In [11]:
# Dual models listed as "DWI (ADC, DWI)" in the experiment table; each id
# appears twice, once for its ADC metrics and once for its DWI metrics.
experiment_ids = [8, 8, 9, 9, 12, 12]
dual = [True] * len(experiment_ids)
metrics: List[str] = ["sens", "ppv", "dsc", "hd"]
display_experiments_summary(metrics, experiment_ids, dual)

  radiologist_metrics["Andres"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
8,"ADC (ADC,DWI)",0.522942,0.263527,56.543609,33.10526,0.552678,0.2999,0.558317,0.281477
8,"DWI (ADC,DWI)",0.481837,0.306426,53.798249,28.894996,0.654586,0.325465,0.488636,0.342675
9,"ADC (ADC,DWI)",0.470909,0.264246,60.405589,32.743401,0.705121,0.315,0.37144,0.245916
9,"DWI (ADC,DWI)",0.577,0.269405,43.866168,35.027093,0.714753,0.27788,0.480116,0.318408
12,"ADC (ADC,DWI)",0.369931,0.294397,47.654435,34.383532,0.72462,0.333702,0.23204,0.223657
12,"DWI (ADC,DWI)",0.532231,0.303206,50.806814,35.736483,0.675156,0.304787,0.496511,0.341973


  radiologist_metrics["Daniel"]


experiment_id,modality,dsc_mean,dsc_std,hd_mean,hd_std,ppv_mean,ppv_std,sens_mean,sens_std
8,"ADC (ADC,DWI)",0.524704,0.305144,51.400063,29.274546,0.513443,0.343062,0.614901,0.320374
8,"DWI (ADC,DWI)",0.468802,0.297616,51.965884,26.97052,0.663654,0.348078,0.426396,0.285126
9,"ADC (ADC,DWI)",0.492794,0.265705,57.561585,33.315759,0.671838,0.344364,0.409787,0.247836
9,"DWI (ADC,DWI)",0.578876,0.253152,41.946624,32.927266,0.762945,0.267511,0.447128,0.280346
12,"ADC (ADC,DWI)",0.403142,0.301615,42.783245,32.688301,0.716474,0.381993,0.267372,0.241753
12,"DWI (ADC,DWI)",0.540398,0.291417,51.635523,28.981914,0.713299,0.308398,0.449339,0.288485


In [9]:
# NOTE(review): this cell contains only commented-out code. Inside
# display_experiments_summary, `radiologist_metrics`, `highlight_max_cols`
# and `highlight_min_cols` are local variables, which is consistent with the
# NameError recorded as this cell's output. Presumably the cell was last run
# before being commented out - confirm and consider deleting it.
# display(
#     radiologist_metrics["Andres"]
#     .style
#     .hide_index()
#     .highlight_max(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_max_cols]
#     )
#     .highlight_min(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_min_cols]
#     )
# )

# display(
#     radiologist_metrics["Daniel"]
#     .style
#     .hide_index()
#     .highlight_max(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_max_cols]
#     )
#     .highlight_min(
#         axis='rows',
#         subset=pd.IndexSlice[:, highlight_min_cols]
#     )
# )

# # display(radiologist_metrics["Andres"])
# # display(radiologist_metrics["Daniel"])

NameError: name 'radiologist_metrics' is not defined

- [ ] Hacer correcciones del paper
- [ ] Cambiar tablas
  - [ ] Calcular las metricas de segmentacion entre radiologos
  - [ ] Poner los resultados que tenemos en tablas en el paper