# DSCagg Calculation Example for HNTS-MRG 2024

This notebook demonstrates how to calculate the evaluation metric (aggregated Dice Similarity coefficient - DSCagg) for [HNTS-MRG 2024 Challenge](https://hntsmrg24.grand-challenge.org/). More information on the evaluation can be found [here](https://hntsmrg24.grand-challenge.org/tasks-and-evaluation/). The evaluation functions are encapsulated in a Docker container image which will be run on the outputs of participants' submitted algorithms. 

This specific example in this notebook uses a subset of masks from the HNTS-MRG 2024 training dataset, available on [Zenodo](https://zenodo.org/records/11199559). We use 20 out of 150 training case masks for this example (just as a proof of concept).

Credit to the HECKTOR 2022 organizers; most of this code is directly based on their [GitHub implementations](https://github.com/voreille/hecktor/blob/master/notebooks/evaluate_segmentation2022.ipynb).

## Table of Contents

1. [Imports](#Imports)
2. [Functions](#Functions)
3. [DSCagg Calculation](#dscagg-calculation)
4. [Extra: Conventional DSC Calculation](#extra-conventional-dsc-calculation)

## Imports

In [None]:
import SimpleITK as sitk
import os
import numpy as np
import json

In [None]:
# Adjust these paths and dataset name as needed
output_json_path = '/homes/nmoradi/test/Dice_summary.json'  
dataset_name = 'midRT_medNext_517'

prediction_folder = '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format'
groundtruth_folder = '/homes/nmoradi/test/evaluation/ground_truth_midRT'

## Functions

In [None]:
def compute_volumes(im):
    """
    Compute the volumes of the GTVp and the GTVn
    """
    spacing = im.GetSpacing()
    voxvol = spacing[0] * spacing[1] * spacing[2]
    stats = sitk.LabelStatisticsImageFilter()
    stats.Execute(im, im)
    nvoxels1 = stats.GetCount(1)
    nvoxels2 = stats.GetCount(2)
    return nvoxels1 * voxvol, nvoxels2 * voxvol

def compute_agg_dice(intermediate_results):
    """
    Compute the aggregate dice score from the intermediate results
    """
    aggregate_results = {}
    TP1s = [v["TP1"] for v in intermediate_results]
    TP2s = [v["TP2"] for v in intermediate_results]
    vol_sum1s = [v["vol_sum1"] for v in intermediate_results]
    vol_sum2s = [v["vol_sum2"] for v in intermediate_results]
    DSCagg1 = 2 * np.sum(TP1s) / np.sum(vol_sum1s)
    DSCagg2 = 2 * np.sum(TP2s) / np.sum(vol_sum2s)
    aggregate_results['AggregatedDsc'] = {
        'GTVp': DSCagg1,
        'GTVn': DSCagg2,
        'mean': np.mean((DSCagg1, DSCagg2)),
    }
    return aggregate_results

def get_intermediate_metrics(patient_ID, groundtruth, prediction):
    """
    Compute intermediate metrics for a given groundtruth and prediction.
    These metrics are used to compute the aggregate dice.
    """
    overlap_measures = sitk.LabelOverlapMeasuresImageFilter()
    overlap_measures.SetNumberOfThreads(1)
    overlap_measures.Execute(groundtruth, prediction)

    DSC1 = overlap_measures.GetDiceCoefficient(1)
    DSC2 = overlap_measures.GetDiceCoefficient(2)

    vol_gt1, vol_gt2 = compute_volumes(groundtruth)
    vol_pred1, vol_pred2 = compute_volumes(prediction)

    vol_sum1 = vol_gt1 + vol_pred1
    vol_sum2 = vol_gt2 + vol_pred2
    TP1 = DSC1 * (vol_sum1) / 2
    TP2 = DSC2 * (vol_sum2) / 2
    return {
        "PatientID": patient_ID, # added patient ID so we can pinpoint exact results if needed
        "TP1": TP1,
        "TP2": TP2,
        "vol_sum1": vol_sum1,
        "vol_sum2": vol_sum2,
        "DSC1": DSC1,
        "DSC2": DSC2,
        "vol_gt1": vol_gt1, # needed if you want to exclude empty ground truths in conventional DSC calcs
        "vol_gt2": vol_gt2, 
    }

def resample_prediction(groundtruth, prediction):
    """
    Resample the prediction to the groundtruth physical domain
    """
    resample = sitk.ResampleImageFilter()
    resample.SetSize(groundtruth.GetSize())
    resample.SetOutputDirection(groundtruth.GetDirection())
    resample.SetOutputOrigin(groundtruth.GetOrigin())
    resample.SetOutputSpacing(groundtruth.GetSpacing())
    resample.SetInterpolator(sitk.sitkNearestNeighbor)
    return resample.Execute(prediction) 

def check_prediction(patient_ID, groundtruth, prediction):
    """
    Check if the prediction is valid and apply padding if needed
    """

    # Cast to the same type
    caster = sitk.CastImageFilter()
    caster.SetOutputPixelType(sitk.sitkUInt8)
    caster.SetNumberOfThreads(1)
    groundtruth = caster.Execute(groundtruth)
    prediction = caster.Execute(prediction)

    # Check labels
    stats = sitk.LabelStatisticsImageFilter()
    stats.Execute(prediction, prediction)
    labels = stats.GetLabels()
    if not all([l in [0, 1, 2] for l in labels]):
        raise RuntimeError(
            f"Patient {patient_ID}: Error. The labels are incorrect. The labels should be background: 0, GTVp: 1, GTVn: 2."
        )
    # Check spacings
    if not np.allclose(
            groundtruth.GetSpacing(), prediction.GetSpacing(), atol=0.000001):
        raise RuntimeError(
            f"Patient {patient_ID}: Error. The resolution of the prediction is different from the MRI ground truth resolution."
        )

    # Check if resampling is needed
    needs_resampling = False
    if prediction.GetSize() != groundtruth.GetSize():
        needs_resampling = True
    elif not np.allclose(prediction.GetDirection(), groundtruth.GetDirection(), atol=0.000001):
        needs_resampling = True
    elif not np.allclose(prediction.GetOrigin(), groundtruth.GetOrigin(), atol=0.000001):
        needs_resampling = True

    if needs_resampling:
        print(f"Patient {patient_ID}: Prediction checked, resampling prediction to match ground truth...")
        prediction = resample_prediction(groundtruth, prediction)
    else:
        print(f'Patient {patient_ID}: Prediction checked, everything correct and no resampling needed.')
        # To be sure that sitk won't trigger unnecessary errors
        prediction.SetSpacing(groundtruth.GetSpacing())

    return prediction

## DSCagg Calculation

Remember DSCagg is calculated over the entire set of data so you do not get patient-level datapoints like with conventional volumetric DSC.

Ground truth masks here are the mid-RT masks while the "prediction" masks here are the registered pre-RT masks. 

In [None]:
# first set up the ground truth and prediction paths

prediction_files = [os.path.join(prediction_folder, file) for file in os.listdir(prediction_folder) if "nii.gz" in file]
groundtruth_files = [os.path.join(groundtruth_folder, file) for file in os.listdir(groundtruth_folder) if "nii.gz" in file]

print("Prediction files", prediction_files, "\n")

print("Ground truth files", groundtruth_files)

Prediction files ['/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/145_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/150_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/41_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/75_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/60_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/171_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/164_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predictions_all_hntsmrg_format/133_midRT_mask.nii.gz', '/homes/nmoradi/test/evaluation/predicted_midRT_517_MedNext/predi

Please note in the below code some warnings (LabelOverlapMeasuresImageFilter) are thrown because there is no label in the ground truth file and the prediction file. This is not an error and expected behavior for the given setup.

In [5]:
results = list()
for f in prediction_files:
    patient_ID = os.path.split(f)[-1].split('_')[0] # get the patient ID from the path 
    print(patient_ID)
    gt_file = [k for k in groundtruth_files if os.path.split(k)[-1].split('_')[0] == patient_ID][0]

    print(f"Evaluating patient {patient_ID}")

    prediction = sitk.ReadImage(str(f))
    groundtruth = sitk.ReadImage(str(gt_file))
    prediction = check_prediction(patient_ID, groundtruth, prediction) 


    results.append(get_intermediate_metrics(patient_ID, groundtruth, prediction))

145
Evaluating patient 145
Patient 145: Prediction checked, everything correct and no resampling needed.


150
Evaluating patient 150
Patient 150: Prediction checked, everything correct and no resampling needed.
41
Evaluating patient 41
Patient 41: Prediction checked, everything correct and no resampling needed.
75
Evaluating patient 75
Patient 75: Prediction checked, everything correct and no resampling needed.
60
Evaluating patient 60
Patient 60: Prediction checked, everything correct and no resampling needed.
171
Evaluating patient 171
Patient 171: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



164
Evaluating patient 164
Patient 164: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



133
Evaluating patient 133
Patient 133: Prediction checked, everything correct and no resampling needed.
37
Evaluating patient 37
Patient 37: Prediction checked, everything correct and no resampling needed.
6
Evaluating patient 6
Patient 6: Prediction checked, everything correct and no resampling needed.
119
Evaluating patient 119
Patient 119: Prediction checked, everything correct and no resampling needed.
22
Evaluating patient 22
Patient 22: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



190
Evaluating patient 190
Patient 190: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



138
Evaluating patient 138
Patient 138: Prediction checked, everything correct and no resampling needed.
185
Evaluating patient 185
Patient 185: Prediction checked, everything correct and no resampling needed.
107
Evaluating patient 107
Patient 107: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



29
Evaluating patient 29
Patient 29: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



94
Evaluating patient 94
Patient 94: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



112
Evaluating patient 112
Patient 112: Prediction checked, everything correct and no resampling needed.
81
Evaluating patient 81
Patient 81: Prediction checked, everything correct and no resampling needed.
101
Evaluating patient 101
Patient 101: Prediction checked, everything correct and no resampling needed.
114
Evaluating patient 114
Patient 114: Prediction checked, everything correct and no resampling needed.
196
Evaluating patient 196
Patient 196: Prediction checked, everything correct and no resampling needed.
183
Evaluating patient 183
Patient 183: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



10
Evaluating patient 10
Patient 10: Prediction checked, everything correct and no resampling needed.
31
Evaluating patient 31
Patient 31: Prediction checked, everything correct and no resampling needed.
24
Evaluating patient 24
Patient 24: Prediction checked, everything correct and no resampling needed.
99
Evaluating patient 99
Patient 99: Prediction checked, everything correct and no resampling needed.
135
Evaluating patient 135
Patient 135: Prediction checked, everything correct and no resampling needed.
188
Evaluating patient 188
Patient 188: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



177
Evaluating patient 177
Patient 177: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



66
Evaluating patient 66
Patient 66: Prediction checked, everything correct and no resampling needed.
148
Evaluating patient 148
Patient 148: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



47
Evaluating patient 47
Patient 47: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



169
Evaluating patient 169
Patient 169: Prediction checked, everything correct and no resampling needed.
52
Evaluating patient 52
Patient 52: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



156
Evaluating patient 156
Patient 156: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



78
Evaluating patient 78
Patient 78: Prediction checked, everything correct and no resampling needed.
170
Evaluating patient 170
Patient 170: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



165
Evaluating patient 165
Patient 165: Prediction checked, everything correct and no resampling needed.
74
Evaluating patient 74
Patient 74: Prediction checked, everything correct and no resampling needed.
61
Evaluating patient 61
Patient 61: Prediction checked, everything correct and no resampling needed.
55
Evaluating patient 55
Patient 55: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



144
Evaluating patient 144
Patient 144: Prediction checked, everything correct and no resampling needed.
151
Evaluating patient 151
Patient 151: Prediction checked, everything correct and no resampling needed.
95
Evaluating patient 95
Patient 95: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



80
Evaluating patient 80
Patient 80: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



113
Evaluating patient 113
Patient 113: Prediction checked, everything correct and no resampling needed.
191
Evaluating patient 191
Patient 191: Prediction checked, everything correct and no resampling needed.
139
Evaluating patient 139
Patient 139: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



184
Evaluating patient 184
Patient 184: Prediction checked, everything correct and no resampling needed.
17
Evaluating patient 17
Patient 17: Prediction checked, everything correct and no resampling needed.
118
Evaluating patient 118
Patient 118: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



36
Evaluating patient 36
Patient 36: Prediction checked, everything correct and no resampling needed.
23
Evaluating patient 23
Patient 23: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



132
Evaluating patient 132
Patient 132: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



127
Evaluating patient 127
Patient 127: Prediction checked, everything correct and no resampling needed.
30
Evaluating patient 30
Patient 30: Prediction checked, everything correct and no resampling needed.
25
Evaluating patient 25
Patient 25: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



197
Evaluating patient 197
Patient 197: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



11
Evaluating patient 11
Patient 11: Prediction checked, everything correct and no resampling needed.
93
Evaluating patient 93
Patient 93: Prediction checked, everything correct and no resampling needed.
115
Evaluating patient 115
Patient 115: Prediction checked, everything correct and no resampling needed.
86
Evaluating patient 86
Patient 86: Prediction checked, everything correct and no resampling needed.
142
Evaluating patient 142
Patient 142: Prediction checked, everything correct and no resampling needed.
79
Evaluating patient 79
Patient 79: Prediction checked, everything correct and no resampling needed.
157
Evaluating patient 157
Patient 157: Prediction checked, everything correct and no resampling needed.
46
Evaluating patient 46
Patient 46: Prediction checked, everything correct and no resampling needed.
53
Evaluating patient 53
Patient 53: Prediction checked, everything correct and no resampling needed.
201
Evaluating patient 201
Patient 201: Prediction checked, everything co

LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



110
Evaluating patient 110
Patient 110: Prediction checked, everything correct and no resampling needed.
83
Evaluating patient 83
Patient 83: Prediction checked, everything correct and no resampling needed.
105
Evaluating patient 105
Patient 105: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



96
Evaluating patient 96
Patient 96: Prediction checked, everything correct and no resampling needed.
131
Evaluating patient 131
Patient 131: Prediction checked, everything correct and no resampling needed.
20
Evaluating patient 20
Patient 20: Prediction checked, everything correct and no resampling needed.
88
Evaluating patient 88
Patient 88: Prediction checked, everything correct and no resampling needed.
4
Evaluating patient 4
Patient 4: Prediction checked, everything correct and no resampling needed.
159
Evaluating patient 159
Patient 159: Prediction checked, everything correct and no resampling needed.
77
Evaluating patient 77
Patient 77: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



48
Evaluating patient 48
Patient 48: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



166
Evaluating patient 166
Patient 166: Prediction checked, everything correct and no resampling needed.
173
Evaluating patient 173
Patient 173: Prediction checked, everything correct and no resampling needed.
152
Evaluating patient 152
Patient 152: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



69
Evaluating patient 69
Patient 69: Prediction checked, everything correct and no resampling needed.
178
Evaluating patient 178
Patient 178: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



56
Evaluating patient 56
Patient 56: Prediction checked, everything correct and no resampling needed.
50
Evaluating patient 50
Patient 50: Prediction checked, everything correct and no resampling needed.
45
Evaluating patient 45
Patient 45: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



154
Evaluating patient 154
Patient 154: Prediction checked, everything correct and no resampling needed.
141
Evaluating patient 141
Patient 141: Prediction checked, everything correct and no resampling needed.
175
Evaluating patient 175
Patient 175: Prediction checked, everything correct and no resampling needed.
64
Evaluating patient 64
Patient 64: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



71
Evaluating patient 71
Patient 71: Prediction checked, everything correct and no resampling needed.
108
Evaluating patient 108
Patient 108: Prediction checked, everything correct and no resampling needed.
26
Evaluating patient 26
Patient 26: Prediction checked, everything correct and no resampling needed.
33
Evaluating patient 33
Patient 33: Prediction checked, everything correct and no resampling needed.
2
Evaluating patient 2
Patient 2: Prediction checked, everything correct and no resampling needed.
122
Evaluating patient 122
Patient 122: Prediction checked, everything correct and no resampling needed.
116
Evaluating patient 116
Patient 116: Prediction checked, everything correct and no resampling needed.
90
Evaluating patient 90
Patient 90: Prediction checked, everything correct and no resampling needed.
103
Evaluating patient 103
Patient 103: Prediction checked, everything correct and no resampling needed.
181
Evaluating patient 181
Patient 181: Prediction checked, everything co

LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



12
Evaluating patient 12
Patient 12: Prediction checked, everything correct and no resampling needed.
194
Evaluating patient 194
Patient 194: Prediction checked, everything correct and no resampling needed.
129
Evaluating patient 129
Patient 129: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



21
Evaluating patient 21
Patient 21: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



5
Evaluating patient 5
Patient 5: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



34
Evaluating patient 34
Patient 34: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



198
Evaluating patient 198
Patient 198: Prediction checked, everything correct and no resampling needed.
125
Evaluating patient 125
Patient 125: Prediction checked, everything correct and no resampling needed.
130
Evaluating patient 130
Patient 130: Prediction checked, everything correct and no resampling needed.
82
Evaluating patient 82
Patient 82: Prediction checked, everything correct and no resampling needed.
111
Evaluating patient 111
Patient 111: Prediction checked, everything correct and no resampling needed.
104
Evaluating patient 104
Patient 104: Prediction checked, everything correct and no resampling needed.
193
Evaluating patient 193
Patient 193: Prediction checked, everything correct and no resampling needed.
57
Evaluating patient 57
Patient 57: Prediction checked, everything correct and no resampling needed.
179
Evaluating patient 179
Patient 179: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



42
Evaluating patient 42
Patient 42: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



153
Evaluating patient 153
Patient 153: Prediction checked, everything correct and no resampling needed.
146
Evaluating patient 146
Patient 146: Prediction checked, everything correct and no resampling needed.
49
Evaluating patient 49
Patient 49: Prediction checked, everything correct and no resampling needed.
172
Evaluating patient 172
Patient 172: Prediction checked, everything correct and no resampling needed.
63
Evaluating patient 63
Patient 63: Prediction checked, everything correct and no resampling needed.
158
Evaluating patient 158
Patient 158: Prediction checked, everything correct and no resampling needed.
70
Evaluating patient 70
Patient 70: Prediction checked, everything correct and no resampling needed.
161
Evaluating patient 161
Patient 161: Prediction checked, everything correct and no resampling needed.
174
Evaluating patient 174
Patient 174: Prediction checked, everything correct and no resampling needed.
155
Evaluating patient 155
Patient 155: Prediction checked, ever

LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



39
Evaluating patient 39
Patient 39: Prediction checked, everything correct and no resampling needed.
91
Evaluating patient 91
Patient 91: Prediction checked, everything correct and no resampling needed.
136
Evaluating patient 136
Patient 136: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



18
Evaluating patient 18
Patient 18: Prediction checked, everything correct and no resampling needed.


LabelOverlapMeasuresImageFilter (0x56144d43c6e0): Label  not found.



27
Evaluating patient 27
Patient 27: Prediction checked, everything correct and no resampling needed.
109
Evaluating patient 109
Patient 109: Prediction checked, everything correct and no resampling needed.
32
Evaluating patient 32
Patient 32: Prediction checked, everything correct and no resampling needed.
3
Evaluating patient 3
Patient 3: Prediction checked, everything correct and no resampling needed.


Display aggregated DSC metrics. This is what will be used in the challenge evaluation/ranking.

In [None]:
# Display raw results
print("The raw results are:", results, "\n")

# Compute and display aggregate dice scores
agg_dice_scores = compute_agg_dice(results)
print(f"Aggregate dice scores: {agg_dice_scores}\n")

The raw results are: [{'PatientID': '145', 'TP1': 760.0, 'TP2': 2649.5, 'vol_sum1': 3582.5, 'vol_sum2': 7218.0, 'DSC1': 0.424284717376134, 'DSC2': 0.7341368800221668, 'vol_gt1': 2738.0, 'vol_gt2': 3141.0}, {'PatientID': '150', 'TP1': 1093.0, 'TP2': 10777.0, 'vol_sum1': 7511.5, 'vol_sum2': 25254.0, 'DSC1': 0.29102043533249017, 'DSC2': 0.853488556268314, 'vol_gt1': 1998.0, 'vol_gt2': 12366.0}, {'PatientID': '41', 'TP1': 6464.000000000001, 'TP2': 837.0000000000001, 'vol_sum1': 20052.0, 'vol_sum2': 1960.5, 'DSC1': 0.644723718332336, 'DSC2': 0.8538638102524867, 'vol_gt1': 11849.5, 'vol_gt2': 1027.5}, {'PatientID': '75', 'TP1': 5606.500000000001, 'TP2': 2755.5, 'vol_sum1': 13894.5, 'vol_sum2': 6242.0, 'DSC1': 0.807009967972939, 'DSC2': 0.8828900993271387, 'vol_gt1': 6652.5, 'vol_gt2': 3090.5}, {'PatientID': '60', 'TP1': 3236.0, 'TP2': 12814.499999999998, 'vol_sum1': 9403.5, 'vol_sum2': 29053.5, 'DSC1': 0.688254373371617, 'DSC2': 0.8821312406422633, 'vol_gt1': 5589.5, 'vol_gt2': 14794.0}, {'P

## Extra: Conventional DSC Calculation

Since conventional volumetric DSC was also calculated during the DSCagg calculation, we can also display these values as well just for reference. These metrics will not be used in the challenge directly but may be handy to know.

In [None]:
# Extract DSC1 and DSC2 values
DSC1_values = [result["DSC1"] for result in results]
DSC2_values = [result["DSC2"] for result in results]

# Compute and display mean DSC1 and DSC2
mean_DSC1 = np.mean(DSC1_values)
mean_DSC2 = np.mean(DSC2_values)
std_DSC2 = np.std(DSC2_values)
std_DSC1 = np.std(DSC1_values)

print(f"Mean DSC1 (GTVp): {mean_DSC1}")
print(f"Mean DSC2 (GTVn): {mean_DSC2}\n")
print(f"STD DSC1 (GTVp): {std_DSC1}\n")
print(f"STD DSC2 (GTVn): {std_DSC2}\n")

Mean DSC1 (GTVp): 0.41460883182707203
Mean DSC2 (GTVn): 0.676514389776771

STD DSC1 (GTVp): 0.31586184287354396

STD DSC2 (GTVn): 0.3064299744756378



Conventional volumetric DSC may be disproportionately affected by a single false negative/postive result (yielding a DSC of 0). Therefore, it may be more informative to remove instances where the ground truth is empty. The code below removes instances with empty ground truth before computing the mean DSC values.

Alternativley, one could use a smoothing term making the DSC of a case without ground truth equal to 1 if there is no false positive, 0 otherwise (we don't implement this here). Again this would still potentially overly penalize false positives (1 voxel false positive treated the same as 1000 voxel false positive).

In [None]:
# Extract non-zero DSC1 and DSC2 values and print removed patient IDs
DSC1_values_nozeros = []
DSC2_values_nozeros = []
removed_patients_DSC1 = []
removed_patients_DSC2 = []

for result in results:
    patient_id = result["PatientID"]
    if result["vol_gt1"] != 0.0:
        DSC1_values_nozeros.append(result["DSC1"])
    else:
        removed_patients_DSC1.append(patient_id)
    if result["vol_gt2"] != 0.0:
        DSC2_values_nozeros.append(result["DSC2"])
    else:
        removed_patients_DSC2.append(patient_id)

# Print removed patient IDs
print("Removed patient IDs with empty ground truth volumes for DSC1:", removed_patients_DSC1)
print("Removed patient IDs with empty ground truth volumes for DSC2:", removed_patients_DSC2, "\n")

# Compute and display mean non-zero DSC1 and DSC2
mean_DSC1_nozeros = np.mean(DSC1_values_nozeros)
mean_DSC2_nozeros = np.mean(DSC2_values_nozeros)
std_DSC2_nozeros = np.std(DSC2_values_nozeros)
std_DSC1_nozeros = np.std(DSC1_values_nozeros)
print(f"Mean DSC1 (GTVp) without empty ground truth: {mean_DSC1_nozeros}")
print(f"Mean DSC2 (GTVn) without empty ground truth: {mean_DSC2_nozeros}\n")
print(f"STD DSC1 (GTVp) without empty ground truth: {std_DSC1_nozeros}\n")
print(f"STD DSC2 (GTVn) without empty ground truth: {std_DSC2_nozeros}\n")

Removed patient IDs with empty ground truth volumes for DSC1: ['190', '107', '29', '94', '101', '196', '188', '177', '148', '47', '52', '165', '95', '191', '139', '118', '25', '86', '46', '187', '105', '48', '152', '56', '45', '154', '71', '129', '21', '5', '34', '42', '174', '8', '136', '18']
Removed patient IDs with empty ground truth volumes for DSC2: ['171', '164', '22', '29', '183', '156', '170', '55', '80', '23', '132', '25', '197', '20', '77', '178', '64', '181', '21', '179', '42'] 

Mean DSC1 (GTVp) without empty ground truth: 0.5455379366145684
Mean DSC2 (GTVn) without empty ground truth: 0.7866446392753151

STD DSC1 (GTVp) without empty ground truth: 0.24463813393019146

STD DSC2 (GTVn) without empty ground truth: 0.15017288765948106



## Saveing the results

In [None]:
with open(output_json_path, 'r') as file:
    output = json.load(file)

dice_scores = { 
    "Mean DSC1 (GTVp)": mean_DSC1,
    "Mean DSC2 (GTVn)": mean_DSC2, 
    "STD DSC1 (GTVp)": std_DSC1,
    "STD DSC2 (GTVn)": std_DSC2,
    "Mean DSC1 (GTVp) without empty ground truth": mean_DSC1_nozeros,
    "Mean DSC2 (GTVn) without empty ground truth": mean_DSC2_nozeros,
    "STD DSC1 (GTVp) without empty ground truth": std_DSC1_nozeros,
    "STD DSC2 (GTVn) without empty ground truth": std_DSC2_nozeros
}
dice_scores.update(agg_dice_scores)


In [None]:

new_data = {
    dataset_name : dice_scores
}

output.update(new_data)
with open(output_json_path, 'w') as outfile:
    json.dump(output, outfile, indent=4)