In [12]:
%%capture
%pip install seaborn

In [13]:
from collections import defaultdict
import logging
logging.basicConfig(level=logging.INFO)

import matplotlib.pyplot as plt
import numpy as np
import json
import seaborn as sns

In [14]:
def load_single_results(data_path):
    """Load a results JSON file and return the decoded object.

    Args:
        data_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file. The cells in this
        notebook index the result like a dict.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding
    # (e.g. cp1252 on Windows), which can fail or mis-decode non-ASCII names.
    with open(data_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [15]:


def _normalize_filter(values):
    """Return None for the 'all' sentinel, else a set of lower-cased names."""
    if isinstance(values, str):
        if values == 'all':
            return None
        # A bare name such as 'CT' means "only this one"; iterating the string
        # would otherwise split it into single characters.
        values = [values]
    return {value.lower() for value in values}


def get_dice_for_keys(dices: dict, counts: dict,
                      datasets: list = 'all', modalities: list = 'all', tasks: list = 'all'):
    """Collect ``(dice, count)`` pairs for the selected datasets/modalities/tasks.

    Both ``dices`` and ``counts`` are nested dictionaries of the form
    ``dataset -> modality -> task -> value`` where the value is the dice score
    (in ``dices``) or the number of samples found (in ``counts``).

    Args:
        dices: Nested mapping of dice scores.
        counts: Nested mapping of sample counts with the same keys as ``dices``.
        datasets: ``'all'`` (no filtering), a single name, or an iterable of
            dataset names to keep.
        modalities: Same as ``datasets`` but for the modality level.
        tasks: Same as ``datasets`` but for the task level.

    Filter names are compared case-insensitively at every level, so
    ``modalities=['CT']`` and ``modalities=['ct']`` select the same entries.

    Returns:
        A 4-tuple ``(all_levels, data_level, modality_level, task_level)``:
        ``all_levels`` is a list of ``(dice, count)`` tuples for every selected
        entry; the other three are ``defaultdict(list)`` objects grouping the
        same tuples by dataset, modality and task key respectively. Group keys
        keep their original spelling, so e.g. ``'Liver'`` and ``'liver'`` stay
        separate groups.

    Raises:
        KeyError: If ``counts`` has no entry for a selected
            dataset/modality/task combination.
    """
    dataset_filter = _normalize_filter(datasets)
    modality_filter = _normalize_filter(modalities)
    task_filter = _normalize_filter(tasks)

    all_levels = []
    data_level = defaultdict(list)
    modality_level = defaultdict(list)
    task_level = defaultdict(list)
    for data_key, mod_data in dices.items():
        # The filters are lower-cased, so the key must be lower-cased too;
        # comparing the raw key made any filter with capitals match nothing.
        if dataset_filter is not None and data_key.lower() not in dataset_filter:
            continue
        for modality, task_data in mod_data.items():
            if modality_filter is not None and modality.lower() not in modality_filter:
                continue
            for task, dice_score in task_data.items():
                if task_filter is not None and task.lower() not in task_filter:
                    continue
                count = counts[data_key][modality][task]
                # Lazy %-style arguments: the message is only built when DEBUG is enabled.
                logging.debug('using info from dataset %s modality %s task %s: dice %s count %s',
                              data_key, modality, task, dice_score, count)
                entry = (dice_score, count)
                all_levels.append(entry)
                data_level[data_key].append(entry)
                modality_level[modality].append(entry)
                task_level[task].append(entry)
    return all_levels, data_level, modality_level, task_level

def compute_avg_dice(dice_count_list: list[tuple]):
    """Return the count-weighted mean of ``(dice, count)`` pairs.

    Args:
        dice_count_list: Iterable of ``(dice, count)`` tuples; each dice score
            is weighted by its count.

    Returns:
        ``sum(dice * count) / sum(count)``, or ``float('nan')`` when the counts
        sum to zero (which includes an empty input), so that an empty selection
        does not abort the surrounding cell with ``ZeroDivisionError``.
    """
    total_dice = 0
    total_count = 0
    for dice, count in dice_count_list:
        total_dice += dice * count
        total_count += count
    if total_count == 0:
        # Nothing to average over: report "undefined" instead of dividing by zero.
        return float('nan')
    return total_dice / total_count

In [16]:
# DEMO: load one results file and print the count-weighted average Dice
# overall and per dataset, per modality and per task.
# NOTE: data_path, all_levels, data_level, modality_level and task_level are
# notebook-global names that the baseline cell below rebinds.
data_path = 'results_12.json'
# The payload is nested under a top-level '12' key
# (presumably a run/epoch identifier — TODO confirm).
data = load_single_results(data_path)['12']

# No dataset/modality/task filters are passed, so the defaults ('all') apply
# and every entry is included.
all_levels, data_level, modality_level, task_level = get_dice_for_keys(
    data['per_dataset_modality_task_dice'], data['per_dataset_modality_task_counts'])
print('average dice for all levels: ', compute_avg_dice(all_levels))
# Each *_level mapping groups the (dice, count) pairs by one key level.
for dataset, dices in data_level.items():
    print('average dice for dataset {}: {}'.format(dataset, compute_avg_dice(dices)))
for modality, dices in modality_level.items():
    print('average dice for modality {}: {}'.format(modality, compute_avg_dice(dices)))
for task, dices in task_level.items():
    print('average dice for task {}: {}'.format(task, compute_avg_dice(dices)))
print('______________________________________________________________________')


average dice for all levels:  0.20720262315052634
average dice for dataset chaos_mrct_42_1: 0.20720262315052634
average dice for modality CT: 0.32119057575861615
average dice for modality MRI: 0.1730062373680994
average dice for task spleen: 0.02570357918739319
average dice for task right kidney: 0.014934579531351725
average dice for task Liver: 0.4325263053178787
average dice for task Left kidney: 0.0011326860403642058
______________________________________________________________________


In [18]:
# BASELINE RESULTS: print the count-weighted average Dice of the baseline run,
# overall and per dataset, per modality and per task.
# NOTE: this rebinds data_path, all_levels, data_level, modality_level and
# task_level, replacing the values computed by the demo cell.
data_path = '../results/results_baseline.json'
# Unlike the demo file, the baseline JSON is used directly: the
# 'per_dataset_modality_task_*' keys are read from its top-level object.
baseline_data = load_single_results(data_path)

# No filters are passed, so every dataset/modality/task entry is included.
all_levels, data_level, modality_level, task_level = get_dice_for_keys(
    baseline_data['per_dataset_modality_task_dice'], baseline_data['per_dataset_modality_task_counts'])

print('Baseline Dice scores')
print('dice:', compute_avg_dice(all_levels))
print()
# Each *_level mapping groups the (dice, count) pairs by one key level.
for dataset, dices in data_level.items():
    print('dataset {}: {}'.format(dataset, compute_avg_dice(dices)))
print()
for modality, dices in modality_level.items():
    print('modality {}: {}'.format(modality, compute_avg_dice(dices)))
print()
for task, dices in task_level.items():
    print('task {}: {}'.format(task, compute_avg_dice(dices)))

Baseline Dice scores
dice: 0.6295535446077153

dataset 0000 Dataset: 0.9618050456047058
dataset 0001 Dataset: 0.583496676882108
dataset 0020 Dataset: 0.7776888310909271
dataset 0008 Dataset: 0.8123207688331604

modality CT: 0.6295535446077153

task liver: 0.9549267368931924
task OpticChiasm: 0.1755075752735138
task Parotid_R: 0.7951778769493103
task Lips: 0.6662173271179199
task BuccalMucosa: 0.6360491514205933
task Glnd_Lacrimal_R: 0.4904055595397949
task Cricopharyngeus: 0.5694500803947449
task Bone_Mandible: 0.9151379466056824
task Esophagus_S: 0.6004030108451843
task Eye_PR: 0.8163533806800842
task Arytenoid: 0.132841095328331
task Glnd_Lacrimal_L: 0.3634571433067322
task OpticNrv_R: 0.3447383642196655
task Cochlea_L: 0.2100144475698471
task Eye_AR: 0.5127241611480713
task Glnd_Submand_L: 0.7903439402580261
task Glnd_Thyroid: 0.7892332077026367
task Larynx_SG: 0.6949116587638855
task Cavity_Oral: 0.8654983043670654
task Cochlea_R: 0.26442086696624756
task Glottis: 0.452264666557312