In [1]:
import pandas as pd
import scipy, researchpy
from researchpy import ttest
import os, glob
import numpy as np
import json
import mpu.io
from glob2 import glob

In [9]:
path_to_project = 'path_to/brain-mri-processing-pipeline'

In [3]:
def ttest_pair(metric, df_1, df_2, name_1, name_2, correction = None):
    return ttest(df_1[metric], df_2[metric], group1_name = name_1,
                 group2_name= name_2, equal_variances=False, paired=True, correction = 8).iloc[4].values[1]

def make_df_fold(folder, dataset, exp, list_fold_0, list_fold_1, list_fold_2):
    """ for most of experiments name of experiment `exp` should include `_f`
    """
    _columns = ['Ids','Dice_1', 'Dice_2', 'Dice_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
               'Precision_1', 'Precision_2', 'Precision_3']
    metrics_all = pd.DataFrame(columns = _columns)
    for i,sub in enumerate(os.listdir(folder)):
        if sub in list_fold_0:
            fold=0
            exp_f = f'{exp}_fold_{fold}'
            path_json = glob('{}/{}/{}_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))
        elif sub in list_fold_1:
            fold=1
            exp_f = f'{exp}_fold_2'
            path_json = glob('{}/{}/{}_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))
        elif sub in list_fold_2:
            fold=2
            exp_f = f'{exp}_fold_{fold}'
            path_json = glob('{}/{}/{}_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))   
        print(path_json)    
        assert(len(path_json) != 0)
        metrics = mpu.io.read(path_json[0])
        metrics = metrics[dataset][exp][f'fold={fold}']
        for each in metrics:
                    metrics_all.at[i,'Ids'] = sub
                    if each != 'Ids':
                        metrics_all.at[i,each] = np.round(metrics[each],3)

    return metrics_all

def make_df_fold_300(folder, dataset, exp, list_fold_0, list_fold_1, list_fold_2):
    """ for most of experiments name of experiment `exp` should include `_f`
    """
    _columns = ['Ids','Dice_1', 'Dice_2', 'Dice_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
               'Precision_1', 'Precision_2', 'Precision_3']
    metrics_all = pd.DataFrame(columns = _columns)
    print(exp)
    for i,sub in enumerate(os.listdir(folder)):
        if sub in list_fold_0:
            fold=0
            exp_f = f'{exp}_fold_{fold}'
            path_json = glob('{}/{}/{}_best_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))
        elif sub in list_fold_1:
            fold=1
            exp_f=f'{exp}_fold_{fold}'
            path_json = glob('{}/{}/{}_best_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))
        elif sub in list_fold_2:
            fold=2
            exp_f=f'{exp}_fold_{fold}'
            path_json = glob('{}/{}/{}_best_epoch*_fold_{}_tta.json'.format(folder, sub, exp, fold))   
        print(path_json)    
        assert(len(path_json) != 0)
        metrics = mpu.io.read(path_json[0])
        metrics = metrics[dataset][exp][f'fold={fold}']
        for each in metrics:
                    metrics_all.at[i,'Ids'] = sub
                    if each != 'Ids':
                        metrics_all.at[i,each] = np.round(metrics[each],3)

    return metrics_all


def make_main_table(folder, dataset, fixed, moving, metrics, list_fold_0, list_fold_1, list_fold_2):
    result_1 = pd.DataFrame()
    result_2 = pd.DataFrame()
    result_3 = pd.DataFrame()
    result = pd.DataFrame()
    df_1 = make_df_fold(folder,dataset, fixed, list_fold_0, list_fold_1, list_fold_2)
    
    for k, exp in enumerate(moving):
        df_2 = make_df_fold_300(folder,dataset, exp, list_fold_0, list_fold_1, list_fold_2)
        for metric in metrics:
            print(metric)
            result = result.append({'Experiment': fixed,
                                     f'{metric}_Mean': np.round(df_1[metric].mean(),3),
                                     f'{metric}_STD': np.round(df_1[metric].std(),3)}, 
                                   ignore_index = True)
            try:
                p_value = ttest_pair(metric, df_1, df_2, fixed, exp, correction = len(moving))
            except:
                continue
            if '1' in metric:
                result_1.at[k,'Experiment'] = exp
                result_1.at[k, f'{metric}_Mean'] = np.round(df_2[metric].mean(),3)
                result_1.at[k, f'{metric}_STD'] = np.round(df_2[metric].std(),3)
                result_1.at[k, f'{metric}_p_value'] = np.round(p_value,3)
                
            elif '2' in metric:
                result_2.at[k,'Experiment'] = exp
                result_2.at[k, f'{metric}_Mean'] = np.round(df_2[metric].mean(),3)
                result_2.at[k, f'{metric}_STD'] = np.round(df_2[metric].std(),3)
                result_2.at[k, f'{metric}_p_value'] = np.round(p_value,3)
            elif '3' in metric:
                result_3.at[k,'Experiment'] = exp
                result_3.at[k, f'{metric}_Mean'] = np.round(df_2[metric].mean(),3)
                result_3.at[k, f'{metric}_STD'] = np.round(df_2[metric].std(),3)
                result_3.at[k, f'{metric}_p_value'] = np.round(p_value,3)

    return result_1, result_2, result_3, result

In [7]:
folder = '/results/metrics/gbm' 
fixed = 'gbm_4a_resamp_300'
gbm_seed = mpu.io.read(os.path.join(path_to_project + '/nnUNet/metrics/gbm_seed.json'))
moving = ['gbm_2a_interp_300']
dataset = 'gbm'

df_stat_1, df_stat_2, df_stat_3, result  = make_main_table(folder, dataset, fixed, moving,
                ['Dice_1', 'Dice_2', 'Dice_3',
                'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Precision_1','Precision_2', 'Precision_3'
                ], gbm_seed['fold_0'], gbm_seed['fold_1'], gbm_seed['fold_2'])

In [154]:
out_json = '/results/metrics/gbm/'
dataset = 'gbm'
task = 'gbm_4a_resamp'
fold = 'fold_0'
_columns = ['Ids','Dice_1', 'Dice_2', 'Dice_3',
                'Hausdorff95_1', 'Hausdorff95_2', 'Hausdorff95_3',
                'Sensitivity_1', 'Sensitivity_2', 'Sensitivity_3',
               'Specificity_1', 'Specificity_2', 'Specificity_3',
               'Surface_dice_1', 'Surface_dice_2', 'Surface_dice_3',
               'Precision_1', 'Precision_2', 'Precision_3']
metrics_all = pd.DataFrame(columns = _columns)
for i,sub in tqdm((enumerate(os.listdir(out_json)))):
    for sub_t in os.listdir(os.path.join(out_json, sub)):
        if f'{task}_epoch' in sub_t:
            if fold in sub_t:
                metrics = json.load(open(os.path.join(out_json, sub, sub_t)))
                metrics = metrics[dataset][task][fold.replace('_', '=')]
                for each in _columns:
                    metrics_all.at[i,'Ids'] = sub
                    if each != 'Ids':
                        metrics_all.at[i,each] = np.round(metrics[each],3)
                    

102it [00:00, 193.66it/s]


In [159]:
metrics_all.sort_values('Dice_1', ascending=False)

Unnamed: 0,Ids,Dice_1,Dice_2,Dice_3,Hausdorff95_1,Hausdorff95_2,Hausdorff95_3,Sensitivity_1,Sensitivity_2,Sensitivity_3,Specificity_1,Specificity_2,Specificity_3,Surface_dice_1,Surface_dice_2,Surface_dice_3,Precision_1,Precision_2,Precision_3
40,TCGA-02-0085,0.968,0.924,0.9,0.781,6.5,3.906,0.959,0.883,0.865,1.0,1.0,1.0,0.97,0.883,0.914,0.978,0.969,0.94
54,TCGA-12-1098,0.962,0.908,0.905,1.953,1.953,2.0,0.961,0.841,0.83,1.0,1.0,1.0,0.929,0.858,0.858,0.963,0.987,0.996
68,TCGA-08-0512,0.957,0.948,0.923,1.811,1.5,1.5,0.935,0.93,0.877,1.0,1.0,1.0,0.692,0.729,0.718,0.981,0.966,0.975
75,TCGA-02-0106,0.956,0.935,0.89,1.562,2.471,2.344,0.927,0.882,0.815,1.0,1.0,1.0,0.935,0.857,0.904,0.987,0.996,0.981
26,TCGA-12-1598,0.955,0.885,0.825,2.009,6.289,4.581,0.954,0.817,0.717,0.999,1.0,1.0,0.923,0.771,0.787,0.956,0.966,0.972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,TCGA-19-5960,0.676,0.603,0.473,15.544,22.636,21.524,0.592,0.528,0.338,0.996,0.997,0.999,0.291,0.241,0.311,0.789,0.702,0.786
30,TCGA-08-0390,0.632,0.699,0.626,21.418,16.95,15.366,0.473,0.581,0.514,1.0,1.0,1.0,0.055,0.364,0.464,0.954,0.878,0.799
77,TCGA-19-5958,0.587,0.717,0.225,16.643,9.434,8.246,0.55,0.64,0.182,0.996,0.999,0.999,0.221,0.254,0.415,0.629,0.814,0.296
22,TCGA-06-0119,0.53,0.764,0.589,54.43,9.233,7.526,0.522,0.731,0.543,0.993,0.999,0.998,0.285,0.428,0.514,0.539,0.8,0.643
