In [584]:
import pandas as pd
import scipy, researchpy
from researchpy import ttest
import os
from glob2 import glob
import numpy as np
import json
import mpu.io

In [670]:
# Load the GBM seed JSON (presumably the fold_0 / fold_1 / fold_2 subject lists — TODO confirm).
# NOTE(review): `gbm_seed` is not referenced by any other visible cell; the
# driver cells load their own `seed` from the same path pattern.
gbm_seed = mpu.io.read('/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/metrics/gbm_seed.json')
def ttest_pair(metric, df_1, df_2, name_1, name_2, correction = None):
    """Compare one metric between two result tables and return the p-value.

    Calls researchpy's ``ttest`` on ``df_1[metric]`` and ``df_2[metric]`` with
    ``equal_variances=False`` and ``paired=True``.

    Parameters
    ----------
    metric : str
        Column present in both ``df_1`` and ``df_2``.
    df_1, df_2 : pandas.DataFrame
        Tables holding the metric; the test is paired, so rows are expected to
        describe the same subjects in the same order.
    name_1, name_2 : str
        Group labels forwarded as ``group1_name`` / ``group2_name``.
    correction : optional
        Forwarded unchanged to ``ttest(correction=...)``. Previously this
        argument was ignored and a constant ``8`` was always sent.
        NOTE(review): with ``equal_variances=False, paired=True`` researchpy
        presumably runs a Wilcoxon signed-rank test and treats ``correction``
        as a truthy continuity-correction flag, not as a multiple-comparison
        factor — the caller passes ``len(moving)``; confirm against the
        installed researchpy version.

    Returns
    -------
    The second cell of row 4 of the table returned by ``ttest``, which the
    notebook uses as the p-value.
    """
    report = ttest(
        df_1[metric],
        df_2[metric],
        group1_name=name_1,
        group2_name=name_2,
        equal_variances=False,
        paired=True,
        correction=correction,
    )
    # Positional lookup: depends on the row layout of researchpy's result table.
    return report.iloc[4].values[1]

def _collect_fold_metrics(folder, dataset, exp, fold_lists, patterns):
    """Shared worker for ``make_df_fold`` and ``make_df_fold_300``.

    Walks the entries of ``folder``, finds the fold each entry belongs to,
    loads the first JSON file matching that fold's glob pattern and copies the
    metrics stored under ``[dataset][exp]['fold=<fold>']`` into one table row.

    Parameters
    ----------
    folder : str
        Directory whose entries are subject sub-directories.
    dataset, exp : str
        Keys used to navigate the loaded JSON; ``exp`` is also the file-name
        prefix.
    fold_lists : sequence of 3 collections
        Subject names of fold 0, 1 and 2. The first list containing a subject
        wins, as in the original ``if``/``elif`` chain.
    patterns : sequence of 3 str
        ``str.format`` templates (folder, subject, exp, fold) for the glob of
        each fold.

    Returns
    -------
    pandas.DataFrame
        One row per subject that belongs to a fold. The row label is the
        subject's position in ``os.listdir(folder)``, so tables built from the
        same folder align on their index.

    Raises
    ------
    AssertionError
        If no file matches the pattern for a subject that belongs to a fold.
    """
    columns = ['Ids', 'Dice_1',
               'Sensitivity_1',
               'Specificity_1',
               'Surface_dice_1',
               'Hausdorff95_1',
               'Precision_1']
    metrics_all = pd.DataFrame(columns=columns)
    print(exp)
    for i, sub in enumerate(os.listdir(folder)):
        fold = next((k for k, members in enumerate(fold_lists) if sub in members), None)
        if fold is None:
            # Entry is in no fold list. Previously this either crashed on
            # unbound locals or silently re-read the previous subject's file
            # under this subject's id; such entries are now skipped.
            continue
        path_json = glob(patterns[fold].format(folder, sub, exp, fold))
        print(path_json)
        assert len(path_json) != 0, f'no metrics file for {sub!r} (exp={exp!r}, fold={fold})'
        # NOTE(review): when several files match, the first glob hit is used.
        metrics = mpu.io.read(path_json[0])[dataset][exp][f'fold={fold}']
        for each in metrics:
            metrics_all.at[i, 'Ids'] = sub
            if each != 'Ids':
                metrics_all.at[i, each] = np.round(metrics[each], 3)

    return metrics_all


def make_df_fold(folder, dataset, exp, list_fold_0, list_fold_1, list_fold_2):
    """Collect per-subject metrics from the ``<exp>_epoch*`` JSON files.

    Original note: for most experiments the experiment name ``exp`` should
    include ``_f``.

    Entries of ``folder`` that appear in none of the three fold lists are
    skipped. Metric values are rounded to 3 decimals. See
    ``_collect_fold_metrics`` for the row layout.
    """
    # The fold-1 template differs from the others (underscore after `epoch`);
    # each template is kept exactly as it was so the same files are matched.
    patterns = ('{}/{}/{}_epoch*_fold_{}_tta.json',
                '{}/{}/{}_epoch_*_fold_{}_tta.json',
                '{}/{}/{}_epoch*_fold_{}_tta.json')
    return _collect_fold_metrics(folder, dataset, exp,
                                 (list_fold_0, list_fold_1, list_fold_2), patterns)


def make_df_fold_300(folder, dataset, exp, list_fold_0, list_fold_1, list_fold_2):
    """Collect per-subject metrics from the ``<exp>_best_epoch_*`` JSON files.

    Original note: for most experiments the experiment name ``exp`` should
    include ``_f``.

    Same contract as ``make_df_fold``; only the file-name pattern differs.
    """
    pattern = '{}/{}/{}_best_epoch_*_fold_{}_tta.json'
    return _collect_fold_metrics(folder, dataset, exp,
                                 (list_fold_0, list_fold_1, list_fold_2), (pattern,) * 3)
    
def make_main_table(folder, dataset, fixed, moving, metrics, list_fold):
    """Summarise a fixed experiment against each moving experiment.

    Parameters
    ----------
    folder, dataset : str
        Forwarded to ``make_df_fold`` / ``make_df_fold_300``.
    fixed : str
        Reference experiment, loaded with ``make_df_fold``.
    moving : sequence of str
        Experiments compared against ``fixed``, loaded with
        ``make_df_fold_300``.
    metrics : sequence of str
        Metric columns to summarise.
    list_fold : mapping
        Must provide the keys ``'fold_0'``, ``'fold_1'`` and ``'fold_2'``.

    Returns
    -------
    (result_1, result) : tuple of pandas.DataFrame
        ``result_1`` has one row per moving experiment (row label = its
        position in ``moving``) with ``<metric>_Mean``, ``<metric>_STD`` and
        ``<metric>_p_value`` for every metric whose test succeeded.
        ``result`` has one row per (moving experiment, metric) pair holding the
        fixed experiment's ``<metric>_Mean`` / ``<metric>_STD``.

    Notes
    -----
    A metric whose test raises is reported on stdout and left out of
    ``result_1``; it used to be dropped silently by a bare ``except``.
    Both tables are now built from row records, because ``DataFrame.append``
    no longer exists in pandas 2.x.
    """
    list_fold_0 = list_fold['fold_0']
    list_fold_1 = list_fold['fold_1']
    list_fold_2 = list_fold['fold_2']
    df_1 = make_df_fold(folder, dataset, fixed, list_fold_0, list_fold_1, list_fold_2)

    fixed_rows = []    # one record per (moving experiment, metric)
    moving_rows = {}   # position in `moving` -> record for that experiment
    for k, exp in enumerate(moving):
        df_2 = make_df_fold_300(folder, dataset, exp, list_fold_0, list_fold_1, list_fold_2)
        for metric in metrics:
            print(metric)
            fixed_rows.append({'Experiment': fixed,
                               f'{metric}_Mean': np.round(df_1[metric].mean(), 3),
                               f'{metric}_STD': np.round(df_1[metric].std(), 3)})
            try:
                # NOTE(review): `correction` receives the number of moving
                # experiments; confirm that ttest_pair / researchpy interpret
                # it as intended.
                p_value = ttest_pair(metric, df_1, df_2, fixed, exp, correction=len(moving))
            except Exception as err:
                # Best effort: keep going, but say which comparison was lost.
                print(f'skipping {metric} for {exp}: {type(err).__name__}: {err}')
                continue
            row = moving_rows.setdefault(k, {'Experiment': exp})
            row[f'{metric}_Mean'] = np.round(df_2[metric].mean(), 3)
            row[f'{metric}_STD'] = np.round(df_2[metric].std(), 3)
            row[f'{metric}_p_value'] = np.round(p_value, 3)

    result = pd.DataFrame(fixed_rows)
    if moving_rows:
        result_1 = pd.DataFrame(list(moving_rows.values()), index=list(moving_rows))
    else:
        result_1 = pd.DataFrame()
    return result_1, result

In [628]:
# GBM: summary table of `gbm_4b_n4` against its TL variant.
dataset = 'gbm'
folder = '/results/metrics/gbm'
fixed = 'gbm_4b_n4'
moving = ['gbm_4b_n4_from_bgpd_TL_last_bgpd']

# Seed JSON; make_main_table reads its 'fold_0' / 'fold_1' / 'fold_2' entries.
seed = mpu.io.read(f'/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/metrics/{dataset}_seed.json')

df_stat_1, result = make_main_table(
    folder,
    dataset,
    fixed,
    moving,
    metrics=[
        'Dice_1',
        'Surface_dice_1',
        'Hausdorff95_1',
        'Sensitivity_1',
        'Specificity_1',
        'Precision_1',
    ],
    list_fold=seed,
)

gbm_4b_n4
['/results/metrics/gbm/TCGA-02-0033/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-14-1825/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-06-0188/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-02-0046/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-12-1094/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-76-6663/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-06-0182/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-06-0164/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-02-0011/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-19-2631/gbm_4b_n4_epoch_86-dice_mean_70_25_task_46_fold_1_tta.json']
['/results/metrics/g

In [629]:
# Column-wise mean of the moving-experiment statistics table returned by make_main_table.
df_stat_1.mean()

Dice_1_Mean                0.850
Dice_1_STD                 0.113
Dice_1_p_value             0.000
Surface_dice_1_Mean        0.644
Surface_dice_1_STD         0.210
Surface_dice_1_p_value     0.000
Hausdorff95_1_Mean        12.524
Hausdorff95_1_STD         18.518
Hausdorff95_1_p_value      0.023
Sensitivity_1_Mean         0.818
Sensitivity_1_STD          0.144
Sensitivity_1_p_value      0.000
Specificity_1_Mean         0.999
Specificity_1_STD          0.002
Specificity_1_p_value      0.008
Precision_1_Mean           0.903
Precision_1_STD            0.111
Precision_1_p_value        0.012
dtype: float64

In [613]:
# Column-wise mean of the fixed-experiment table returned by make_main_table
# (each row carries a single metric, so this collapses it to one value per column).
result.mean()

Dice_1_Mean             0.850
Dice_1_STD              0.116
Surface_dice_1_Mean     0.654
Surface_dice_1_STD      0.205
Hausdorff95_1_Mean     13.096
Hausdorff95_1_STD      19.665
Sensitivity_1_Mean      0.812
Sensitivity_1_STD       0.146
Specificity_1_Mean      0.999
Specificity_1_STD       0.002
Precision_1_Mean        0.911
Precision_1_STD         0.112
dtype: float64

In [660]:
# List the metric files stored under /results/metrics/bgpd/1019_18.
!cd /results/metrics/bgpd/1019_18 && ls -l

total 620
-rw-r--r-- 1 root root 269 Sep  4 11:15 bgpd_1_reg_best_epoch_250-dice_mean_64_50_task_53_fold_1_tta.json
-rw-r--r-- 1 root root 271 Sep  4 11:49 bgpd_1_reg_best_epoch_282-dice_mean_71_47_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 270 Sep  4 10:38 bgpd_1_reg_best_epoch_299-dice_mean_67_61_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 296 Aug  8 12:59 bgpd_1_reg_best_epoch_95-dice_mean_63_35_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 298 Aug 15 14:47 bgpd_1_reg_epoch_299-dice_mean_67_61_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 299 Aug 10 12:34 bgpd_1_reg_epoch_81-dice_mean_64_83_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 296 Aug 10 12:58 bgpd_1_reg_epoch_87-dice_mean_62_16_task_53_fold_1_tta.json
-rw-r--r-- 1 root root 296 Aug 11 17:57 bgpd_1_reg_epoch_95-dice_mean_63_35_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 277 Sep 29 12:00 bgpd_1_reg_from_gbm_best_epoch_52-dice_mean_63_89_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 278 Sep 29 17:05 bgpd_1_reg_f

In [671]:
# BGPD: summary table of `bgpd_3a_atlas` against its TL variant.
dataset = 'bgpd'
folder = '/results/metrics/bgpd'
fixed = 'bgpd_3a_atlas'
moving = ['bgpd_3a_atlas_from_brats_TL']

# Seed JSON; make_main_table reads its 'fold_0' / 'fold_1' / 'fold_2' entries.
seed = mpu.io.read(f'/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/metrics/{dataset}_seed.json')

df_stat_1, result = make_main_table(
    folder,
    dataset,
    fixed,
    moving,
    metrics=[
        'Dice_1',
        'Surface_dice_1',
        'Hausdorff95_1',
        'Sensitivity_1',
        'Specificity_1',
        'Precision_1',
    ],
    list_fold=seed,
)

bgpd_3a_atlas
['/results/metrics/bgpd/1043_18_4/bgpd_3a_atlas_epoch_99-dice_mean_70_04_task_50_fold_2_tta.json']
['/results/metrics/bgpd/1770_18_4/bgpd_3a_atlas_epoch_99-dice_mean_70_04_task_50_fold_2_tta.json']
['/results/metrics/bgpd/Patient_2914/bgpd_3a_atlas_epoch_99-dice_mean_70_59_task_50_fold_0_tta.json']
['/results/metrics/bgpd/Patient_92114/bgpd_3a_atlas_epoch_99-dice_mean_70_59_task_50_fold_0_tta.json']
['/results/metrics/bgpd/349_18_4/bgpd_3a_atlas_epoch_99-dice_mean_70_04_task_50_fold_2_tta.json']
['/results/metrics/bgpd/1170_18_4/bgpd_3a_atlas_epoch_99-dice_mean_70_04_task_50_fold_2_tta.json']
['/results/metrics/bgpd/Patient_109017/bgpd_3a_atlas_epoch_99-dice_mean_70_04_task_50_fold_2_tta.json']
['/results/metrics/bgpd/1795_18_/bgpd_3a_atlas_epoch_99-dice_mean_70_59_task_50_fold_0_tta.json']
['/results/metrics/bgpd/Patient_62315/bgpd_3a_atlas_epoch_99-dice_mean_70_59_task_50_fold_0_tta.json']
['/results/metrics/bgpd/Patient_140316/bgpd_3a_atlas_epoch_99-dice_mean_70_04_tas

In [673]:
# Column-wise mean of the moving-experiment statistics table returned by make_main_table.
df_stat_1.mean()

Dice_1_Mean               0.744
Dice_1_STD                0.204
Dice_1_p_value            0.000
Surface_dice_1_Mean       0.441
Surface_dice_1_STD        0.192
Surface_dice_1_p_value    0.000
Hausdorff95_1_Mean          inf
Hausdorff95_1_STD           NaN
Hausdorff95_1_p_value     0.000
Sensitivity_1_Mean        0.755
Sensitivity_1_STD         0.237
Sensitivity_1_p_value     0.777
Specificity_1_Mean        0.998
Specificity_1_STD         0.003
Specificity_1_p_value     0.000
dtype: float64

In [635]:
# Column-wise mean of the fixed-experiment table returned by make_main_table
# (each row carries a single metric, so this collapses it to one value per column).
result.mean()

Dice_1_Mean             0.706
Dice_1_STD              0.194
Surface_dice_1_Mean     0.378
Surface_dice_1_STD      0.181
Hausdorff95_1_Mean     39.124
Hausdorff95_1_STD      49.134
Sensitivity_1_Mean      0.733
Sensitivity_1_STD       0.228
Specificity_1_Mean      0.998
Specificity_1_STD       0.003
Precision_1_Mean        0.744
Precision_1_STD         0.226
dtype: float64

## Filtering metrics

In [414]:
# Every entry in the gbm metrics folder whose name starts with `EGD`.
all_exp = glob('/results/metrics/gbm/EGD*')


In [415]:
import shutil

# Relocate each matched EGD entry into /results/metrics/egd under the same
# base name, taking it out of the gbm metrics folder.
for each in all_exp:
    name = os.path.basename(each)
    shutil.move(each, f'/results/metrics/egd/{name}')

## Difference between experiments with gbm and gbm TL

In [502]:
def make_diff(folder, dataset, fixed, moving, metrics, list_fold):
    """Put the fixed and moving metric tables side by side and add their differences.

    ``fixed`` is loaded with ``make_df_fold`` and ``moving`` with
    ``make_df_fold_300``; both tables must have the same number of rows.
    The returned frame is the column-wise concatenation of the two tables
    (so the metric column names appear twice) followed by one
    ``Diff_<metric>`` column per requested metric, computed as
    fixed minus moving.

    ``list_fold`` must provide the keys ``'fold_0'``, ``'fold_1'`` and
    ``'fold_2'``.
    """
    folds = (list_fold['fold_0'], list_fold['fold_1'], list_fold['fold_2'])
    baseline = make_df_fold(folder, dataset, fixed, *folds)
    variant = make_df_fold_300(folder, dataset, moving, *folds)
    assert len(baseline) == len(variant)

    combined = pd.concat([baseline, variant], axis=1)
    for name in metrics:
        combined[f'Diff_{name}'] = baseline[name] - variant[name]
    return combined

In [630]:
# GBM: per-row metric differences between `gbm_4b_n4` and its TL variant.
dataset = 'gbm'
folder = '/results/metrics/gbm'
fixed = 'gbm_4b_n4'
moving = 'gbm_4b_n4_from_bgpd_TL_last_bgpd'

# Seed JSON; make_diff reads its 'fold_0' / 'fold_1' / 'fold_2' entries.
seed = mpu.io.read(f'/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/metrics/{dataset}_seed.json')

diff = make_diff(
    folder,
    dataset,
    fixed,
    moving,
    metrics=[
        'Dice_1',
        'Surface_dice_1',
        'Hausdorff95_1',
        'Sensitivity_1',
        'Specificity_1',
        'Precision_1',
    ],
    list_fold=seed,
)

gbm_4b_n4
['/results/metrics/gbm/TCGA-02-0033/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-14-1825/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-06-0188/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-02-0046/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-12-1094/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-76-6663/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-06-0182/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-06-0164/gbm_4b_n4_epoch_89-dice_mean_73_95_task_46_fold_2_tta.json']
['/results/metrics/gbm/TCGA-02-0011/gbm_4b_n4_epoch_82-dice_mean_75_38_task_46_fold_0_tta.json']
['/results/metrics/gbm/TCGA-19-2631/gbm_4b_n4_epoch_86-dice_mean_70_25_task_46_fold_1_tta.json']
['/results/metrics/g

In [631]:
# Mean (3 decimals) and standard deviation (2 decimals) of the Dice_1
# difference, which make_diff computes as fixed minus moving.
np.round(diff['Diff_Dice_1'].mean(),3), np.round(diff['Diff_Dice_1'].std(),2)

(-0.023, 0.06)

## Difference between experiments with bgpd and bgpd TL

In [526]:
def make_diff(folder, dataset, fixed, moving, metrics, list_fold):
    """Put the fixed and moving metric tables side by side and add their differences.

    NOTE(review): this cell re-defines ``make_diff`` with the same logic as the
    definition in the "Difference between experiments with gbm and gbm TL"
    section; a single definition would be enough.

    ``fixed`` is loaded with ``make_df_fold`` and ``moving`` with
    ``make_df_fold_300``; both tables must have the same number of rows.
    The returned frame is the column-wise concatenation of the two tables
    (so the metric column names appear twice) followed by one
    ``Diff_<metric>`` column per requested metric, computed as
    fixed minus moving.

    ``list_fold`` must provide the keys ``'fold_0'``, ``'fold_1'`` and
    ``'fold_2'``.
    """
    fold_0, fold_1, fold_2 = (list_fold[key] for key in ('fold_0', 'fold_1', 'fold_2'))
    fixed_table = make_df_fold(folder, dataset, fixed, fold_0, fold_1, fold_2)
    moving_table = make_df_fold_300(folder, dataset, moving, fold_0, fold_1, fold_2)
    assert len(fixed_table) == len(moving_table)

    side_by_side = pd.concat([fixed_table, moving_table], axis=1)
    for column in metrics:
        side_by_side[f'Diff_{column}'] = fixed_table[column] - moving_table[column]
    return side_by_side

In [636]:
# BGPD: per-row metric differences between `bgpd_4b_n4` and `bgpd_4b_n4_from_gbm_last`.
dataset = 'bgpd'
folder = '/results/metrics/bgpd'
fixed = 'bgpd_4b_n4'
moving = 'bgpd_4b_n4_from_gbm_last'

# Seed JSON; make_diff reads its 'fold_0' / 'fold_1' / 'fold_2' entries.
seed = mpu.io.read(f'/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/metrics/{dataset}_seed.json')

diff = make_diff(
    folder,
    dataset,
    fixed,
    moving,
    metrics=[
        'Dice_1',
        'Surface_dice_1',
        'Hausdorff95_1',
        'Sensitivity_1',
        'Specificity_1',
        'Precision_1',
    ],
    list_fold=seed,
)

bgpd_4b_n4
['/results/metrics/bgpd/1043_18_4/bgpd_4b_n4_best_epoch_95-dice_mean_69_24_task_54_fold_2_tta.json']
['/results/metrics/bgpd/1770_18_4/bgpd_4b_n4_best_epoch_95-dice_mean_69_24_task_54_fold_2_tta.json']
['/results/metrics/bgpd/Patient_2914/bgpd_4b_n4_best_epoch_92-dice_mean_72_48_task_54_fold_0_tta.json']
['/results/metrics/bgpd/Patient_92114/bgpd_4b_n4_best_epoch_92-dice_mean_72_48_task_54_fold_0_tta.json']
['/results/metrics/bgpd/349_18_4/bgpd_4b_n4_best_epoch_95-dice_mean_69_24_task_54_fold_2_tta.json']
['/results/metrics/bgpd/1170_18_4/bgpd_4b_n4_best_epoch_95-dice_mean_69_24_task_54_fold_2_tta.json']
['/results/metrics/bgpd/Patient_109017/bgpd_4b_n4_best_epoch_95-dice_mean_69_24_task_54_fold_2_tta.json']
['/results/metrics/bgpd/1795_18_/bgpd_4b_n4_best_epoch_92-dice_mean_72_48_task_54_fold_0_tta.json']
['/results/metrics/bgpd/Patient_62315/bgpd_4b_n4_best_epoch_92-dice_mean_72_48_task_54_fold_0_tta.json']
['/results/metrics/bgpd/Patient_140316/bgpd_4b_n4_best_epoch_95-di

In [637]:
# Mean (3 decimals) and standard deviation (2 decimals) of the Dice_1
# difference, which make_diff computes as fixed minus moving.
np.round(diff['Diff_Dice_1'].mean(),3), np.round(diff['Diff_Dice_1'].std(),2)

(-0.021, 0.09)

In [619]:
# List the metric files stored under /results/metrics/bgpd/1254_18.
!cd /results/metrics/bgpd/1254_18 && ls -l

total 588
-rw-r--r-- 1 root root 275 Sep  4 11:20 bgpd_1_reg_best_epoch_250-dice_mean_64_50_task_53_fold_1_tta.json
-rw-r--r-- 1 root root 274 Sep  4 11:54 bgpd_1_reg_best_epoch_282-dice_mean_71_47_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 274 Sep  4 10:42 bgpd_1_reg_best_epoch_299-dice_mean_67_61_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 301 Aug  8 13:04 bgpd_1_reg_best_epoch_95-dice_mean_63_35_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 302 Aug 15 14:51 bgpd_1_reg_epoch_299-dice_mean_67_61_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 303 Aug 10 12:39 bgpd_1_reg_epoch_81-dice_mean_64_83_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 302 Aug 10 13:03 bgpd_1_reg_epoch_87-dice_mean_62_16_task_53_fold_1_tta.json
-rw-r--r-- 1 root root 301 Aug 11 19:08 bgpd_1_reg_epoch_95-dice_mean_63_35_task_53_fold_2_tta.json
-rw-r--r-- 1 root root 282 Sep 29 12:04 bgpd_1_reg_from_gbm_best_epoch_52-dice_mean_63_89_task_53_fold_0_tta.json
-rw-r--r-- 1 root root 282 Sep 29 17:44 bgpd_1_reg_f