In [1]:
import os
import numpy as np
import scipy

In [2]:
folders = os.listdir('.')

In [3]:
def read_data(file):
    with open(file) as f:
        lines = f.readlines()
        #print(lines)
        lines = [float(l.split(": ")[-1][:-1]) for l in lines[0:9]]
    return np.array(lines)

In [4]:
def get_model_results(loss_name, datafile):
    selected_data_files = [f"{f}/{datafile}.txt" for f in folders if loss_name in f]
    arrays = [read_data(f) for f in selected_data_files]
    arrays = np.array(arrays)
    return arrays

In [5]:
def load_all_data_files(loss_name):
    data = []
    for dfname in ["data", "new_bras", "new_hd95"]:
        try:
            data.append(get_model_results(loss_name, dfname))
        except:
            print(f"file {dfname}.txt could not be loaded for: {loss_name}")
    return np.concatenate(data, axis=1)

In [6]:
loss_names = ["brier_power2_", "spherical_alpha2_", "xent_e_", "brier_topk10_", 
              "spherical_topk1_", "xent_topk10_", "dice_standard_", "dice_temp_scaled_", "dice_plusplus_gamma3_"]

In [7]:
results = {}
for ln in loss_names:
    results[ln] = load_all_data_files(ln)

file new_hd95.txt could not be loaded for: dice_temp_scaled_


In [8]:
# key order is:
key_id = {key:idx for (idx, key) in enumerate(["old_bras", "ece", "dice", "f1", "avd", "old_hd95", "recall", "max_ueo", "prop_missed", "uus", "new_hd95"])}

In [9]:
results["dice_plusplus_gamma3_"]

array([[ 0.88536098,  0.08335576,  0.74000001,  0.66000003, 26.55999947,
         4.34000015,  0.69999999,  0.42293313,  0.14064537,  0.68460005,
         6.61999989],
       [ 0.86001586,  0.08635876,  0.74000001,  0.66000003, 29.42000008,
         4.36000013,  0.69      ,  0.42855981,  0.14716427,  0.6732083 ,
         6.5       ],
       [ 0.89745781,  0.06153423,  0.74000001,  0.68000001, 26.12999916,
         4.32999992,  0.70999998,  0.42170531,  0.14292699,  0.6937374 ,
         6.67000008]])

In [10]:
model1 = loss_names[1]
model2 = loss_names[-1]
key = "uus"
print(model1, model2, key)
inp1 = results[model1][:,key_id[key]] + np.random.randn(3) * 0.000001
inp2 = results[model2][:,key_id[key]] + np.random.randn(3) * 0.000001
print(inp1)
print(inp2)
scipy.stats.ttest_ind(inp1, inp2)

spherical_alpha2_ dice_plusplus_gamma3_ uus
[0.68996248 0.68677709 0.68801974]
[0.68459982 0.6732083  0.69373687]


Ttest_indResult(statistic=0.7329206942546006, pvalue=0.504242864410653)

In [23]:
0.2/0.72

0.2777777777777778

In [24]:
10/45

0.2222222222222222

In [41]:
np.std(results["dice_standard_"][:,4], ddof=1)

10.370289769257985

In [25]:
results["dice_standard_"][:,3]

array([0.63999999, 0.61000001, 0.63      ])

In [44]:
for model1id in range(9):
    model1 = loss_names[model1id]
    model2 = loss_names[-3]
    key = "avd"
    print(model1, model2, key)
    try:
        inp1 = results[model1][:,key_id[key]] + np.random.randn(3) * 0.00000001
        inp2 = [25, 25, 25]# results[model2][:,key_id[key]] + np.random.randn(3) * 0.00000001
        print(inp1)
        print(inp2)
        print(scipy.stats.ttest_ind(inp1, inp2, alternative='less'))
        # print(scipy.stats.ttest_ind_from_stats(np.mean(inp1), np.std(inp1, ddof=1), len(inp1), np.mean(inp2), np.std(inp2, ddof=1), len(inp2)))
    except:
        print(f"failed for {model1}")
    print("----------------------------------")

brier_power2_ dice_standard_ avd
[20.45000075 21.20999908 20.47999955]
[25, 25, 25]
Ttest_indResult(statistic=-17.251284306252792, pvalue=3.312600305092265e-05)
----------------------------------
spherical_alpha2_ dice_standard_ avd
[19.46999931 21.63999941 19.77000045]
[25, 25, 25]
Ttest_indResult(statistic=-6.932991312161481, pvalue=0.0011362711492554174)
----------------------------------
xent_e_ dice_standard_ avd
[19.60000039 20.65999985 21.44000053]
[25, 25, 25]
Ttest_indResult(statistic=-8.314448077824796, pvalue=0.0005715111682802779)
----------------------------------
brier_topk10_ dice_standard_ avd
[21.63999939 20.34000014 20.84000016]
[25, 25, 25]
Ttest_indResult(statistic=-10.723898894919463, pvalue=0.00021426174934607834)
----------------------------------
spherical_topk1_ dice_standard_ avd
[18.40999984 19.85000039 20.63999939]
[25, 25, 25]
Ttest_indResult(statistic=-8.22103075625029, pvalue=0.0005966874048850194)
----------------------------------
xent_topk10_ dice_stan

  print(scipy.stats.ttest_ind(inp1, inp2, alternative='less'))
