In [1]:
from pathlib import Path

import numpy as np
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Exploratory load of a single results file; `data` is not referenced again
# at top level in the visible cells.
# NOTE: allow_pickle=True unpickles the file, which can run arbitrary code,
# so this must only be pointed at trusted files.
data = np.load("sparsity.pickle", allow_pickle=True)

In [2]:
# Results files passed to statistical_test(); presumably one file per
# evaluation metric (inferred from the names -- confirm with the producer).
filenames = [
    "average_shift.pickle",
    "cohesive.pickle",
    "conformity.pickle",
    "proximity.pickle",
    "sparsity.pickle",
]

In [3]:
def statistical_test(filenames, groups=("Dice", "Certif", "Our_method", "GE_NSGAII"), alpha=0.05):
    """Print a one-way ANOVA and a Tukey HSD table for every results file.

    For each file, the samples stored under each key in ``groups`` are
    compared with ``scipy.stats.f_oneway`` and then pairwise with
    statsmodels' ``pairwise_tukeyhsd``. Everything is printed; the function
    returns ``None``.

    Parameters
    ----------
    filenames : iterable of str or path-like
        Files loadable with ``np.load(..., allow_pickle=True)``. Each one
        must load to a mapping that contains every key in ``groups``.
    groups : sequence of str, optional
        Keys to read from each file. They are also used as the group labels
        in the Tukey table. Defaults to the four methods compared here.
    alpha : float, optional
        Significance level forwarded to ``pairwise_tukeyhsd``.

    Raises
    ------
    KeyError
        If a loaded mapping lacks one of the keys in ``groups``.
    """
    for filename in filenames:
        # Path.stem reports only the base name without its last suffix, so
        # "./results/x.pickle" is shown as "x"; splitting on the first "."
        # produced an empty label for such relative paths.
        print("Testing for :", Path(filename).stem)

        # SECURITY: allow_pickle=True unpickles the file, which can execute
        # arbitrary code -- only load files from a trusted source.
        data = np.load(filename, allow_pickle=True)
        samples = [np.array(data[name]) for name in groups]

        f_statistic, p_value = f_oneway(*samples)
        print("F-Statistic:", f_statistic)
        print("P-Value:", p_value)

        print("---------Tukey pairwise hsd-----------------")

        # One label per observation, in the same order as the concatenated
        # values, so every value stays attached to its own group.
        labels = np.repeat(list(groups), [len(sample) for sample in samples])
        tukey_results = pairwise_tukeyhsd(np.concatenate(samples), labels, alpha=alpha)
        print(tukey_results)

        print("--" * 30)

In [4]:
# Print the ANOVA and Tukey HSD report for every file in `filenames`.
statistical_test(filenames=filenames)

Testing for : average_shift
F-Statistic: 6.392804415151182
P-Value: 0.0004782130105269991
---------Turkey pairwise hsd-----------------
    Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower   upper  reject
-----------------------------------------------------------
   Certif       Dice   0.0712 0.0115   0.012  0.1303   True
   Certif  GE_NSGAII  -0.0162 0.8912 -0.0753  0.0429  False
   Certif Our_method  -0.0103 0.9686 -0.0694  0.0488  False
     Dice  GE_NSGAII  -0.0874 0.0011 -0.1465 -0.0282   True
     Dice Our_method  -0.0815 0.0027 -0.1406 -0.0223   True
GE_NSGAII Our_method   0.0059 0.9938 -0.0532   0.065  False
-----------------------------------------------------------
------------------------------------------------------------
Testing for : cohesive
F-Statistic: 14.62099727055026
P-Value: 4.22806341503612e-08
---------Turkey pairwise hsd-----------------
          Multiple Comparison of Means - Tukey HSD, FWER=0.05           


In [5]:
# Robustness scores for the four compared methods, keyed "<method>_robustness".
# Every list holds 30 values and all listed values lie in [0, 1].
# Presumably one score per evaluated instance/run, hard-coded from an earlier
# experiment -- TODO confirm provenance with whoever produced the numbers.
robustness_dict={'dice_robustness': [1.0,
  1.0,
  0.985,
  1.0,
  0.9949999999999999,
  1.0,
  0.8,
  0.9,
  1.0,
  1.0,
  0.875,
  0.9125000000000001,
  1.0,
  0.805,
  1.0,
  0.8654411764705884,
  1.0,
  0.9765624999999999,
  0.96875,
  1.0,
  0.9,
  1.0,
  0.795,
  1.0,
  1.0,
  1.0,
  1.0,
  0.9980769230769231,
  1.0,
  0.9],
 'Our_method_robustness': [1.0,
  0.95,
  0.9049999999999999,
  0.89,
  0.5900000000000001,
  0.6859375,
  0.79,
  0.805,
  0.48906250000000007,
  1.0,
  0.8774999999999998,
  0.85625,
  0.925,
  1.0,
  0.81875,
  0.20992647058823527,
  1.0,
  0.30937499999999996,
  0.6749999999999999,
  0.6781250000000001,
  0.9428571428571428,
  1.0,
  0.9949999999999999,
  0.7607142857142858,
  1.0,
  1.0,
  0.825,
  0.8288461538461538,
  0.5125,
  0.796875],
 'certif_robustness': [0.7916666666666666,
  0.8305555555555555,
  0.9833333333333333,
  0.9055555555555556,
  0.9194444444444445,
  0.8333333333333334,
  0.7861111111111111,
  0.9472222222222223,
  1.0,
  0.9527777777777777,
  0.9888888888888889,
  0.9888888888888889,
  0.9555555555555555,
  0.8333333333333334,
  0.8638888888888889,
  0.8694444444444444,
  0.9749999999999999,
  0.8583333333333333,
  0.9666666666666666,
  0.8888888888888888,
  0.7,
  0.8388888888888889,
  0.9888888888888889,
  0.8944444444444445,
  0.7833333333333333,
  0.9222222222222223,
  0.7722222222222221,
  0.9666666666666666,
  0.8777777777777778,
  0.8833333333333333],
 'GE_NSGAII_robustness': [1.0,
  0.99375,
  0.79375,
  0.4865384615384615,
  0.9642857142857142,
  0.75,
  1.0,
  0.865625,
  0.5359375,
  0.975,
  0.8600000000000001,
  1.0,
  0.9833333333333334,
  0.525,
  1.0,
  0.3527777777777778,
  0.95,
  0.4428571428571429,
  0.6229166666666666,
  0.8916666666666666,
  0.865,
  1.0,
  1.0,
  0.71,
  1.0,
  1.0,
  0.9500000000000001,
  0.8416666666666667,
  0.1625,
  1.0]}
def statistical_test_robustness(data, alpha=0.05):
    """Print a one-way ANOVA and a Tukey HSD table for robustness scores.

    The four groups are compared with ``scipy.stats.f_oneway`` and then
    pairwise with statsmodels' ``pairwise_tukeyhsd``. Everything is printed;
    the function returns ``None``.

    Parameters
    ----------
    data : mapping
        Must contain the keys ``'dice_robustness'``, ``'certif_robustness'``,
        ``'Our_method_robustness'`` and ``'GE_NSGAII_robustness'``, each
        holding a sequence of numeric scores.
    alpha : float, optional
        Significance level forwarded to ``pairwise_tukeyhsd``.

    Raises
    ------
    KeyError
        If ``data`` lacks one of the four expected keys.
    """
    # Label shown in the Tukey table -> key that stores that group's scores.
    # Keeping both in one mapping guarantees labels and values stay aligned.
    group_keys = {
        "Dice": "dice_robustness",
        "Certif": "certif_robustness",
        "Our_method": "Our_method_robustness",
        "GE_NSGAII": "GE_NSGAII_robustness",
    }
    labels_in_order = list(group_keys)
    samples = [np.array(data[group_keys[label]]) for label in labels_in_order]

    f_statistic, p_value = f_oneway(*samples)
    print("F-Statistic:", f_statistic)
    print("P-Value:", p_value)

    print("---------Tukey pairwise hsd-----------------")

    # One label per observation, in the same order as the concatenated values.
    labels = np.repeat(labels_in_order, [len(sample) for sample in samples])
    tukey_results = pairwise_tukeyhsd(np.concatenate(samples), labels, alpha=alpha)
    print(tukey_results)

    print("--" * 30)
    
# Print the ANOVA and Tukey HSD report for the hard-coded robustness scores.
statistical_test_robustness(data=robustness_dict)

F-Statistic: 5.524784120581755
P-Value: 0.0013968775366034589
---------Turkey pairwise hsd-----------------
    Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower   upper  reject
-----------------------------------------------------------
   Certif       Dice   0.0637 0.4424 -0.0472  0.1745  False
   Certif  GE_NSGAII  -0.0748 0.2981 -0.1856   0.036  False
   Certif Our_method  -0.0883 0.1665 -0.1991  0.0225  False
     Dice  GE_NSGAII  -0.1385 0.0079 -0.2493 -0.0276   True
     Dice Our_method   -0.152 0.0028 -0.2628 -0.0412   True
GE_NSGAII Our_method  -0.0135 0.9888 -0.1243  0.0973  False
-----------------------------------------------------------
------------------------------------------------------------
