In [1]:
from pathlib import Path

import numpy as np
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Load 'sparsity.pickle' into the module-level name `data`.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load files that come from a trusted source.
data = np.load('sparsity.pickle', allow_pickle=True)

In [2]:
# Pickle files fed to statistical_test(); presumably one file per evaluation
# metric -- TODO confirm.
filenames = [
    "average_shift.pickle",
    "cohesive.pickle",
    "conformity.pickle",
    "proximity.pickle",
    "sparsity.pickle",
]

In [3]:
def statistical_test(filenames):
    """Run a one-way ANOVA and a Tukey HSD comparison for each results file.

    For every path in ``filenames`` the file is loaded with ``np.load`` and
    the four series stored under the keys ``'Dice'``, ``'Certif'``,
    ``'Our_method'`` and ``'GE_NSGAII'`` are compared: first with
    ``f_oneway``, then pairwise with ``pairwise_tukeyhsd`` at ``alpha=0.05``.
    Everything is printed; the function returns ``None``.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the pickled mappings to analyse. Each mapping must provide
        the four keys listed above.
    """
    # Display label and lookup key are the same string; the order here fixes
    # both the argument order of the ANOVA and the layout of the Tukey input.
    method_keys = ('Dice', 'Certif', 'Our_method', 'GE_NSGAII')

    for filename in filenames:
        # Path.stem keeps only the base name without its final suffix, so the
        # label stays correct for paths such as "./results/sparsity.pickle"
        # (splitting on the first "." would yield an empty label there).
        print("Testing for :", Path(filename).stem)

        # NOTE(review): allow_pickle=True unpickles arbitrary Python objects;
        # only pass files that come from a trusted source.
        data = np.load(filename, allow_pickle=True)
        samples = [np.array(data[key]) for key in method_keys]

        f_statistic, p_value = f_oneway(*samples)

        print("F-Statistic:", f_statistic)
        print("P-Value:", p_value)

        print("---------Tukey pairwise hsd-----------------")

        # One group label per observation, aligned with the concatenated values.
        values = np.concatenate(samples)
        labels = np.concatenate(
            [[key] * len(sample) for key, sample in zip(method_keys, samples)]
        )
        tukey_results = pairwise_tukeyhsd(values, labels, alpha=0.05)

        print(tukey_results)

        print("--" * 30)
        
                
            

In [4]:
# Run the ANOVA + Tukey HSD comparison for every file listed in `filenames`.
statistical_test(filenames)

Testing for : average_shift
F-Statistic: 58.22564836725332
P-Value: 4.8926800626302796e-23
---------Turkey pairwise hsd-----------------
    Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower   upper  reject
-----------------------------------------------------------
   Certif       Dice  -0.1129    0.0 -0.1409 -0.0848   True
   Certif  GE_NSGAII   0.0139 0.5682 -0.0141   0.042  False
   Certif Our_method  -0.0083 0.8688 -0.0363  0.0198  False
     Dice  GE_NSGAII   0.1268    0.0  0.0987  0.1549   True
     Dice Our_method   0.1046    0.0  0.0765  0.1327   True
GE_NSGAII Our_method  -0.0222 0.1717 -0.0503  0.0059  False
-----------------------------------------------------------
------------------------------------------------------------
Testing for : cohesive
F-Statistic: 10818.301891652169
P-Value: 8.523781877760859e-142
---------Turkey pairwise hsd-----------------
            Multiple Comparison of Means - Tukey HSD, FWER=0.05      

In [8]:
# Hard-coded robustness scores, 30 values per method, consumed by
# statistical_test_robustness().
# NOTE(review): dice_robustness[13] (0.11875) is far below the rest of that
# series -- confirm it is a genuine measurement and not a data-entry error.
robustness_dict = {
    'dice_robustness': [
        0.5881637168141594,
        0.5181297709923666,
        0.5648437499999993,
        0.5739197530864195,
        0.5170903954802258,
        0.5902616279069769,
        0.5173507462686563,
        0.548399390243902,
        0.5366150442477876,
        0.5778431372549014,
        0.5845864661654138,
        0.5795348837209301,
        0.4977011494252876,
        0.11875,
        0.6116564417177912,
        0.5714962121212119,
        0.5365523465703971,
        0.5888528138528136,
        0.5562015503875973,
        0.5959239130434779,
        0.5726141078838175,
        0.5334801762114535,
        0.5343567251461991,
        0.5954285714285715,
        0.5332746478873239,
        0.5692961165048543,
        0.5624999999999998,
        0.5632275132275132,
        0.4706168831168834,
        0.6238636363636364,
    ],
    'Our_method_robustness': [
        0.7183628318584062,
        0.7609732824427476,
        0.8889648437500002,
        0.9058641975308642,
        0.9105932203389825,
        0.9845930232558139,
        0.6672574626865675,
        0.7653201219512196,
        0.6754424778761063,
        0.6818627450980392,
        0.9906015037593985,
        0.8105813953488371,
        0.9885057471264371,
        0.75625,
        0.9920245398773005,
        0.7868371212121211,
        0.743682310469314,
        0.9734848484848486,
        0.9515503875968999,
        0.9953804347826086,
        0.9993775933609959,
        0.6747797356828193,
        0.9739766081871344,
        0.7984285714285716,
        0.9253521126760565,
        0.7117718446601943,
        0.8231343283582073,
        0.6964285714285721,
        0.7909090909090903,
        0.9100649350649357,
    ],
    'certif_robustness': [
        0.5781887755102042,
        0.5815051020408161,
        0.5714285714285713,
        0.5533163265306125,
        0.5872448979591836,
        0.5542091836734695,
        0.6049744897959187,
        0.5963010204081632,
        0.5635204081632652,
        0.5710459183673467,
        0.5593112244897959,
        0.5720663265306127,
        0.5579081632653063,
        0.5575255102040819,
        0.5730867346938776,
        0.6121173469387755,
        0.5229591836734695,
        0.5785714285714288,
        0.5438775510204082,
        0.57984693877551,
        0.552295918367347,
        0.5474489795918366,
        0.5545918367346943,
        0.5483418367346937,
        0.5366071428571424,
        0.5499999999999998,
        0.5660714285714283,
        0.538520408163265,
        0.5684948979591835,
        0.5687500000000001,
    ],
    'GE_NSGAII_robustness': [
        0.8721638655462184,
        0.8382838283828381,
        0.8506963788300858,
        0.9231884057971015,
        0.9156976744186048,
        0.9950862068965517,
        0.8963793103448275,
        0.9516483516483516,
        0.7659817351598174,
        0.881818181818182,
        0.9828767123287672,
        0.8793147208121828,
        0.9617647058823502,
        0.83125,
        0.9946052631578947,
        0.8658181818181818,
        0.7521523178807945,
        0.8869565217391304,
        0.9593023255813954,
        0.9859903381642513,
        0.9958333333333333,
        0.7433615819209038,
        0.9785714285714286,
        0.8728860294117647,
        0.8933098591549293,
        0.8704773869346734,
        0.8172413793103442,
        0.8462025316455699,
        0.6632211538461538,
        0.9485119047619047,
    ],
}


def statistical_test_robustness(data):
    """Run a one-way ANOVA and a Tukey HSD comparison on robustness scores.

    The four series stored in ``data`` under ``'dice_robustness'``,
    ``'certif_robustness'``, ``'Our_method_robustness'`` and
    ``'GE_NSGAII_robustness'`` are compared with ``f_oneway`` and then
    pairwise with ``pairwise_tukeyhsd`` at ``alpha=0.05``. Everything is
    printed; the function returns ``None``.

    Parameters
    ----------
    data : mapping of str to sequence of float
        Must provide the four ``*_robustness`` keys listed above.
    """
    # (label shown in the Tukey table, key looked up in `data`); the order
    # fixes both the ANOVA argument order and the layout of the Tukey input.
    methods = (
        ('Dice', 'dice_robustness'),
        ('Certif', 'certif_robustness'),
        ('Our_method', 'Our_method_robustness'),
        ('GE_NSGAII', 'GE_NSGAII_robustness'),
    )
    samples = [np.array(data[key]) for _, key in methods]

    f_statistic, p_value = f_oneway(*samples)

    print("F-Statistic:", f_statistic)
    print("P-Value:", p_value)

    print("---------Tukey pairwise hsd-----------------")

    # One group label per observation, aligned with the concatenated values.
    values = np.concatenate(samples)
    labels = np.concatenate(
        [[label] * len(sample) for (label, _), sample in zip(methods, samples)]
    )
    tukey_results = pairwise_tukeyhsd(values, labels, alpha=0.05)

    print(tukey_results)

    print("--" * 30)
    
# Run the ANOVA + Tukey HSD comparison on the hard-coded robustness scores.
statistical_test_robustness(robustness_dict)

F-Statistic: 133.88850984714998
P-Value: 1.6092915916908167e-37
---------Turkey pairwise hsd-----------------
   Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower  upper  reject
----------------------------------------------------------
   Certif       Dice  -0.0206 0.7858 -0.0779 0.0368  False
   Certif  GE_NSGAII   0.3223    0.0   0.265 0.3797   True
   Certif Our_method   0.2767    0.0  0.2194 0.3341   True
     Dice  GE_NSGAII   0.3429    0.0  0.2856 0.4003   True
     Dice Our_method   0.2973    0.0    0.24 0.3547   True
GE_NSGAII Our_method  -0.0456 0.1683  -0.103 0.0118  False
----------------------------------------------------------
------------------------------------------------------------
