In [1]:
from pathlib import Path

import numpy as np
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# NOTE(review): `data` is not referenced by any later cell visible in this
# notebook (statistical_test loads each file itself) — confirm whether this
# exploratory load is still needed.
# allow_pickle=True unpickles the file; only load files from a trusted source.
data = np.load(file='sparsity.pickle', allow_pickle=True)

In [2]:
# Pickle files handed to statistical_test; the part of each name before the
# extension is what gets printed as the label for that test.
filenames = [
    "average_shift.pickle",
    "cohesive.pickle",
    "conformity.pickle",
    "proximity.pickle",
    "sparsity.pickle",
]

In [3]:
def statistical_test(filenames, alpha=0.05):
    """Run a one-way ANOVA and a Tukey HSD post-hoc test for each results file.

    Every file is loaded with ``np.load(..., allow_pickle=True)`` and must be
    indexable by the keys ``'Dice'``, ``'Certif'``, ``'Our_method'`` and
    ``'GE_NSGAII'``, each mapping to an array-like of numbers. For each file
    the F-statistic, the ANOVA p-value and the Tukey HSD summary are printed.

    Parameters
    ----------
    filenames : iterable of str or path-like
        Files to analyse; the file stem is printed as the test label.
    alpha : float, optional
        Significance level forwarded to ``pairwise_tukeyhsd`` (default 0.05).

    Returns
    -------
    None
        Results are only printed.
    """
    group_names = ("Dice", "Certif", "Our_method", "GE_NSGAII")

    for filename in filenames:
        # Path.stem drops directories and only the last extension, so
        # "./results/sparsity.pickle" is labelled "sparsity" (a plain
        # split(".")[0] would yield an empty label for that path).
        print("Testing for :", Path(filename).stem)

        # SECURITY: allow_pickle=True unpickles the file, which can execute
        # arbitrary code — only pass files that come from a trusted source.
        data = np.load(filename, allow_pickle=True)
        samples = [np.array(data[name]) for name in group_names]

        f_statistic, p_value = f_oneway(*samples)
        print("F-Statistic:", f_statistic)
        print("P-Value:", p_value)

        print("---------Tukey pairwise hsd-----------------")

        # One label per observation, in the same order as the pooled values.
        pooled_values = np.concatenate(samples)
        pooled_labels = np.concatenate(
            [[name] * len(sample) for name, sample in zip(group_names, samples)]
        )
        tukey_results = pairwise_tukeyhsd(pooled_values, pooled_labels, alpha=alpha)

        print(tukey_results)

        print("--" * 30)
        
                
            

In [4]:
# Run the ANOVA + post-hoc comparison once for every file listed above.
statistical_test(filenames=filenames)

Testing for : average_shift
F-Statistic: 24.472592294207626
P-Value: 7.4884484492533e-11
---------Turkey pairwise hsd-----------------
    Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower   upper  reject
-----------------------------------------------------------
   Certif       Dice  -0.0763 0.0001 -0.1202 -0.0323   True
   Certif  GE_NSGAII  -0.1185    0.0 -0.1625 -0.0746   True
   Certif Our_method  -0.1282    0.0 -0.1722 -0.0843   True
     Dice  GE_NSGAII  -0.0423 0.0637 -0.0862  0.0017  False
     Dice Our_method   -0.052  0.014 -0.0959  -0.008   True
GE_NSGAII Our_method  -0.0097 0.9375 -0.0536  0.0343  False
-----------------------------------------------------------
------------------------------------------------------------
Testing for : cohesive
F-Statistic: 19.417473945567348
P-Value: 3.3511979063771435e-09
---------Turkey pairwise hsd-----------------
        Multiple Comparison of Means - Tukey HSD, FWER=0.05        
  g

In [5]:
# Robustness scores hard-coded inline: four lists of 18 floats each, keyed by
# '<method>_robustness'. These are the keys statistical_test_robustness reads.
# NOTE(review): presumably one value per dataset/run, in the same order for
# every method — confirm against whatever produced these numbers.
robustness_dict={'dice_robustness': [0.5247272727272754,
  0.5408482142857133,
  0.546746031746033,
  0.6258561643835626,
  0.524921630094047,
  0.5251082251082273,
  0.5250000000000034,
  0.5403669724770632,
  0.6020661157024801,
  0.636419753086421,
  0.525000000000002,
  0.5595394736842111,
  0.5280434782608686,
  0.732974137931035,
  0.655,
  0.6196942446043173,
  0.5248655913978507,
  0.6922645739910323],
 'Our_method_robustness': [0.5258181818181846,
  0.5256696428571419,
  0.5304761904761939,
  0.5773972602739741,
  0.5250000000000031,
  0.5280303030303051,
  0.5259445843828751,
  0.5256880733944945,
  0.5802685950413228,
  0.618930041152262,
  0.5261853448275883,
  0.5564967105263168,
  0.524999999999999,
  0.6545258620689658,
  0.6129999999999999,
  0.5914568345323722,
  0.525940860215055,
  0.6403587443946174],
 'certif_robustness': [0.5270000000000011,
  0.5500000000000007,
  0.5636250000000008,
  0.6850000000000004,
  0.527750000000001,
  0.5253750000000016,
  0.529000000000001,
  0.5443750000000003,
  0.6317500000000005,
  0.6843750000000004,
  0.5272500000000012,
  0.5948750000000009,
  0.5250000000000017,
  0.7135000000000005,
  0.6811250000000001,
  0.6806250000000005,
  0.5250000000000017,
  0.7106250000000002],
 'GE_NSGAII_robustness': [0.5267210144927564,
  0.5252906976744178,
  0.534558823529412,
  0.5633333333333334,
  0.528508771929824,
  0.5324728260869578,
  0.5297468354430386,
  0.525,
  0.6403846153846154,
  0.5960526315789473,
  0.5285714285714286,
  0.5502777777777771,
  0.527325581395348,
  0.6730232558139536,
  0.625,
  0.5882812499999998,
  0.5336914062500024,
  0.6310185185185176]}

def statistical_test_robustness(data, alpha=0.05):
    """Run a one-way ANOVA and a Tukey HSD post-hoc test on robustness scores.

    Parameters
    ----------
    data : mapping
        Must provide the keys ``'dice_robustness'``, ``'certif_robustness'``,
        ``'Our_method_robustness'`` and ``'GE_NSGAII_robustness'``, each
        mapping to an array-like of numbers.
    alpha : float, optional
        Significance level forwarded to ``pairwise_tukeyhsd`` (default 0.05).

    Returns
    -------
    None
        The F-statistic, p-value and Tukey HSD summary are only printed.
    """
    # (label shown in the Tukey table, key looked up in `data`)
    groups = (
        ("Dice", "dice_robustness"),
        ("Certif", "certif_robustness"),
        ("Our_method", "Our_method_robustness"),
        ("GE_NSGAII", "GE_NSGAII_robustness"),
    )
    samples = [np.array(data[key]) for _, key in groups]

    f_statistic, p_value = f_oneway(*samples)
    print("F-Statistic:", f_statistic)
    print("P-Value:", p_value)

    print("---------Tukey pairwise hsd-----------------")

    # One label per observation, in the same order as the pooled values.
    pooled_values = np.concatenate(samples)
    pooled_labels = np.concatenate(
        [[label] * len(sample) for (label, _), sample in zip(groups, samples)]
    )
    tukey_results = pairwise_tukeyhsd(pooled_values, pooled_labels, alpha=alpha)

    print(tukey_results)

    print("--" * 30)
    
# Compare the four methods on the inline robustness scores.
statistical_test_robustness(data=robustness_dict)

F-Statistic: 1.267391655204623
P-Value: 0.29254059285333456
---------Turkey pairwise hsd-----------------
   Multiple Comparison of Means - Tukey HSD, FWER=0.05    
  group1    group2   meandiff p-adj   lower  upper  reject
----------------------------------------------------------
   Certif       Dice  -0.0165 0.8445 -0.0694 0.0364  False
   Certif  GE_NSGAII  -0.0315 0.4037 -0.0844 0.0214  False
   Certif Our_method   -0.035 0.3103 -0.0879 0.0179  False
     Dice  GE_NSGAII   -0.015 0.8776 -0.0679 0.0379  False
     Dice Our_method  -0.0185 0.7935 -0.0714 0.0344  False
GE_NSGAII Our_method  -0.0035 0.9981 -0.0564 0.0494  False
----------------------------------------------------------
------------------------------------------------------------
