In [10]:
import numpy as np
import pandas as pd
from scipy import stats
import scikit_posthocs as sp

## Friedman Test

In [11]:
# Load the results table and keep only the AUC measurements.
data = pd.read_csv('data.csv')
data = data[data['MeasureType'] == 'AUC']
# drop all rows that have the string '_Aug' in the 'FilteringAlgorithm' column
# (regex=False: match the literal substring; na=False: a missing algorithm
# name counts as "no match" instead of producing NaN, which `~` cannot invert)
data = data[~data['FilteringAlgorithm'].str.contains('_Aug', regex=False, na=False)]
# The Friedman test is a paired test: the i-th value of every algorithm's list
# must belong to the same block. The default sort kind ('quicksort') is not
# stable and may shuffle rows that share an algorithm name, so sort stably to
# keep the file's row order inside each algorithm.
# NOTE(review): assumes data.csv lists the blocks (e.g. dataset/fold) in the
# same order for every algorithm -- confirm against the CSV.
data = data.sort_values(by=['FilteringAlgorithm'], kind='stable')
data = data.groupby('FilteringAlgorithm')

# get the 'MeasureVal' column for each group as a list
# (a Series indexed by algorithm name, one list of AUC values per algorithm)
AUCs = data['MeasureVal'].apply(list)
# each list is passed as one related sample
stat, p = stats.friedmanchisquare(*AUCs)
print(f'stat={stat}, p={p}')

stat=264.9189527298734, p=2.6501689327869014e-54


The p-value of the Friedman test is far below 0.05, so we can reject the null hypothesis that the different filtering methods produce the same distribution of AUC scores; at least one method differs from the others.

## Post-Hoc Test

In [12]:
# get the names of the groups
# Taken from the index of `AUCs` (one entry per algorithm) rather than from
# `data.groups`: this cell rebinds `data` to an array below, so reading
# `data.groups` would fail when the cell is run a second time. It also
# guarantees that the labels follow the same order as the matrix rows.
names = list(AUCs.index)
# turn data into a numpy array with one row per algorithm
data = np.array(AUCs.tolist())
# transpose so that columns are algorithms before handing the matrix to the test
posthoc = sp.posthoc_nemenyi_friedman(data.T)
posthoc.columns = names
posthoc.index = names
posthoc

Unnamed: 0,FDR,MFMW,MFMW_New,MRMR,RFE_SVM,ReliefF,TRI_STAGE
FDR,1.0,0.64661,0.9,0.001,0.001,0.003272,0.001
MFMW,0.64661,1.0,0.9,0.001,0.001,0.331541,0.001
MFMW_New,0.9,0.9,1.0,0.001,0.001,0.052839,0.001
MRMR,0.001,0.001,0.001,1.0,0.001,0.021676,0.9
RFE_SVM,0.001,0.001,0.001,0.001,1.0,0.001,0.001
ReliefF,0.003272,0.331541,0.052839,0.021676,0.001,1.0,0.340055
TRI_STAGE,0.001,0.001,0.001,0.9,0.001,0.340055,1.0


The Nemenyi post-hoc test returns a p-value for each pairwise comparison of the methods' mean ranks. So, for alpha = 0.05, any pair with a p-value smaller than alpha differs statistically significantly, while for the remaining pairs no significant difference can be shown.