In [24]:
import numpy as np
import pandas as pd
from scipy.stats import f
from itertools import combinations




def friedman_stat_to_f_value(n_ranks_arr, confidence_level, arr_order='cols'):
    """Run the Iman-Davenport variant of the Friedman test on a rank array.

    Args:
        n_ranks_arr: 2-D array of ranks. With ``arr_order='cols'`` it is used
            as given (axis 0 is read as ``k``, axis 1 as ``N``); with
            ``arr_order='rows'`` it is transposed first.
        confidence_level: Probability passed to ``scipy.stats.f.ppf`` to get
            the critical F value (e.g. ``0.95``).
        arr_order: ``'cols'`` or ``'rows'``; selects whether the input is
            transposed before use.

    Returns:
        Tuple ``(iman_davenport_stat, critical_f_value, reject_null_hypo)``
        where the last item is ``iman_davenport_stat > critical_f_value``.

    Raises:
        AssertionError: If ``arr_order`` is not one of the accepted options.
    """
    arr_order_arwgs = ['cols','rows']
    assert arr_order in arr_order_arwgs, f'arr_order: invalid parameter, options: {arr_order_arwgs}'

    # Bring the ranks into the layout chi_square_f reads: axis 0 -> k, axis 1 -> N.
    ranks = n_ranks_arr if arr_order == "cols" else np.transpose(n_ranks_arr)
    friedman_chi2 = chi_square_f(ranks)

    n_groups = ranks.shape[0]
    n_blocks = ranks.shape[1]

    # Degrees of freedom of the F distribution used by the Iman-Davenport statistic.
    numerator_df = n_groups - 1
    denominator_df = (n_groups - 1) * (n_blocks - 1)

    # Iman-Davenport statistic derived from the Friedman chi-square.
    f_statistic = ((n_blocks - 1) * friedman_chi2) / (n_blocks * (n_groups - 1) - friedman_chi2)

    # Critical value of F at the requested confidence level.
    f_critical = f.ppf(confidence_level, numerator_df, denominator_df)

    return f_statistic, f_critical, f_statistic > f_critical


def generate_ranks(row, cols=None):
    """Rank the score entries of one row, best (highest) score first.

    Ties receive the average of the ranks they span, so the ranks of a row
    always sum to ``k * (k + 1) / 2`` for ``k`` ranked entries. This covers
    the fully tied row (every entry gets ``(k + 1) / 2``) as well as partial
    ties, for any number of columns.

    Args:
        row: A pandas Series (e.g. one row handed over by
            ``DataFrame.apply(..., axis=1)``).
        cols: Labels of the entries to rank. When omitted, the module-level
            ``columns_for_rank`` list is used, as in the original code.

    Returns:
        A float Series indexed by ``cols`` holding the ranks (1 = highest).
    """
    if cols is None:
        # Backward-compatible default: fall back to the module-level list.
        cols = columns_for_rank
    # method='average' gives tied scores their mean rank.
    return row[cols].rank(method='average', ascending=False)


def generate_rank_array_from_dataframe(df, cols_to_rank_lst):
    """Rank the given score columns within each row and return a rank array.

    Within every row the highest score gets rank 1; tied scores share the
    average of the ranks they span. The ranking uses exactly the columns in
    ``cols_to_rank_lst`` and does not depend on any module-level state.

    Side effect: for each ranked column ``c`` a column ``c + '_rank'`` is
    added to (or overwritten in) ``df``.

    Args:
        df: DataFrame with one row per observation and numeric score columns.
        cols_to_rank_lst: Labels of the score columns to rank against each
            other.

    Returns:
        2-D array of shape ``(len(cols_to_rank_lst), len(df))``; row ``i``
        holds the ranks of column ``cols_to_rank_lst[i]`` across all rows of
        ``df``.
    """
    score_cols = list(cols_to_rank_lst)

    # Rank across the columns of each row (axis=1), best score first.
    ranks = df[score_cols].rank(axis=1, method='average', ascending=False)

    rank_columns = []
    for col in score_cols:
        rank_col = col + '_rank'
        # Select by label so each rank column matches its score column.
        df[rank_col] = ranks[col]
        rank_columns.append(rank_col)

    # Transpose so that axis 0 runs over the ranked columns.
    return df[rank_columns].values.T

def chi_square_f(n_ranks_arr):
    """Compute the Friedman chi-square statistic from a rank array.

    Args:
        n_ranks_arr: 2-D array of ranks; axis 0 has length ``k`` and axis 1
            has length ``N``. The mean rank is taken along axis 1.

    Returns:
        ``12N / (k(k+1)) * (sum_j R_j**2 - k(k+1)**2 / 4)`` where ``R_j`` is
        the mean of row ``j``. No tie correction is applied.
    """
    n_groups = n_ranks_arr.shape[0]
    n_blocks = n_ranks_arr.shape[1]

    # Mean rank of each row, then the sum of their squares.
    avg_ranks = np.mean(n_ranks_arr, axis=1)
    sum_sq_avg_ranks = np.square(avg_ranks).sum()

    scale = (12 * n_blocks) / (n_groups * (n_groups + 1))
    baseline = n_groups * (n_groups + 1) ** 2 / 4
    return scale * (sum_sq_avg_ranks - baseline)

def nemenyi_test(n_ranks_arr,confidence_level, clf_names):
    """Run the Nemenyi post-hoc test on every pair of classifiers.

    Two classifiers are declared significantly different when their mean
    ranks differ by more than the critical difference
    ``q_alpha * sqrt(k * (k + 1) / (6 * N))``.

    Args:
        n_ranks_arr: 2-D rank array; axis 0 has one entry per classifier
            (``k``), axis 1 one entry per dataset (``N``).
        confidence_level: ``0.90`` or ``0.95``.
        clf_names: Classifier names, in the same order as the rows of
            ``n_ranks_arr``.

    Returns:
        List of ``(clf_pair, mean_diff, crit_diff, reject_null_hypo)`` tuples,
        one per unordered pair of names. The list is also printed.

    Raises:
        AssertionError: If ``confidence_level`` is not a supported value.
        KeyError: If the number of classifiers is outside the 2..10 range
            covered by the critical-value table.
    """
    confidence_arwgs = [0.90, 0.95]
    assert confidence_level in confidence_arwgs, f'confidence_level: invalid parameter, options: {confidence_arwgs}'
    num_of_clf = n_ranks_arr.shape[0]
    N = n_ranks_arr.shape[1]

    # Format with two fixed decimals so 0.90 maps to '0.10' (str(0.1) would
    # give '0.1', which is not a key of the table below).
    alpha = f'{1 - confidence_level:.2f}'

    # Critical values q_alpha for the two-tailed Nemenyi test
    # (Demsar 2006, Table 5), keyed by number of classifiers.
    demsar_dic = {2: {'0.05': 1.960, '0.10': 1.645},
                  3: {'0.05': 2.343, '0.10': 2.052},
                  4: {'0.05': 2.569, '0.10': 2.291},
                  5: {'0.05': 2.728, '0.10': 2.459},
                  6: {'0.05': 2.850, '0.10': 2.589},
                  7: {'0.05': 2.949, '0.10': 2.693},
                  8: {'0.05': 3.031, '0.10': 2.780},
                  9: {'0.05': 3.102, '0.10': 2.855},
                  10: {'0.05': 3.164, '0.10': 2.920}}

    if num_of_clf not in demsar_dic:
        raise KeyError(
            f'no Nemenyi critical value for {num_of_clf} classifiers; '
            f'supported counts: {sorted(demsar_dic)}'
        )
    q_alpha = demsar_dic[num_of_clf][alpha]

    crit_diff = q_alpha * np.power((num_of_clf * (num_of_clf + 1)) / (6 * N), 0.5)

    # Mean rank per classifier, looked up by name for the pairwise comparison.
    means = np.mean(n_ranks_arr, axis=1)
    means_dic = dict(zip(clf_names, means))

    results = []
    for clf_pair in combinations(clf_names, 2):
        mean_diff = np.abs(means_dic[clf_pair[0]] - means_dic[clf_pair[1]])
        reject_null_hypo = mean_diff > crit_diff
        results.append((clf_pair, mean_diff, crit_diff, reject_null_hypo))

    print(results)
    return results
    

# Example scores: one row per dataset, one column per classifier
# (in the order given by score_array_columns below).
score_array = [
    [96.43, 99.44, 83.63],
    [73.42, 81.34, 46.46],
    [72.30, 91.51, 72.31],
    [71.70, 66.16, 70.28],
    [71.67, 71.67, 71.67],
    [74.36, 77.08, 74.35],
    [70.63, 62.21, 44.91],
    [83.22, 80.63, 82.51],
    [98.22, 93.18, 93.21],
    [69.92, 99.90, 72.54],
]

score_array_columns = ['NB', 'SVM', 'AB']
score_array_dataset_name = list('ABCDEFGHIJ')

# Columns that are ranked against each other within every dataset row.
columns_for_rank = ['NB', 'SVM', 'AB']

# Score table with the dataset label as the leading column.
df = pd.DataFrame(score_array, columns=score_array_columns)
df.insert(0, 'dataset', score_array_dataset_name)

# Rank array (classifiers x datasets) fed to both tests.
test = generate_rank_array_from_dataframe(df, columns_for_rank)

# Iman-Davenport test at the 95% confidence level.
iman_davenport_stat, critical_f_value, reject_null_hypo = friedman_stat_to_f_value(test, 0.95)

print(f'f-score: {iman_davenport_stat:.3f}\nf-critical: {critical_f_value:.3f}\nSignificant: {reject_null_hypo}')

# Pairwise post-hoc comparison at the same confidence level.
nemenyi_test(test, 0.95, columns_for_rank)


f-score: 1.227
f-critical: 3.555
Significant: False
[(('NB', 'SVM'), 0.0, 1.0478214542564015, False), (('NB', 'AB'), 0.5999999999999999, 1.0478214542564015, False), (('SVM', 'AB'), 0.5999999999999999, 1.0478214542564015, False)]
