# Friedman Test
by: *Babak Zolghadr-Asli*

In [1]:
from scipy.stats import friedmanchisquare
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from pathlib import Path
import seaborn as sns
import os

%matplotlib inline

<div class="alert alert-danger">
    <b>SYNTAX GUIDE</b>
</div>

In order for this script/notebook to work, you need to store the files with a specified name-syntax;

* **Step I.** First all the files in specified folders, where the folder name is as the same as the algorithm name.
* **Step II.** If need be, pass the current direcoty that holds the files to `root_directory` as a `str` object; Note that if you are using a windows base machine you have to use `\` to seprate files location in your directory adress.  
* **Step III.** All results must be stored in a csv file, with the name `'a_fb_result.csv` where `a` is a placeholder for the **algorithm name**, and `b` is the number associated for the function. 



* Note that all these files should have a columns `best_OF`.

In [2]:
root_directory = 'C:/Users/bz267/Optimization results/Eliminate vs Preserve'

alg_list_temp = os.listdir(root_directory)
alg_list = [i for i in alg_list_temp if not i.startswith(tuple(str(i) for i in range(10)))]
print('=='*30)
print('The follwing algorithms are to be evaluted in this sessin:')
print('=='*30)
for i, alg_name in enumerate(alg_list):
    print('[%d] %s'%((i+1), alg_name))
print('--'*25)

The follwing algorithms are to be evaluted in this sessin:
[1] Eliminate_take_1
[2] Eliminate_take_2
[3] Eliminate_take_3
[4] Preserve_take_1
[5] Preserve_take_2
[6] Preserve_take_3
[7] Preserve_take_4
[8] Pure_Random
--------------------------------------------------


##### NC

In [3]:
func_num = np.arange(1, 31)

In [4]:
# RAEDING AND CREATING FILES WITH OFS OF ALL FUNCTIONS

for f in func_num:
    globals()['overall_db_f%d'%f] = pd.DataFrame(columns=alg_list)
    for a in alg_list:
        file_name = 'f%d.csv'%(f)
        folder_name = '/%s/NC/'%a
        db = pd.read_csv(root_directory+folder_name+file_name)
        exec("overall_db_f%d[a] = db['best_OF']"%f)
    exec("overall_db_f%d.dropna(inplace=True)"%f)

##### C

In [3]:
func_num = np.array([1, 2, 4, 6, 7])

In [4]:
# RAEDING AND CREATING FILES WITH OFS OF ALL FUNCTIONS

for f in func_num:
    globals()['overall_db_f%d'%f] = pd.DataFrame(columns=alg_list)
    for a in alg_list:
        file_name = 'f%d.csv'%(f)
        folder_name = '/%s/C/'%a
        db = pd.read_csv(root_directory+folder_name+file_name)
        exec("overall_db_f%d[a] = db['best_OF']"%f)
    exec("overall_db_f%d.dropna(inplace=True)"%f)

<div class="alert alert-success">
    <b>GENERAL PERFORMANCE ANALYSIS</b>
</div>

##### NC

In [5]:
std_result = pd.DataFrame(columns=alg_list)
mean_result = pd.DataFrame(columns=alg_list)
index = pd.Index(['f%d'%i for i in func_num])
for f in func_num:
    db = eval('overall_db_f%d'%f)
    std_temp = pd.DataFrame(db.std()).transpose()
    mean_temp = pd.DataFrame(db.mean()).transpose()
    std_result = pd.concat((std_result, std_temp), axis=0, ignore_index=True)
    mean_result = pd.concat((mean_result, mean_temp), axis=0, ignore_index=True)
std_result = std_result.set_index(index)
mean_result = mean_result.set_index(index)

In [6]:
mean_result

Unnamed: 0,Eliminate_take_1,Eliminate_take_2,Eliminate_take_3,Preserve_take_1,Preserve_take_2,Preserve_take_3,Preserve_take_4,Pure_Random
f1,23760160000.0,21654130000.0,4246591000.0,8727043.0,3176315.0,2284090.0,1876327.0,12002500000.0
f2,153385100.0,100971800.0,3634528.0,242.2154,200.166,200.0977,200.0697,17693760.0
f3,6885.043,6081.575,4516.183,302.7141,301.0539,300.7836,300.6297,4188.812
f4,551.9487,540.4589,443.4104,405.6287,400.2599,400.1124,400.09,476.587
f5,551.9209,550.3917,545.8414,530.9136,513.0664,512.4832,513.0151,546.6358
f6,633.6438,634.2016,633.3178,618.0817,605.9961,607.1183,609.19,627.3093
f7,855.6183,855.4673,819.8325,773.4693,735.425,728.8599,727.631,817.6143
f8,853.2077,853.772,848.5129,830.2443,814.8912,813.8282,815.4727,845.126
f9,1534.244,1494.162,1508.546,1190.787,920.2526,928.4902,997.2759,1257.294
f10,2049.621,2045.111,2036.567,1681.499,1260.702,1197.427,1261.692,2022.136


In [7]:
std_result

Unnamed: 0,Eliminate_take_1,Eliminate_take_2,Eliminate_take_3,Preserve_take_1,Preserve_take_2,Preserve_take_3,Preserve_take_4,Pure_Random
f1,7299762000.0,6514939000.0,1265700000.0,2443458.0,813476.409444,590512.971553,403798.247979,3073249000.0
f2,179504200.0,107364800.0,2991772.0,44.48559,0.059804,0.034891,0.030228,16912720.0
f3,1548.201,1697.734,1067.443,0.988203,0.271448,0.175311,0.164813,893.7459
f4,52.36341,36.22082,11.89953,7.008803,0.203419,0.034512,0.023462,16.64565
f5,5.800792,6.748393,5.865468,9.648674,3.241052,2.981739,3.114159,6.422486
f6,4.580036,4.740935,4.133482,9.287344,3.351178,3.605536,4.901379,3.499479
f7,24.25786,25.05187,14.88383,20.54474,6.703638,4.901175,4.739555,14.35328
f8,6.713762,5.864182,6.313653,9.575893,3.362648,2.709396,3.719483,5.060584
f9,132.3174,129.5836,127.5584,195.4127,40.094198,50.269975,134.455228,94.72836
f10,129.2606,123.0722,107.666,222.1909,106.994221,88.10738,84.759475,106.0505


##### C

In [5]:
std_result = pd.DataFrame(columns=alg_list)
mean_result = pd.DataFrame(columns=alg_list)
index = pd.Index(['f%d'%i for i in func_num])
for f in func_num:
    db = eval('overall_db_f%d'%f)
    std_temp = pd.DataFrame(db.std()).transpose()
    mean_temp = pd.DataFrame(db.mean()).transpose()
    std_result = pd.concat((std_result, std_temp), axis=0, ignore_index=True)
    mean_result = pd.concat((mean_result, mean_temp), axis=0, ignore_index=True)
std_result = std_result.set_index(index)
mean_result = mean_result.set_index(index)

In [6]:
mean_result

Unnamed: 0,Eliminate_take_1,Eliminate_take_2,Eliminate_take_3,Preserve_take_1,Preserve_take_2,Preserve_take_3,Preserve_take_4,Pure_Random
f1,24079880000.0,21953790000.0,4082831000.0,0.838749,0.357816,0.24969,0.226368,1376.861
f2,150310500.0,127300800.0,4058043.0,18631.898489,36.793505,18.359509,37.322816,79527.18
f4,558.6154,539.5901,444.1488,0.53968,0.346354,0.287105,0.268004,20.15853
f6,633.7337,634.2776,632.4429,0.78,0.0,0.0,0.0,1393.59
f7,858.7418,857.0657,814.3021,0.887207,0.136305,0.07448,0.05549,2010474.0


In [7]:
std_result

Unnamed: 0,Eliminate_take_1,Eliminate_take_2,Eliminate_take_3,Preserve_take_1,Preserve_take_2,Preserve_take_3,Preserve_take_4,Pure_Random
f1,6341530000.0,6060699000.0,1202257000.0,0.232749,0.093043,0.060627,0.054522,367.276637
f2,135676400.0,174415500.0,3483117.0,73392.633247,65.612052,46.490316,63.120897,91443.066784
f4,43.77454,39.57885,10.87069,0.080079,0.039548,0.032675,0.035081,2.472497
f6,4.942257,4.432675,4.020732,0.578748,0.0,0.0,0.0,339.802827
f7,24.39099,25.04061,16.13383,0.472659,0.060213,0.032976,0.023359,826648.210363


<div class="alert alert-warning">
    <b>Analysis</b>
</div>

##### NC

In [16]:
database = mean_result
temp_storage = list()
mean_analysis = pd.DataFrame(index=std_result.index)
for i in range(len(database)):
    np.argmin(database.iloc[i])
    min_val = np.min(database.iloc[i])
    index = database.iloc[i]==min_val
    temp_storage.append(list(index[index].index))
mean_analysis['Best Algorithms'] = temp_storage
mean_analysis['Best result'] = database.min(axis=1)
mean_analysis['Mean of algs'] = database.mean(axis=1)
mean_analysis['std. of algs'] = database.std(axis=1)
folder_name = '/0Summary result/Stat tests/NC/Mean & std comparision/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'mean_analysis.csv'
mean_analysis.to_csv(output_dir / file_name)

In [17]:
database = std_result
temp_storage = list()
std_analysis = pd.DataFrame(index=std_result.index)
for i in range(len(database)):
    np.argmin(database.iloc[i])
    min_val = np.min(database.iloc[i])
    index = database.iloc[i]==min_val
    temp_storage.append(list(index[index].index))
std_analysis['Best Algorithms'] = temp_storage
std_analysis['Best result'] = database.min(axis=1)
std_analysis['Mean of algs'] = database.mean(axis=1)
std_analysis['std. of algs'] = database.std(axis=1)
folder_name = '/0Summary result/Stat tests/NC/Mean & std comparision/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'std_analysis.csv'
std_analysis.to_csv(output_dir / file_name)

In [18]:
mean_analysis

Unnamed: 0,Best Algorithms,Best result,Mean of algs,std. of algs
f1,[Preserve_take_4],1876327.0,7709930000.0,10141820000.0
f2,[Preserve_take_4],200.0697,34460760.0,59219000.0
f3,[Preserve_take_4],300.6297,2859.599,2860.471
f4,[Preserve_take_4],400.09,452.312,64.03853
f5,[Preserve_take_3],512.4832,533.0335,17.8619
f6,[Preserve_take_2],605.9961,621.1073,12.4866
f7,[Preserve_take_4],727.631,789.2397,54.92765
f8,[Preserve_take_3],813.8282,834.3819,17.81864
f9,[Preserve_take_2],920.2526,1228.881,262.7379
f10,[Preserve_take_3],1197.427,1694.344,395.7176


In [19]:
std_analysis

Unnamed: 0,Best Algorithms,Best result,Mean of algs,std. of algs
f1,[Preserve_take_4],403798.247979,2269738000.0,3060442000.0
f2,[Preserve_take_4],0.030228,38346700.0,67907720.0
f3,[Preserve_take_4],0.164813,651.0904,739.3137
f4,[Preserve_take_4],0.023462,15.54995,19.22539
f5,[Preserve_take_3],2.981739,5.477845,2.298378
f6,[Preserve_take_2],3.351178,4.762421,1.921627
f7,[Preserve_take_4],4.739555,14.42949,8.378652
f8,[Preserve_take_3],2.709396,5.41495,2.221389
f9,[Preserve_take_2],40.094198,113.0525,50.27985
f10,[Preserve_take_4],84.759475,121.0127,43.59697


##### C

In [8]:
database = mean_result
temp_storage = list()
mean_analysis = pd.DataFrame(index=std_result.index)
for i in range(len(database)):
    np.argmin(database.iloc[i])
    min_val = np.min(database.iloc[i])
    index = database.iloc[i]==min_val
    temp_storage.append(list(index[index].index))
mean_analysis['Best Algorithms'] = temp_storage
mean_analysis['Best result'] = database.min(axis=1)
mean_analysis['Mean of algs'] = database.mean(axis=1)
mean_analysis['std. of algs'] = database.std(axis=1)
folder_name = '/0Summary result/Stat tests/C/Mean & std comparision/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'mean_analysis.csv'
mean_analysis.to_csv(output_dir / file_name)

In [9]:
database = std_result
temp_storage = list()
std_analysis = pd.DataFrame(index=std_result.index)
for i in range(len(database)):
    np.argmin(database.iloc[i])
    min_val = np.min(database.iloc[i])
    index = database.iloc[i]==min_val
    temp_storage.append(list(index[index].index))
std_analysis['Best Algorithms'] = temp_storage
std_analysis['Best result'] = database.min(axis=1)
std_analysis['Mean of algs'] = database.mean(axis=1)
std_analysis['std. of algs'] = database.std(axis=1)
folder_name = '/0Summary result/Stat tests/C/Mean & std comparision/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'std_analysis.csv'
std_analysis.to_csv(output_dir / file_name)

In [10]:
mean_analysis

Unnamed: 0,Best Algorithms,Best result,Mean of algs,std. of algs
f1,[Preserve_take_4],0.226368,6264564000.0,10450710000.0
f2,[Preserve_take_3],18.359509,35220950.0,64244060.0
f4,[Preserve_take_4],0.268004,195.4942,265.9591
f6,"[Preserve_take_2, Preserve_take_3, Preserve_ta...",0.0,411.853,505.5437
f7,[Preserve_take_4],0.05549,251625.7,710682.2


In [11]:
std_analysis

Unnamed: 0,Best Algorithms,Best result,Mean of algs,std. of algs
f1,[Preserve_take_4],0.054522,1700561000.0,2809607000.0
f2,[Preserve_take_3],46.490316,39217500.0,72246490.0
f4,[Preserve_take_3],0.032675,12.11049,18.64587
f6,"[Preserve_take_2, Preserve_take_3, Preserve_ta...",0.0,44.22215,119.4522
f7,[Preserve_take_4],0.023359,103339.3,292260.9


<div class="alert alert-success">
<b>FRIEDMAN RANKING TEST</b>
</div>

<div class="alert alert-warning">
    <b>Part A: <i> Avr. Ranking </i> </b> 
</div>


##### NC

In [20]:
rank_db_raw = list()
for i in range(len(mean_result)):
    rankings = mean_result.iloc[i,:].rank(method='min')
    rank_db_raw.append(rankings)
mean_rank_db = pd.DataFrame(rank_db_raw, columns=mean_result.columns, index = mean_result.index).transpose()
mean_rank_db['Avr_rank'] = mean_rank_db.mean(axis=1)
mean_rank_db['Overall_rank'] = mean_rank_db['Avr_rank'].rank(method='min').astype(int)
folder_name = '/0Summary result/Stat tests/NC/Friedman test/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'mean_Friedmanrank_result.csv'
mean_rank_db.to_csv(output_dir / file_name)

In [21]:
mean_rank_db

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,...,f23,f24,f25,f26,f27,f28,f29,f30,Avr_rank,Overall_rank
Eliminate_take_1,8.0,8.0,8.0,8.0,8.0,7.0,8.0,7.0,8.0,8.0,...,8.0,8.0,8.0,7.0,8.0,7.0,8.0,8.0,7.8,8
Eliminate_take_2,7.0,7.0,7.0,7.0,7.0,8.0,7.0,8.0,6.0,7.0,...,7.0,7.0,7.0,8.0,6.0,8.0,7.0,6.0,7.0,7
Eliminate_take_3,5.0,5.0,6.0,5.0,5.0,6.0,6.0,6.0,7.0,6.0,...,6.0,6.0,5.0,6.0,5.0,5.0,5.0,5.0,5.266667,5
Preserve_take_1,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.2,4
Preserve_take_2,3.0,3.0,3.0,3.0,3.0,1.0,3.0,2.0,1.0,2.0,...,4.0,1.0,3.0,3.0,3.0,3.0,3.0,3.0,2.733333,3
Preserve_take_3,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,1.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.766667,2
Preserve_take_4,1.0,1.0,1.0,1.0,2.0,3.0,1.0,3.0,3.0,3.0,...,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.533333,1
Pure_Random,6.0,6.0,5.0,6.0,6.0,5.0,5.0,5.0,5.0,5.0,...,3.0,5.0,6.0,5.0,7.0,6.0,6.0,7.0,5.7,6


##### C

In [12]:
rank_db_raw = list()
for i in range(len(mean_result)):
    rankings = mean_result.iloc[i,:].rank(method='min')
    rank_db_raw.append(rankings)
mean_rank_db = pd.DataFrame(rank_db_raw, columns=mean_result.columns, index = mean_result.index).transpose()
mean_rank_db['Avr_rank'] = mean_rank_db.mean(axis=1)
mean_rank_db['Overall_rank'] = mean_rank_db['Avr_rank'].rank(method='min').astype(int)
folder_name = '/0Summary result/Stat tests/C/Friedman test/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'mean_Friedmanrank_result.csv'
mean_rank_db.to_csv(output_dir / file_name)

In [13]:
mean_rank_db

Unnamed: 0,f1,f2,f4,f6,f7,Avr_rank,Overall_rank
Eliminate_take_1,8.0,8.0,8.0,6.0,7.0,7.4,8
Eliminate_take_2,7.0,7.0,7.0,7.0,6.0,6.8,7
Eliminate_take_3,6.0,6.0,6.0,5.0,5.0,5.6,5
Preserve_take_1,4.0,4.0,4.0,4.0,4.0,4.0,4
Preserve_take_2,3.0,2.0,3.0,1.0,3.0,2.4,3
Preserve_take_3,2.0,1.0,2.0,1.0,2.0,1.6,2
Preserve_take_4,1.0,3.0,1.0,1.0,1.0,1.4,1
Pure_Random,5.0,5.0,5.0,8.0,8.0,6.2,6


<div class="alert alert-warning">
    <b>Part B: <i> Std. Ranking </i> </b> 
</div>


##### NC

In [22]:
rank_db_raw = list()
for i in range(len(std_result)):
    rankings = std_result.iloc[i,:].rank(method='min')
    rank_db_raw.append(rankings)
std_rank_db = pd.DataFrame(rank_db_raw, columns=std_result.columns, index = std_result.index).transpose()
std_rank_db['std_rank'] = std_rank_db.mean(axis=1)
std_rank_db['Overall_rank'] = std_rank_db['std_rank'].rank(method='min').astype(int)
folder_name = '/0Summary result/Stat tests/NC/Friedman test/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'std_Friedmanrank_result.csv'
std_rank_db.to_csv(output_dir / file_name)

In [23]:
std_rank_db

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,...,f23,f24,f25,f26,f27,f28,f29,f30,std_rank,Overall_rank
Eliminate_take_1,8.0,8.0,7.0,8.0,4.0,5.0,7.0,7.0,6.0,7.0,...,1.0,8.0,6.0,6.0,6.0,3.0,8.0,7.0,6.533333,8
Eliminate_take_2,7.0,7.0,8.0,7.0,7.0,6.0,8.0,5.0,5.0,6.0,...,2.0,7.0,4.0,5.0,5.0,4.0,6.0,8.0,6.166667,7
Eliminate_take_3,5.0,5.0,6.0,5.0,5.0,4.0,5.0,6.0,4.0,5.0,...,3.0,4.0,2.0,3.0,7.0,1.0,2.0,4.0,4.3,4
Preserve_take_1,4.0,4.0,4.0,4.0,8.0,8.0,6.0,8.0,8.0,8.0,...,5.0,3.0,7.0,7.0,4.0,6.0,7.0,5.0,5.766667,6
Preserve_take_2,3.0,3.0,3.0,3.0,3.0,1.0,3.0,2.0,1.0,4.0,...,6.0,2.0,8.0,8.0,2.0,8.0,1.0,3.0,3.433333,3
Preserve_take_3,2.0,2.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,2.0,...,8.0,5.0,3.0,2.0,1.0,7.0,5.0,2.0,2.7,2
Preserve_take_4,1.0,1.0,1.0,1.0,2.0,7.0,1.0,3.0,7.0,1.0,...,7.0,6.0,5.0,1.0,8.0,5.0,3.0,1.0,2.666667,1
Pure_Random,6.0,6.0,5.0,6.0,6.0,2.0,4.0,4.0,3.0,3.0,...,4.0,1.0,1.0,4.0,3.0,2.0,4.0,6.0,4.433333,5


##### C

In [14]:
rank_db_raw = list()
for i in range(len(std_result)):
    rankings = std_result.iloc[i,:].rank(method='min')
    rank_db_raw.append(rankings)
std_rank_db = pd.DataFrame(rank_db_raw, columns=std_result.columns, index = std_result.index).transpose()
std_rank_db['std_rank'] = std_rank_db.mean(axis=1)
std_rank_db['Overall_rank'] = std_rank_db['std_rank'].rank(method='min').astype(int)
folder_name = '/0Summary result/Stat tests/C/Friedman test/'
output_dir = Path(root_directory+folder_name)
output_dir.mkdir(parents=True, exist_ok=True)
file_name = 'std_Friedmanrank_result.csv'
std_rank_db.to_csv(output_dir / file_name)

In [15]:
std_rank_db

Unnamed: 0,f1,f2,f4,f6,f7,std_rank,Overall_rank
Eliminate_take_1,8.0,7.0,8.0,7.0,6.0,7.2,8
Eliminate_take_2,7.0,8.0,7.0,6.0,7.0,7.0,7
Eliminate_take_3,6.0,6.0,6.0,5.0,5.0,5.6,5
Preserve_take_1,4.0,4.0,4.0,4.0,4.0,4.0,4
Preserve_take_2,3.0,3.0,3.0,1.0,3.0,2.6,3
Preserve_take_3,2.0,1.0,1.0,1.0,2.0,1.4,1
Preserve_take_4,1.0,2.0,2.0,1.0,1.0,1.4,1
Pure_Random,5.0,5.0,5.0,8.0,8.0,6.2,6
