## Wilcoxon Signed-Rank Test 

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon

### TODO: Print the result for each instance and then a summary at the end of each benchmark

In [3]:
benchmarks = ['mval', 'lpr', 'hefei', 'beijing', 'kw']
#benchmarks = ['mval', 'lpr', 'kw']
significance = 0.001
n_runs = 10

for b in benchmarks:
    uhgs_df = pd.read_csv(f"{b}_uhgs.csv", index_col='Instance')    
    madcom_df = pd.read_csv(f"{b}_madcom.csv", index_col='Instance')
    
    wins = 0
    draws = 0
    losses = 0
    
    for i in madcom_df.index.values:
        # Get run values, last 10 rows are the runs
        uhgs_runs = uhgs_df.loc[i].iloc[-n_runs:]
        madcom_runs = madcom_df.loc[i].iloc[-n_runs:]
        
        # Check that both algorithms have the same number of runs
        if uhgs_runs.isnull().any() or madcom_runs.isnull().any():
            print(f"Missing runs for instance {i}")
            continue
        
        # If all runs have the same value then it is a draw
        if (uhgs_runs == madcom_runs).all():
            draws += 1
            continue
            
        # Wilcoxon test for MADCoM being worse than UHGS (difference is larger than zero)
        res_worse = wilcoxon(madcom_runs.values, uhgs_runs.values, alternative='greater')
        if res_worse.pvalue < significance:
            losses += 1
        else:
            # Wilcoxon test for MADCoM being better than UHGS (difference is smaller than zero)
            res_better = wilcoxon(madcom_runs.values, uhgs_runs.values, alternative='less')
            if res_better.pvalue < significance:
                wins += 1
            # Otherwise it is a draw
            else:
                print(i)
                draws += 1
    # Print result
    print(f"{b}: {wins}-{draws}-{losses}")

mval1C
mval4D
mval5D
mval6C
mval9D
mval10D
mval: 0-34-0
Lpr-a-03
Lpr-a-04
Lpr-b-03
Lpr-b-04
Lpr-c-03
lpr: 4-11-0
Hefei-1
Hefei-2
Hefei-3
Hefei-4
Hefei-5
Hefei-6
Hefei-8
hefei: 3-7-0
Beijing-1
Beijing-2
Beijing-3
Beijing-4
Beijing-5
beijing: 5-5-0
kw: 12-0-0




In [3]:
b = 'kw'
uhgs_df = pd.read_csv(f"{b}_uhgs.csv", index_col='Instance')    
madcom_df = pd.read_csv(f"{b}_madcom.csv", index_col='Instance')
i = 'O1_p-2'
uhgs_runs = uhgs_df.loc[i].iloc[-n_runs:]
madcom_runs = madcom_df.loc[i].iloc[-n_runs:]
print(uhgs_runs, madcom_runs)

1     2519512.0
2     2554498.0
3     2536440.0
4     2513761.0
5     2535944.0
6     2542494.0
7     2505481.0
8     2551062.0
9     2551498.0
10    2530079.0
Name: O1_p-2, dtype: float64 1     2470306.0
2     2457985.0
3     2447884.0
4     2466404.0
5     2464065.0
6     2470278.0
7     2463144.0
8     2456482.0
9     2467717.0
10    2449952.0
Name: O1_p-2, dtype: float64


In [4]:
uhgs_df.loc[i].iloc[-n_runs:].isnull().any()

False

In [5]:
(uhgs_runs == madcom_runs).all() 

False