In [None]:
import sys
from typing import List, Tuple
import pandas as pd
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
import numpy as np
np.set_printoptions(threshold=sys.maxsize)
%matplotlib widget
import matplotlib.pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
import scipy.stats as stats

# Nalaganje podatkov

In [None]:
def loadResults(alg: str) -> pd.DataFrame:
   """Load the result files of one algorithm into a single DataFrame.

   Reads the files ``<alg>/<alg>_<fnum>_10_v`` for ``fnum`` in 1..30 with
   ``np.loadtxt`` and subtracts ``fnum * 100`` from every loaded value
   (presumably the known optimum of function ``fnum`` -- TODO confirm
   against the benchmark definition).

   Args:
      alg: Algorithm name; used both as directory name and file prefix.

   Returns:
      DataFrame whose column ``fnum - 1`` holds the shifted values of
      function ``fnum`` (one row per value, assuming every file holds a
      1-D vector of the same length).
   """
   shifted = [np.loadtxt('%s/%s_%d_10_v' % (alg, alg, fnum)) - fnum * 100 for fnum in range(1, 31)]
   return pd.DataFrame(np.asarray(shifted).T)

pso_df = loadResults('PSO')
mkev3_df = loadResults('MKEv3')
nmm_df = loadResults('NMM')
es11_df = loadResults('ES(1+1)')
de_df = loadResults('DE')
jde_df = loadResults('jDE')
aco_df = loadResults('ACO')
cso_df = loadResults('CSO')

# Predelava podatkov

## Uporaba povprečja

In [None]:
# Per-column mean of every algorithm's results: row 1 of DataFrame.describe()
# is 'mean' for numeric data. Overwrites the *_data variables that the other
# preparation cells also assign, so only the cell run last takes effect.
de_data, jde_data, pso_data, mkev3_data, nmm_data, es11_data, aco_data, cso_data = [
    results.describe().iloc[1, :].values.flatten()
    for results in (de_df, jde_df, pso_df, mkev3_df, nmm_df, es11_df, aco_df, cso_df)
]

## Uporaba mediane

In [None]:
# Per-column median of every algorithm's results: row 5 of DataFrame.describe()
# is the '50%' quantile for numeric data. Overwrites the *_data variables that
# the other preparation cells also assign.
de_data, jde_data, pso_data, mkev3_data, nmm_data, es11_data, aco_data, cso_data = [
    results.describe().iloc[5, :].values.flatten()
    for results in (de_df, jde_df, pso_df, mkev3_df, nmm_df, es11_df, aco_df, cso_df)
]

## Uporaba vseh podatkov

In [None]:
# Every raw value of every algorithm as one flat vector; transposing first
# keeps all values of one DataFrame column together. Overwrites the *_data
# variables that the other preparation cells also assign.
de_data, jde_data, pso_data, mkev3_data, nmm_data, es11_data, aco_data, cso_data = [
    results.values.T.flatten()
    for results in (de_df, jde_df, pso_df, mkev3_df, nmm_df, es11_df, aco_df, cso_df)
]

## Uporaba mediane, povprečja, standardnega odklona, maksimalne in minimalne vrednosti

In [None]:
# Rows 1, 2, 3, 5 and 7 of DataFrame.describe() are mean, std, min, 50% and max
# for numeric data. After the transpose the flat vector lists those five
# statistics column by column. Overwrites the *_data variables that the other
# preparation cells also assign.
de_data, jde_data, pso_data, mkev3_data, nmm_data, es11_data, aco_data, cso_data = [
    results.describe().iloc[[1, 2, 3, 5, 7], :].T.values.flatten()
    for results in (de_df, jde_df, pso_df, mkev3_df, nmm_df, es11_df, aco_df, cso_df)
]

## Končana priprava podatkov

In [None]:
# Algorithms that take part in the statistical comparison, keyed by the label
# used in plots and tables. DE and jDE are currently left out; to include them
# put 'DE': de_data and 'jDE': jde_data at the front of the mapping.
selected = {
    'PSO': pso_data,
    'MKEv3': mkev3_data,
    'NMM': nmm_data,
    'ES(1+1)': es11_data,
    'ACO': aco_data,
    'CSO': cso_data,
}
algs_data = list(selected.values())
algs_labels = list(selected.keys())

# Statistični testi

## Friedman test

### Kritične razdalje za Friedmanov test
$$
CD = q_{\alpha} \sqrt{\frac{k(k + 1)}{6N}}
$$
, kjer:
* $k \mapsto$ Število algoritmov
* $N \mapsto$ Število problemov
* $q_{\alpha} \mapsto$ Kritična vrednost

In [None]:
def wilcoxonSignedRanks(a:np.ndarray, b:np.ndarray) -> Tuple[float, float, float]:
   r"""Compute the rank sums of the Wilcoxon signed-rank test for paired samples.

   The absolute differences ``|a - b|`` are ranked together (tied values
   share their average rank).  Ranks that belong to zero differences are
   split evenly between the two sums, so ``R+ + R-`` always equals
   ``n * (n + 1) / 2`` for ``n`` pairs.

   Args:
      a: First sample.
      b: Second sample, paired element-wise with ``a``.

   Returns:
      1. R+ : sum of ranks where ``a > b`` plus half of the zero-difference ranks.
      2. R- : sum of ranks where ``a < b`` plus half of the zero-difference ranks.
      3. T  : the smaller of R+ and R-.
   """
   diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
   # Zero differences must take part in the ranking: only *their* ranks are
   # shared between the two sums, not the ranks of the non-zero differences.
   ranks = stats.rankdata(np.abs(diff))
   zero_share = np.sum(ranks[diff == 0]) / 2
   r_p = zero_share + np.sum(ranks[diff > 0])
   r_n = zero_share + np.sum(ranks[diff < 0])
   return r_p, r_n, np.min([r_p, r_n])

def friedmanRanks(*arrs: np.ndarray) -> np.ndarray:
   r"""Compute the average Friedman rank of every algorithm.

   For each position ``i`` the values ``arrs[0][i], arrs[1][i], ...`` are
   ranked against each other (smallest value gets rank 1, ties share the
   average rank); the ranks of each algorithm are then averaged over all
   positions.

   Args:
      arrs: One 1-D sequence of results per algorithm, all of equal length.

   Returns:
      Array with one average rank per algorithm, in the order of ``arrs``.

   Raises:
      ValueError: If no sequence is given, the sequences are empty, or
         their lengths differ.
   """
   if not arrs:
      raise ValueError('friedmanRanks needs at least one result array')
   n = len(arrs[0])
   if n == 0:
      raise ValueError('friedmanRanks needs non-empty result arrays')
   # Unequal lengths would otherwise be silently truncated to len(arrs[0]).
   if any(len(arr) != n for arr in arrs):
      raise ValueError('all result arrays must have the same length')
   data = np.column_stack(arrs)  # shape (n, number of algorithms)
   ranks = np.apply_along_axis(stats.rankdata, 1, data)  # rank within each row
   return ranks.sum(axis=0) / n

def cd(alpha:float, k:float, n:float) -> float:
   r"""Get the critical distance for the Friedman test.

   Evaluates ``CD = q_alpha * sqrt(k * (k + 1) / (6 * n))`` where
   ``q_alpha`` is looked up in ``nemenyi.csv``: the row whose ``k`` column
   equals ``k`` and the column named ``'%.2f' % alpha``.

   Args:
      alpha: Significance level; formatted with two decimals to select the
         column of ``nemenyi.csv`` (e.g. 0.05 -> ``'0.05'``).
      k: Number of algorithms.
      n: Number of problems (results per algorithm).

   Returns:
      The critical distance.

   Raises:
      KeyError: If ``nemenyi.csv`` has no column for ``alpha``.
      ValueError: If ``nemenyi.csv`` has no row for ``k``.
   """
   nemenyi_df = pd.read_csv('nemenyi.csv')
   q_a = nemenyi_df.loc[nemenyi_df['k'] == k, '%.2f' % alpha].values
   if q_a.size == 0:
      # Without this check the lookup below fails with an uninformative IndexError.
      raise ValueError('nemenyi.csv has no critical value for k=%s' % k)
   return q_a[0] * np.sqrt((k * (k + 1)) / (6 * n))

In [None]:
# Half of the critical distance at alpha = 0.01; it is drawn below and above
# every average rank.
cd_h = cd(0.01, len(algs_data), len(algs_data[0])) / 2.0
r = friedmanRanks(*algs_data)
f, a = plt.subplots(figsize=(6, 4))

# One box per algorithm, built from the three values
# (rank - CD/2, rank, rank + CD/2).
intervals = [(e - cd_h, e, e + cd_h) for e in r]
a.boxplot(intervals, labels=algs_labels, widths=.15)

# Minor ticks on both axes.
a.xaxis.set_minor_locator(AutoMinorLocator(7))
a.yaxis.set_minor_locator(AutoMinorLocator(7))

# Faint dotted minor grid under a stronger dashed major grid.
a.grid(which='both')
a.grid(which='minor', alpha=0.2, linestyle=':')
a.grid(which='major', alpha=0.5, linestyle='--')

a.set_ylabel('Average rank')
a.set_xlabel('Algorithm')
f.tight_layout()

In [None]:
# Matrix of pairwise Wilcoxon signed-rank p-values (element [1] of the
# stats.wilcoxon result); the diagonal is fixed to 1 because an algorithm is
# not tested against itself.
n_algs = len(algs_data)
p_values = [
    [1 if row == col else stats.wilcoxon(algs_data[row], algs_data[col])[1] for col in range(n_algs)]
    for row in range(n_algs)
]
pw_data = pd.DataFrame(np.asarray(p_values), index=algs_labels, columns=algs_labels)
pw_data

In [None]:
# Write the pw_data table (including its row and column labels) to 'res_data.csv'.
pw_data.to_csv('res_data.csv')