In [None]:
import sys
import time
import numpy as np
import matplotlib.pyplot as plt

# Test whether data follows a given distribution (works for any distribution)
from scipy.stats import kstest
# Test whether data is normally distributed
from scipy.stats import shapiro
# Test with parametric t-test
from scipy.stats import ttest_ind, ttest_rel
# Nonparametric test for comparing two related (paired) samples
from scipy.stats import wilcoxon
# Nonparametric test for comparing two or more independent samples
from scipy.stats import kruskal
# Nonparametric test for comparing three or more related samples
from scipy.stats import friedmanchisquare
# probability plot
from scipy.stats import probplot
# data preparation
from sklearn.preprocessing import normalize

np.set_printoptions(linewidth=100000000, formatter={'all': lambda x: str(x)})
%matplotlib inline

In [None]:
# Problem dimensionalities used by the timing and statistics cells
dims = [10, 30, 50]
# Names of the algorithms under comparison (also used to build the data file names)
algos = [
   'dynNpMsjDE', 'DE', 'jDE', 'BBFWA', 'SCA',
   'ES(1+1)', 'dynFWAG', 'ASO', 'BA', 'MTS',
]

# CEC 2017 algorithm speed test
Example of measuring the execution time of the algorithms.

In [None]:
sys.path.append('cec2017')
from cec2017 import run_fun
from run_cec import MinMB
from NiaPy import Runner
from NiaPy.util import Task

def testOne(x=0.55, n=10 ** 6):
   """Run a fixed chain of arithmetic operations ``n`` times.

   The notebook times one call with the default arguments and uses the
   result as the baseline ``t0`` of the speed test.

   Args:
      x: starting value fed into the chain.
      n: number of loop iterations; the default keeps the original workload.

   Returns:
      The value of ``x`` after ``n`` iterations (``x`` unchanged when ``n`` is 0).
   """
   for _ in range(n):
      x = x + x
      x = x / 2
      x = x * x
      x = np.sqrt(x)
      x = np.log(x)
      x = np.exp(x)
      x = x / (x + 2)
   return x

def testTwo(d):
   for i in range(2 * 10 ** 5): 
      x = np.random.uniform(-100, 100, d)
      run_fun(x, 18)

def runTree(a, d):
   """Time a single run of algorithm ``a`` on a ``d``-dimensional task.

   The task is created with ``nFES=2 * 10 ** 5`` on benchmark function 18
   wrapped in ``MinMB``. Only ``algo.run()`` is timed; the task and the
   algorithm object are built before the clock starts.

   Args:
      a: algorithm name passed to ``Runner.getAlgorithm``.
      d: problem dimensionality passed to ``Task`` as ``D``.

   Returns:
      Elapsed time of ``algo.run()`` in seconds.
   """
   task = Task(D=d, nFES=2 * 10 ** 5, benchmark=MinMB(run_fun, fnum=18))
   algo = Runner.getAlgorithm(a)(task=task)
   # perf_counter is a monotonic, high-resolution clock, so the measured interval
   # cannot be distorted by system clock adjustments the way time.time() can.
   start_time = time.perf_counter()
   algo.run()
   return time.perf_counter() - start_time

In [None]:
# Number of timed runs averaged for every (dimension, algorithm) pair
N_REPEATS = 5

# t0: time of the fixed arithmetic workload.
# perf_counter is used for all intervals because it is monotonic and high resolution.
start_time = time.perf_counter()
testOne()
t0 = time.perf_counter() - start_time

print ('t0: ', t0)

# t1: time of the plain function evaluations, one entry per dimension in dims
t1 = []
for d in dims:
   start_time = time.perf_counter()
   testTwo(d)
   t1.append(time.perf_counter() - start_time)

print ('t1: ', t1)

# t2[i][j]: mean run time of algos[j] on dimension dims[i] over N_REPEATS runs
t2 = [[np.mean([runTree(a, d) for _ in range(N_REPEATS)]) for a in algos] for d in dims]

# Report (t2 - t1) / t0 for every algorithm and dimension
for i, d in enumerate(dims):
   for j, a in enumerate(algos):
      print ('%10s %d -- %.4E' % (a, d, (t2[i][j] - t1[i]) / t0))

# CEC 2017 statistic
To obtain the data needed for the statistical tests we used: `for i in {1..30}; do python run_cec.py -c 17 -o T -rn 50 -a DE -D 10 -f $i -seed {1000..1050}; done`.
This example runs the DE algorithm on all benchmark functions with the problem dimensionality set to 10.
The DE algorithm is run 50 times on each benchmark function, with seeds taken from the range 1000 to 1050.
Every algorithm run has its own seed.
We used `-o T` to generate the output.

## Example of multiple runs on one problem
* dim $\in \{10, 30, 50\}$
* fnum $\in \{1, \cdots , 30\}$
* algos $\in$ `{dynNpMsjDE, DE, jDE, BBFWA, SCA, ES(1+1), dynFWAG, ASO, BA, MTS}`

In [None]:
# Load data: data[k, f] holds the values read for algos[k] on function number f + 1
dim = dims[2]
data = np.asanyarray([
   [np.loadtxt('data/%s_%d_%d_v' % (name, fnum, dim)) for fnum in range(1, 31)]
   for name in algos
])
# Basic statistics of the values after subtracting (function number) * 100
vals = []
for fnum in range(30):
   print ('\nfun_num: %d' % (fnum + 1))
   stats = []
   for k, name in enumerate(algos):
      err = data[k, fnum] - (fnum + 1) * 100
      row = (np.min(err), np.mean(err), np.std(err))
      print ('%10s:\tmin: %.3E \tmean: %.3E\tstd: %.3E' % ((name,) + row))
      stats.append(row)
   vals.append(stats)
# vals[f, k] = (min, mean, std) of algos[k] on function f + 1
vals = np.asanyarray(vals)
# Index of the algorithm with the smallest min / mean / std for every function
imin, imean, istd = (list(column) for column in np.argmin(vals, axis=1).T)
# Generate table entries for LaTeX; the best value per function and statistic is set in bold
out = ''
for k in range(len(algos)):
   for fnum in range(30):
      cells = []
      for best, value in zip((imin, imean, istd), vals[fnum, k]):
         fmt = '\\textbf{%.3E}' if k == best[fnum] else '%.3E'
         cells.append(fmt % value)
      out += ' & '.join(cells) + ' \\\\ \n'
   out += '\n'
print ('\n', out)

## Normalize the data

In [None]:
# ndata[f, k] is the row-normalized sample of algos[k] on function f + 1;
# normalize() is applied separately to each function's (algorithm x run) block.
ndata = np.asanyarray([normalize(data[:, f]) for f in range(30)])

## Test for normal distribution
[Kolmogorov–Smirnov](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test) test: Tests whether a sample is drawn from a given distribution, or whether two samples are drawn from the same distribution.

If $\text{p-value} > \alpha$, the null hypothesis that the values come from the selected distribution cannot be rejected.

In [None]:
# One-sample KS test of every (algorithm, function) sample against scipy's 'norm' distribution
for i, a in enumerate(algos):
   for fnum in range(1, 31):
      ks_result = kstest(ndata[fnum - 1, i], 'norm')
      print(a, ' on ', fnum, ' ', ks_result)

### Plot

In [None]:
# Probability plot of one explicitly selected sample. The selection is spelled out
# here instead of reading a loop variable left over from an earlier cell; with the
# 10-entry algos list these values pick the same sample a top-to-bottom run used.
plot_fnum, plot_algo = 10, algos[0]
probplot(ndata[plot_fnum - 1, algos.index(plot_algo)], plot=plt)

[Shapiro–Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test) is a test of normality in frequentist statistics.

In [None]:
# ndata is indexed [function, algorithm, run], so the sample of algorithm i on
# function j is ndata[j, i]. Indexing ndata[i] alone would select function i and
# mislabel it with the algorithm name. Each sample is tested on its own.
for i, a in enumerate(algos):
    for j in range(len(ndata)):
        print(a, ' on ', j + 1, ' ', shapiro(ndata[j, i]))

## Run the related-samples (paired) t-test

In [None]:
# ndata is indexed [function, algorithm, run]. Compare algorithms (second axis):
# iterating over len(ndata) would walk the functions and run past the end of algos.
# Samples are flattened so element k of both arrays belongs to the same function and
# run index. NOTE(review): pairing assumes run k used the same seed for every
# algorithm - confirm against how the data files were generated.
reference = ndata[:, 0].ravel()
for i in range(1, len(algos)):
    print ('%s vs. %s:  %s' % (algos[0], algos[i], ttest_rel(reference, ndata[:, i].ravel())))

## Run Wilcoxon test
[Wilcoxon signed-rank](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) test: tests whether matched pair samples are drawn from populations with different mean ranks

In [None]:
# ndata is indexed [function, algorithm, run]. Compare algorithms (second axis):
# iterating over len(ndata) would walk the functions and run past the end of algos.
# Samples are flattened so element k of both arrays belongs to the same function and
# run index. NOTE(review): pairing assumes run k used the same seed for every
# algorithm - confirm against how the data files were generated.
reference = ndata[:, 0].ravel()
for i in range(1, len(algos)):
    print ('%s vs. %s:  %s' % (algos[0], algos[i], wilcoxon(reference, ndata[:, i].ravel())))

## Run Kruskal test
[Kruskal–Wallis one-way analysis of variance by ranks](https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance): tests whether two or more independent samples are drawn from the same distribution

In [None]:
# ndata is indexed [function, algorithm, run]. Compare algorithms (second axis):
# iterating over len(ndata) would walk the functions and run past the end of algos.
# Every algorithm's values over all functions and runs form one flat sample.
reference = ndata[:, 0].ravel()
for i in range(1, len(algos)):
    print ('%s vs. %s:  %s' % (algos[0], algos[i], kruskal(reference, ndata[:, i].ravel())))

## Run Friedman test
[Friedman two-way analysis of variance by ranks](https://en.wikipedia.org/wiki/Friedman_test): tests whether k treatments in randomized block designs have identical effects

In [None]:
# The treatments compared by the Friedman test are the algorithms, so pass one flat
# sample per algorithm. Unpacking ndata directly would pass one block per function,
# because ndata is indexed [function, algorithm, run].
samples = [ndata[:, i].ravel() for i in range(len(algos))]
print (algos, friedmanchisquare(*samples))