In [1]:
import pandas as pd
from scipy.stats import friedmanchisquare
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Result Datasets

In [2]:
# Load the six per-metric result workbooks, one DataFrame per workbook, using
# pandas' default read_excel options. The workbooks are read in the order
# listed and bound to the matching names on the left-hand side.
(
    texas_gap_data,
    texas_hv_data,
    texas_ud_data,
    texas_rni_data,
    texas_ct_data,
    texas_eval_data,
) = (
    pd.read_excel(workbook)
    for workbook in (
        'texas_gap_percent.xlsx',
        'texas_hypervolume.xlsx',
        'texas_uniform_distribution.xlsx',
        'texas_ratio_non_dominated.xlsx',
        'texas_computation_time.xlsx',
        'texas_unique_evaluations.xlsx',
    )
)

# GAP (%)

In [3]:
# Friedman test on GAP (%): one sample per algorithm column of the GAP sheet.
data = [
    texas_gap_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 51.44103489771353
p-value: 7.023889297517898e-10
There is a statistically significant difference among the algorithms.


In [4]:
# Post-hoc pairwise comparison for GAP (%).
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
tgd = texas_gap_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_gap_data_tukey = pairwise_tukeyhsd(endog=tgd['score'], groups=tgd['algorithm'], alpha=0.05)
print(texas_gap_data_tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower    upper  reject
---------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm   2.7748 0.9462  -6.0674  11.617  False
 nsga2_lfpc  nsga3_lfpc   5.3304 0.5135  -3.5118 14.1726  False
 nsga2_lfpc nsga3_nbcpm   8.0558 0.0972  -0.7864  16.898  False
 nsga2_lfpc    pos_lfpc  -0.8514 0.9998  -9.6936  7.9908  False
 nsga2_lfpc   pos_nbcpm   4.1472 0.7593   -4.695 12.9894  False
nsga2_nbcpm  nsga3_lfpc   2.5556 0.9619  -6.2866 11.3978  False
nsga2_nbcpm nsga3_nbcpm    5.281 0.5241  -3.5612 14.1232  False
nsga2_nbcpm    pos_lfpc  -3.6262 0.8478 -12.4684   5.216  False
nsga2_nbcpm   pos_nbcpm   1.3724 0.9978  -7.4698 10.2146  False
 nsga3_lfpc nsga3_nbcpm   2.7254 0.9501  -6.1168 11.5676  False
 nsga3_lfpc    pos_lfpc  -6.1818 0.3416  -15.024  2.6604  False
 nsga3_lfpc   pos_nbcpm  -1.1832 0.9989 -10.0254   7.659  False
nsga3_nbcpm    pos_lfpc  -8.9072 0.0472 

# HyperVolume

In [5]:
# Friedman test on HyperVolume: one sample per algorithm column of the
# hypervolume sheet.
data = [
    texas_hv_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 141.85142857142864
p-value: 7.228947544564415e-29
There is a statistically significant difference among the algorithms.


In [6]:
# Post-hoc pairwise comparison for HyperVolume.
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
thvd = texas_hv_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_hv_data_tukey = pairwise_tukeyhsd(endog=thvd['score'], groups=thvd['algorithm'], alpha=0.05)
print(texas_hv_data_tukey)

     Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower   upper  reject
--------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm    -0.06 0.1054 -0.1268  0.0067  False
 nsga2_lfpc  nsga3_lfpc    -0.19    0.0 -0.2568 -0.1233   True
 nsga2_lfpc nsga3_nbcpm  -0.2133    0.0   -0.28 -0.1465   True
 nsga2_lfpc    pos_lfpc   0.0458 0.3631  -0.021  0.1126  False
 nsga2_lfpc   pos_nbcpm  -0.0757  0.016 -0.1424 -0.0089   True
nsga2_nbcpm  nsga3_lfpc    -0.13    0.0 -0.1968 -0.0633   True
nsga2_nbcpm nsga3_nbcpm  -0.1532    0.0   -0.22 -0.0865   True
nsga2_nbcpm    pos_lfpc   0.1058 0.0001  0.0391  0.1726   True
nsga2_nbcpm   pos_nbcpm  -0.0156 0.9849 -0.0824  0.0511  False
 nsga3_lfpc nsga3_nbcpm  -0.0232 0.9182   -0.09  0.0435  False
 nsga3_lfpc    pos_lfpc   0.2358    0.0  0.1691  0.3026   True
 nsga3_lfpc   pos_nbcpm   0.1144    0.0  0.0476  0.1811   True
nsga3_nbcpm    pos_lfpc   0.2591    0.0  0.1923  0.3258

# Uniform Distribution

In [7]:
# Friedman test on Uniform Distribution: one sample per algorithm column of
# the uniform-distribution sheet.
data = [
    texas_ud_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 41.702857142857056
p-value: 6.764867485444019e-08
There is a statistically significant difference among the algorithms.


In [8]:
# Post-hoc pairwise comparison for Uniform Distribution.
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
tudd = texas_ud_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_ud_data_tukey = pairwise_tukeyhsd(endog=tudd['score'], groups=tudd['algorithm'], alpha=0.05)
print(texas_ud_data_tukey)

     Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower   upper  reject
--------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm  -0.0249 0.2422 -0.0575  0.0076  False
 nsga2_lfpc  nsga3_lfpc   0.0262 0.1947 -0.0064  0.0587  False
 nsga2_lfpc nsga3_nbcpm     -0.0    1.0 -0.0326  0.0325  False
 nsga2_lfpc    pos_lfpc  -0.0016    1.0 -0.0342  0.0309  False
 nsga2_lfpc   pos_nbcpm  -0.0161 0.7165 -0.0486  0.0165  False
nsga2_nbcpm  nsga3_lfpc   0.0511 0.0001  0.0186  0.0836   True
nsga2_nbcpm nsga3_nbcpm   0.0249 0.2438 -0.0077  0.0574  False
nsga2_nbcpm    pos_lfpc   0.0233 0.3146 -0.0092  0.0559  False
nsga2_nbcpm   pos_nbcpm   0.0088 0.9708 -0.0237  0.0414  False
 nsga3_lfpc nsga3_nbcpm  -0.0262 0.1933 -0.0588  0.0063  False
 nsga3_lfpc    pos_lfpc  -0.0278  0.143 -0.0603  0.0048  False
 nsga3_lfpc   pos_nbcpm  -0.0423 0.0032 -0.0748 -0.0097   True
nsga3_nbcpm    pos_lfpc  -0.0016    1.0 -0.0341   0.031

# RNI (Ratio of Non-dominated Individuals)

In [9]:
# Friedman test on RNI: one sample per algorithm column of the
# ratio-of-non-dominated sheet.
data = [
    texas_rni_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 143.5927442949094
p-value: 3.081702810604489e-29
There is a statistically significant difference among the algorithms.


In [10]:
# Post-hoc pairwise comparison for RNI.
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
trnid = texas_rni_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_rni_data_tukey = pairwise_tukeyhsd(endog=trnid['score'], groups=trnid['algorithm'], alpha=0.05)
print(texas_rni_data_tukey)

     Multiple Comparison of Means - Tukey HSD, FWER=0.05      
   group1      group2   meandiff p-adj   lower   upper  reject
--------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm   0.0131 0.9935 -0.0543  0.0806  False
 nsga2_lfpc  nsga3_lfpc  -0.2054    0.0 -0.2729  -0.138   True
 nsga2_lfpc nsga3_nbcpm  -0.2186    0.0  -0.286 -0.1511   True
 nsga2_lfpc    pos_lfpc   0.0111  0.997 -0.0563  0.0785  False
 nsga2_lfpc   pos_nbcpm  -0.0166  0.981 -0.0841  0.0508  False
nsga2_nbcpm  nsga3_lfpc  -0.2186    0.0  -0.286 -0.1511   True
nsga2_nbcpm nsga3_nbcpm  -0.2317    0.0 -0.2991 -0.1643   True
nsga2_nbcpm    pos_lfpc   -0.002    1.0 -0.0695  0.0654  False
nsga2_nbcpm   pos_nbcpm  -0.0298 0.8031 -0.0972  0.0377  False
 nsga3_lfpc nsga3_nbcpm  -0.0131 0.9935 -0.0806  0.0543  False
 nsga3_lfpc    pos_lfpc   0.2165    0.0  0.1491   0.284   True
 nsga3_lfpc   pos_nbcpm   0.1888    0.0  0.1214  0.2562   True
nsga3_nbcpm    pos_lfpc   0.2297    0.0  0.1622  0.2971

# Computation Time

In [11]:
# Friedman test on Computation Time: one sample per algorithm column of the
# computation-time sheet.
data = [
    texas_ct_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 211.53142857142848
p-value: 9.672966184192463e-44
There is a statistically significant difference among the algorithms.


In [12]:
# Post-hoc pairwise comparison for Computation Time.
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
tctd = texas_ct_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_ct_data_tukey = pairwise_tukeyhsd(endog=tctd['score'], groups=tctd['algorithm'], alpha=0.05)
print(texas_ct_data_tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1      group2   meandiff p-adj   lower    upper   reject
----------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm -22.1062    0.0 -24.8908 -19.3216   True
 nsga2_lfpc  nsga3_lfpc   0.8526 0.9515   -1.932   3.6372  False
 nsga2_lfpc nsga3_nbcpm -21.7344    0.0  -24.519 -18.9498   True
 nsga2_lfpc    pos_lfpc   1.7154 0.4887  -1.0692      4.5  False
 nsga2_lfpc   pos_nbcpm -22.5812    0.0 -25.3658 -19.7966   True
nsga2_nbcpm  nsga3_lfpc  22.9588    0.0  20.1742  25.7434   True
nsga2_nbcpm nsga3_nbcpm   0.3718 0.9989  -2.4128   3.1564  False
nsga2_nbcpm    pos_lfpc  23.8216    0.0   21.037  26.6062   True
nsga2_nbcpm   pos_nbcpm   -0.475 0.9965  -3.2596   2.3096  False
 nsga3_lfpc nsga3_nbcpm  -22.587    0.0 -25.3716 -19.8024   True
 nsga3_lfpc    pos_lfpc   0.8628  0.949  -1.9218   3.6474  False
 nsga3_lfpc   pos_nbcpm -23.4338    0.0 -26.2184 -20.6492   True
nsga3_nbcpm    pos_lfpc  

# Percentage of Unique Solutions (%)

In [13]:
# Friedman test on Percentage of Unique Solutions (%): one sample per
# algorithm column of the unique-evaluations sheet.
data = [
    texas_eval_data[column]
    for column in (
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    )
]

# friedmanchisquare takes every sample as its own positional argument.
statistic, p_value = friedmanchisquare(*data)

print(f"Friedman Test Statistic: {statistic}")
print(f"p-value: {p_value}")

# Report the decision at the 5% significance level.
print(
    "There is a statistically significant difference among the algorithms."
    if p_value < 0.05
    else "No statistically significant difference found."
)

Friedman Test Statistic: 236.8914285714286
p-value: 3.562959797958596e-49
There is a statistically significant difference among the algorithms.


In [14]:
# Post-hoc pairwise comparison for Percentage of Unique Solutions (%).
# Reshape wide -> long: one row per (algorithm, score) observation.
# value_vars is given explicitly so that only the six algorithm columns used
# by the Friedman test become groups; without it, melt() turns every column of
# the sheet (e.g. an index or run-id column) into a group as well.
tevald = texas_eval_data.melt(
    value_vars=[
        'nsga2_nbcpm', 'nsga2_lfpc',
        'nsga3_nbcpm', 'nsga3_lfpc',
        'pos_nbcpm', 'pos_lfpc',
    ],
    var_name='algorithm',
    value_name='score',
)

# NOTE(review): Tukey's HSD is a parametric test that treats the groups as
# independent samples, whereas the omnibus test in this notebook is the
# Friedman test (non-parametric, paired). Confirm this pairing is intended.
texas_eval_data_tukey = pairwise_tukeyhsd(endog=tevald['score'], groups=tevald['algorithm'], alpha=0.05)
print(texas_eval_data_tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1      group2   meandiff p-adj   lower    upper   reject
----------------------------------------------------------------
 nsga2_lfpc nsga2_nbcpm -24.4112    0.0 -28.1874  -20.635   True
 nsga2_lfpc  nsga3_lfpc   4.4732   0.01    0.697   8.2494   True
 nsga2_lfpc nsga3_nbcpm -15.1438    0.0   -18.92 -11.3676   True
 nsga2_lfpc    pos_lfpc   1.0266 0.9708  -2.7496   4.8028  False
 nsga2_lfpc   pos_nbcpm -23.7358    0.0  -27.512 -19.9596   True
nsga2_nbcpm  nsga3_lfpc  28.8844    0.0  25.1082  32.6606   True
nsga2_nbcpm nsga3_nbcpm   9.2674    0.0   5.4912  13.0436   True
nsga2_nbcpm    pos_lfpc  25.4378    0.0  21.6616   29.214   True
nsga2_nbcpm   pos_nbcpm   0.6754 0.9956  -3.1008   4.4516  False
 nsga3_lfpc nsga3_nbcpm  -19.617    0.0 -23.3932 -15.8408   True
 nsga3_lfpc    pos_lfpc  -3.4466 0.0961  -7.2228   0.3296  False
 nsga3_lfpc   pos_nbcpm  -28.209    0.0 -31.9852 -24.4328   True
nsga3_nbcpm    pos_lfpc  