In [47]:
import numpy as np
import pandas as pd
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

# Raw input values, keyed first by metric and then by compared group.
# Every group holds four measurements; position i in each list is treated
# downstream as the same repeated observation (one DataFrame row).
# NOTE(review): the values were presumably transcribed from a source that
# used decimal commas, converted here to decimal points; confirm.
data = dict(
    Sharpe=dict(
        Set=[1.566, 1.592, 0.785, 1.744],
        Sha=[1.152, 1.296, 1.795, 1.119],
        Sor=[1.142, 0.902, 0.832, 0.940],
        Ste=[1.086, 1.456, 1.830, 1.050],
        Cal=[1.103, 1.578, 1.261, 1.988],
    ),
    Sortino=dict(
        Set=[2.072, 2.211, 1.046, 2.069],
        Sha=[1.359, 1.476, 2.290, 1.333],
        Sor=[1.386, 1.109, 0.988, 1.087],
        Ste=[1.387, 1.930, 2.384, 1.255],
        Cal=[1.497, 2.121, 1.545, 2.328],
    ),
    Sterling=dict(
        Set=[6.563, 6.655, 3.226, 7.035],
        Sha=[5.377, 5.607, 7.530, 5.210],
        Sor=[5.240, 3.544, 3.306, 4.228],
        Ste=[4.590, 6.581, 8.015, 4.719],
        Cal=[4.744, 7.216, 5.333, 9.469],
    ),
    Calmar=dict(
        Set=[2.444, 2.022, 0.470, 1.666],
        Sha=[0.981, 1.028, 1.825, 1.022],
        Sor=[0.975, 0.763, 0.652, 0.823],
        Ste=[0.949, 1.562, 1.911, 0.778],
        Cal=[1.021, 2.318, 1.229, 2.724],
    ),
)

# Friedman p-value below which the Nemenyi post-hoc test is run.
# NOTE(review): 0.15 is more lenient than the conventional 0.05 that the
# original comment mentioned; the value is kept so the recorded output
# (post-hoc matrix shown for a p-value of about 0.07) stays reproducible.
# Confirm that this level is intended.
ALPHA = 0.15

# Perform tests for each metric
results = {}
for metric, groups in data.items():
    # One column per compared group, one row per repeated measurement.
    df = pd.DataFrame(groups)

    # Friedman test: every column is passed as a separate sample.
    # NOTE(review): SciPy documents the chi-square p-value as reliable only
    # for n > 10 measurements and more than 6 groups; with 4 rows and
    # 5 columns the p-values here should be read as indicative only.
    friedman_stat, friedman_p = friedmanchisquare(*df.values.T)

    # Nemenyi post-hoc test, run only when the Friedman p-value is below ALPHA.
    # The outcome is decided in one place, so a matrix computed for an
    # earlier metric can never be stored for a later one.
    if friedman_p < ALPHA:
        nemenyi_matrix = sp.posthoc_nemenyi_friedman(df.values)
        # Label rows and columns of the pairwise matrix with the group names.
        nemenyi_matrix.columns = df.columns
        nemenyi_matrix.index = df.columns
        nemenyi_outcome = nemenyi_matrix
    else:
        nemenyi_outcome = "No significant differences"

    results[metric] = {
        "Friedman p-value": friedman_p,
        "Nemenyi Matrix": nemenyi_outcome,
    }

# Print one report section per metric: the Friedman p-value followed by
# either the rounded Nemenyi matrix or the stored fallback message.
for name, outcome in results.items():
    print(f"----- {name} -----")
    print(f"Friedman test p-value: {outcome['Friedman p-value']:.4f}")
    posthoc = outcome['Nemenyi Matrix']
    if not isinstance(posthoc, pd.DataFrame):
        # No post-hoc matrix was stored; the entry is a plain message.
        print(posthoc)
    else:
        print("\nSignificant pairwise differences (Nemenyi test):")
        print(posthoc.round(4))
    print("\n")

----- Sharpe -----
Friedman test p-value: 0.4060
No significant differences


----- Sortino -----
Friedman test p-value: 0.0719

Significant pairwise differences (Nemenyi test):
        Set     Sha     Sor     Ste     Cal
Set  1.0000  0.6651  0.1000  0.9627  1.0000
Sha  0.6651  1.0000  0.7971  0.9627  0.6651
Sor  0.1000  0.7971  1.0000  0.3800  0.1000
Ste  0.9627  0.9627  0.3800  1.0000  0.9627
Cal  1.0000  0.6651  0.1000  0.9627  1.0000


----- Sterling -----
Friedman test p-value: 0.4060
No significant differences


----- Calmar -----
Friedman test p-value: 0.2146
No significant differences




In [44]:
data["Calmar"]

{'Set': [2.444, 2.022, 0.47, 1.666],
 'Sha': [0.981, 1.028, 1.825, 1.022],
 'Sor': [0.975, 0.763, 0.652, 0.823],
 'Ste': [0.949, 1.562, 1.911, 0.778],
 'Cal': [1.021, 2.318, 1.229, 2.724]}

In [42]:
results

{'Sharpe': {'Friedman p-value': np.float64(0.40600584970983794),
  'Nemenyi Matrix': 'No significant differences'},
 'Sortino': {'Friedman p-value': np.float64(0.07191336276466469),
  'Nemenyi Matrix': 'No significant differences'},
 'Sterling': {'Friedman p-value': np.float64(0.40600584970983794),
  'Nemenyi Matrix': 'No significant differences'},
 'Calmar': {'Friedman p-value': np.float64(0.21459055821998732),
  'Nemenyi Matrix': 'No significant differences'}}