In [9]:
# Import packages and functions
import numpy as np
from scipy.stats import rankdata
from scipy.stats import binom
from itertools import product
from tqdm import tqdm

In [10]:
lens_no_zeros_ties = [-2.2, -1.4, -1.1, -0.7, 0.8]
n = len(lens_no_zeros_ties)
# Calculate Walsh averages
WA = []

for i, xi in enumerate(lens_no_zeros_ties):
    for xj in lens_no_zeros_ties[i:]:
        WA += [(xi + xj) / 2]

WA = np.array(WA)
print("Walsh averages", WA)
print("Walsh averages", sorted(WA))
print("Size of Walsh averages set", len(WA))

Walsh averages [-2.2  -1.8  -1.65 -1.45 -0.7  -1.4  -1.25 -1.05 -0.3  -1.1  -0.9  -0.15
 -0.7   0.05  0.8 ]
Walsh averages [-2.2, -1.8, -1.6500000000000001, -1.4500000000000002, -1.4, -1.25, -1.1, -1.0499999999999998, -0.9, -0.7000000000000001, -0.7, -0.29999999999999993, -0.15000000000000002, 0.050000000000000044, 0.8]
Size of Walsh averages set 15


In [11]:
# Compute point estimations
m_sample = np.median(lens_no_zeros_ties)
m_hl = np.median(WA)

print("Median point estimation:")
print("- Sample median", m_sample)
print("- Hodges-Lehmann estimator", m_hl)

Median point estimation:
- Sample median -1.1
- Hodges-Lehmann estimator -1.0499999999999998


In [12]:
# Find exact distribution of W
def distr_W(n):
    prob = {}

    # generate binary sets representing + and - sign of the ranks
    bin_arr = [np.array(p) for p in product([1, 0], repeat=n)]

    # check all possible values of W
    for pos_neg in tqdm(bin_arr):
        W = np.sum(np.arange(1, n + 1) * pos_neg)
        if W in prob:
            prob[W] += 1
        else:
            prob[W] = 1

    # compute probabilities
    for W in prob:
        prob[W] /= 2 ** n

    return prob

W_prob = distr_W(n) # Compute pmf
W_prob = {k: v for k, v in sorted(W_prob.items(), key=lambda item: item[0])} # Sort the values of W in ascending order
W_cdf = dict(zip(W_prob.keys(), np.cumsum(list(W_prob.values())))) # Compute cdf

100%|██████████| 32/32 [00:00<00:00, 31767.51it/s]


In [14]:
# Define another confidence level
gamma = 0.95
alpha = 1 - gamma
# Compute confidence intervals
k_a_sample = int(binom(n=n, p=0.5).ppf(alpha / 2)) + 1 # First, compute a quantile of Binom: ppf will return k such that P(Binom \leq k), so, we add + 1
left_sample = np.sort(lens_no_zeros_ties)[k_a_sample - 1] # Then, pick corresponding order statistics: ordering starts with 0, so we add - 1
right_sample = np.sort(lens_no_zeros_ties)[n - k_a_sample]

k_a_hl = list(W_cdf.keys())[np.argmin(np.abs(np.array(list(W_cdf.values())) - alpha / 2))] + 1 # First, compute a quantile of the exact distribution of W
left_hl = np.sort(WA)[k_a_hl - 1] # Then, pick corresponding order statistics
right_hl = np.sort(WA)[n * (n + 1) // 2 - k_a_hl]

print(k_a_hl)
print("Median confidence interval:")
print("- with Sign test quantiles [", left_sample, right_sample, "]")
print("- with Wilcoxon signed rank test quantiles [", left_hl, right_hl, "]")

1
Median confidence interval:
- with Sign test quantiles [ -2.2 0.8 ]
- with Wilcoxon signed rank test quantiles [ -2.2 0.8 ]
