In [1]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from scipy.special import eval_sh_legendre
from typing import List
from tqdm import tqdm
from scipy.stats.sampling import TransformedDensityRejection, SimpleRatioUniforms
from typing import *

In [2]:
def phi_1(u):
    """Linear score function ``sqrt(3) * (2u - 1)``.

    On the unit interval this score has mean zero and unit variance.
    Accepts a scalar or a NumPy array and returns the same shape.
    """
    centered = 2 * u - 1
    return np.sqrt(3) * centered

def phi_2(u):
    """Tent-shaped score function ``sqrt(48) * (0.25 - |u - 0.5|)``.

    The score peaks at ``u = 0.5`` and is symmetric around it; on the unit
    interval it has mean zero and unit variance.
    Accepts a scalar or a NumPy array and returns the same shape.
    """
    distance_from_center = np.abs(u - 0.5)
    return np.sqrt(48) * (0.25 - distance_from_center)

def get_T(sample, n, m, phi):
    """Compute the two-sample linear rank statistic T for a pooled sample.

    Every observation is replaced by its rank ``r`` (1..N) in the pooled
    sample, mapped to the score ``phi((r - 0.5) / N)`` with ``N = n + m``,
    and the statistic is::

        sqrt(m * n / N) * (mean score of group 1 - mean score of group 2)

    Parameters
    ----------
    sample : array-like, 1-D, length ``n + m``
        Pooled observations. The first ``n`` entries are the first group and
        the remaining ``m`` entries the second group.
    n : int
        Size of the first group.
    m : int
        Size of the second group.
    phi : callable
        Score function; it is called once per observation with a scalar.

    Returns
    -------
    float
        The value of the statistic.

    Raises
    ------
    ValueError
        If ``sample`` is not one-dimensional with exactly ``n + m`` entries.

    Notes
    -----
    Tied values receive distinct consecutive ranks (no mid-ranks are used).
    """
    pooled = np.asarray(sample)
    N = n + m
    if pooled.ndim != 1 or pooled.size != N:
        raise ValueError(
            f"sample must be one-dimensional with n + m = {N} entries, "
            f"got shape {pooled.shape}"
        )

    # argsort yields the indices that sort the data, not the ranks; applying
    # it twice inverts that permutation and gives each observation's rank.
    ranks = np.argsort(np.argsort(pooled)) + 1
    scores = [phi((rank - 0.5) / N) for rank in ranks]

    # Split at n so the groups match the (size-n, size-m) pooling order.
    first_mean = np.sum(scores[:n]) / n
    second_mean = np.sum(scores[n:]) / m
    return np.sqrt(m * n / N) * (first_mean - second_mean)

def get_wilcoxon(sample, n, m):
    """Return the squared rank statistic ``get_T`` built on the score ``phi_1``.

    ``sample``, ``n`` and ``m`` are forwarded to ``get_T`` unchanged.
    """
    t_value = get_T(sample, n, m, phi=phi_1)
    return t_value ** 2

def get_an_br(sample, n, m):
    """Return the squared rank statistic ``get_T`` built on the score ``phi_2``.

    ``sample``, ``n`` and ``m`` are forwarded to ``get_T`` unchanged.
    """
    t_value = get_T(sample, n, m, phi=phi_2)
    return t_value ** 2

def get_ks(sample1, sample2, n, m):
    """Return the two-sample Kolmogorov-Smirnov statistic scaled by sqrt(n*m/(n+m)).

    ``sample1`` and ``sample2`` are passed to ``scipy.stats.ks_2samp``; ``n``
    and ``m`` are used only for the scaling factor.
    """
    ks_result = scipy.stats.ks_2samp(sample1, sample2)
    scale = np.sqrt(n * m / (n + m))
    return scale * ks_result.statistic

def get_statistics(n, m):
    """Draw two standard-normal samples and evaluate all four statistics.

    A sample of size ``n`` and a sample of size ``m`` are drawn from N(0, 1)
    using NumPy's global random state, then pooled in that order.

    Returns
    -------
    tuple
        ``(wilcoxon, ansari_bradley, their sum, scaled KS statistic)``.
    """
    first = np.random.normal(0, 1, n)
    second = np.random.normal(0, 1, m)
    pooled = np.concatenate((first, second))

    wilcoxon = get_wilcoxon(pooled, n, m)
    ansari_bradley = get_an_br(pooled, n, m)
    combined = wilcoxon + ansari_bradley
    kolmogorov_smirnov = get_ks(first, second, n, m)
    return wilcoxon, ansari_bradley, combined, kolmogorov_smirnov

## Task 1

Does the distribution of each statistic depend on the distribution of the data? If so, how?


In [10]:
# Monte Carlo simulation with both samples drawn from N(0, 1): collect the
# four statistics over many replications so their empirical quantiles can
# be inspected afterwards.
n = 20
m = 20
n_simulations = 10000

Ws, ABs, Ls, KSs = [], [], [], []

for i in tqdm(range(n_simulations)):
    # get_statistics draws the two samples and evaluates every statistic,
    # so the sampling logic lives in one place.
    W, AB, L, KS = get_statistics(n, m)

    Ws.append(W)
    ABs.append(AB)
    Ls.append(L)
    KSs.append(KS)

100%|██████████| 10000/10000 [00:04<00:00, 2185.07it/s]


In [15]:
# Compare simulated 0.95-quantiles with reference distributions.
# Ws and ABs hold squared statistics, so their reference printed here is the
# chi-squared quantile with 1 degree of freedom (not a standard normal one).
print(f"Chi squared with 1 df 0.95-quantile: {scipy.stats.chi2.ppf(0.95, 1)}")
print(f"Wilcoxon test 0.95-quantile: {np.quantile(Ws, 0.95)}")
print()
print(f"Ansari-Bradley test 0.95-quantile: {np.quantile(ABs, 0.95)}")
print()
# Ls is the sum of the two squared statistics; reference: chi-squared, 2 df.
print(f"Chi squared with 2 df 0.95-quantile: {scipy.stats.chi2.ppf(0.95, 2)}")
print(f"Lepage test 0.95-quantile: {np.quantile(Ls, 0.95)}")
print()
# KSs holds the scaled KS statistic; reference: scipy.stats.kstwobign.
print(f"KS 0.95-quantile: {scipy.stats.kstwobign.ppf(0.95)}")
print(f"KS test 0.95-quantile: {np.quantile(KSs, 0.95)}")

Standard normal 0.95-quantile: 3.841458820694124
Wilcoxon test 0.95-quantile: 3.888000000000001

Ansari-Bradley test 0.95-quantile: 3.887999999999999

Chi squared with 2 df 0.95-quantile: 5.991464547107979
Lapage test 0.95-quantile: 5.988862499999997

KS 0.95-quantile: 1.3580986393225505
KS test 0.95-quantile: 1.264911064067352
