In [47]:
import scipy.stats as stat
import matplotlib.pyplot as plt
import numpy as np
import math
import itertools
import pandas as pd
 


In [48]:
def sign_test_exact(x, y):
    n = np.size(x)
    dif = x - y
    w = min(np.count_nonzero(dif > 0), np.count_nonzero(dif < 0))
    t = 2**(-n) * sum([math.comb(n, j) for j in range(w+1)])
    return t

def sign_test_assymp(x, y):
    n = np.size(x)
    dif = x - y
    w = min(np.count_nonzero(dif > 0), np.count_nonzero(dif < 0))
    t = (w - n / 2) / (n / 4)**0.5
    p_value = 2 * stat.norm.cdf(-np.abs(t))
    return p_value



In [49]:
def pairwise(iterable):
    "s -> (s0, s1), (s1, s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)

def in_interval(t, interval):
    a, b = interval
    return a <= t <= b

def wilcoxon_test(x, y):
    dif = np.abs(x - y)
    n = np.size(dif)
    k = n // 10
    a = np.min(dif)
    b = np.max(dif)
    bounds = [a + (b - a) * i / k for i in range(k + 1)]
    intervals = list(pairwise(bounds))
    t = sum([np.sign(z) * [j for j in range(k) if in_interval(z, intervals[j])][0] for z in dif])
    t = t / (n * (n + 1) * (2 * n + 1) / 6)**0.5
    p_value = 2 * stat.norm.cdf(-np.abs(t))
    return p_value
    
    

In [50]:
def kendall_correl(x, y):
    concerted = 0
    n = np.size(x)
    for i in range(n):
        for j in range(i, n):
            if (x[i] < x[j] and y[i] < y[j]) or (x[i] > x[j] and y[i] > y[j]):
                concerted += 1
    r = (concerted - (math.comb(n,2) - concerted)) / math.comb(n,2)
    t = r / (2 * (2 * n + 5) / (9 * n * (n - 1)))**0.5
    p_value = 2 * stat.norm.cdf(-np.abs(t))
    return r, p_value


In [312]:
def autocorrelation(data):
    n = np.size(data)
    x1 = data
    x2 = np.zeros(n)
    for i in range(n - 1):
        x2[i] = x1[i+1]
    x2[n-1] = data[0]
    return stat.pearsonr(x1, x2)

def xu_test(x):
    x.sort()
    n = np.size(x)
    median = np.median(x)
    t = sum([(i - 1) * (x[i] - median) ** 2 for i in range(n)]) / (
            (n - 1) * sum([(x[i] - median) ** 2 for i in range(n)]))
    z = (t - 1 / 2) / ((n + 1) / (6 * (n - 1) * (n + 2))) ** 0.5
    return z, 2 * stat.norm.cdf(-np.abs(z))


In [52]:
alpha = 0.05

# Sign test

In [53]:
n = 20
x = stat.norm.rvs(loc=0, scale=7**0.5,size=n)
y1 = 5 * x + np.random.default_rng().uniform(low=-10, high=10, size=n)
y2 = 5 * x + stat.expon.rvs(scale=1/20, size=n)
print(f'uniform noise; exact: {sign_test_exact(x, y1)}, assymptotic: {sign_test_assymp(x, y1)}')
print(f'exponential noise; exact: {sign_test_exact(x, y2)}, assymptotic: {sign_test_assymp(x, y2)}')


uniform noise; exact: 0.2517223358154297, assymptotic: 0.37109336952269756
exponential noise; exact: 0.5880985260009766, assymptotic: 1.0


# Signed-rank test

In [54]:
n = 100
x = stat.norm.rvs(loc=0, scale=7**0.5,size=n)
y1 = 5 * x + np.random.default_rng().uniform(low=-10, high=10, size=n)
y2 = 5 * x + stat.expon.rvs(scale=1/20, size=n)
print(f'uniform noise: wilcoxon: {wilcoxon_test(x, y1)}, sign test: {sign_test_assymp(x, y1)}')
print(f'exponential noise: wilcoxon: {wilcoxon_test(x, y2)}, sign test: {sign_test_assymp(x, y2)}')


uniform noise: wilcoxon: 0.7039937435338861, sign test: 0.4237107971667934
exponential noise: wilcoxon: 0.7245170840277761, sign test: 0.4237107971667934


# Correlation analysis

In [55]:
r_pearson, p_pearson = stat.pearsonr(x, y1)
r_kendal, p_kendall = kendall_correl(x,y1)
print('uniform noise')
print(f'Pearson: correlation {r_pearson}, p-value {p_pearson}')
print(f'Kendall: correlation {r_kendal}, p-value {p_kendall}\n')

r_pearson, p_pearson = stat.pearsonr(x, y2)
r_kendal, p_kendall = kendall_correl(x, y2)
print('exponential noise')
print(f'Pearson: correlation {r_pearson}, p-value {p_pearson}')
print(f'Kendall: correlation {r_kendal}, p-value {p_kendall}')


uniform noise
Pearson: correlation 0.8718980834674352, p-value 3.7622481163703714e-32
Kendall: correlation 0.6545454545454545, p-value 4.958979438321728e-22

exponential noise
Pearson: correlation 0.9999917937241183, p-value 2.810377480595811e-236
Kendall: correlation 0.997979797979798, p-value 5.407019607857424e-49


# Autocorrelation

In [278]:
n = 200
x1 = stat.norm.rvs(loc=5, scale=7**0.5,size=n)
x2 = np.zeros(n)
for i in range(n - 1):
    x2[i] = x1[i] + 0.1 * x1[i + 1]

cor1, p_value1 = autocorrelation(x1)
print("random sample")
print(f'autocorrelation with lag 1: {cor1}; p-value:{p_value1}\n')

cor2, p_value2 = autocorrelation(x2)
print('autocorrelated sample')
print(f'autocorrelation with lag 1: {cor2}; p-value:{p_value2}')


random sample
autocorrelation with lag 1: -0.11877573450327925; p-value:0.09390501527016575

autocorrelated sample
autocorrelation with lag 1: -0.04141856598620092; p-value:0.5603474833309279


In [319]:
x1 = stat.norm.rvs(loc=5, scale=7**0.5,size=n)
t, p_value = xu_test(x1)
print('same dispersion sample')
print(f'xu statistic: {t}, p-value {p_value}\n')

x2 = np.copy(x1)
for i in range(n//2,n):
        x2[i] *= 1.5

t, p_value = xu_test(x2)
print('different dispersion sample')
print(f'xu statistic: {t}, p-value {p_value}')

same dispersion sample
xu statistic: 0.5524054536459054, p-value 0.580670594981578

different dispersion sample
xu statistic: 4.327213653225434, p-value 1.510074687413524e-05
