In [16]:
import scipy.stats as stat
import matplotlib.pyplot as plt
import numpy as np
import math
import itertools

In [19]:
alpha = 0.05
k = 8
u_half_alpha = -stat.norm.ppf(alpha / 2)

In [14]:
def in_range(x, interval):
    a, b = interval
    if a <= x <= b:
        return True
    return False


def pairwise(iterable):
    "s -> (s0, s1), (s1, s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)


def group_data(data, k):
    x0 = math.floor(min(data))
    xn = math.ceil(max(data))
    bounds = [x0 + i * (xn - x0) / k for i in range(k + 1)]
    intervals = list(pairwise(bounds))
    invervals_mids = [(a + b) / 2 for (a, b) in intervals]
    grouped = np.zeros(np.size(data))
    for i in range(np.size(data)):
        entry_group = [j for j in range(k) if in_range(data[i], intervals[j])][0]
        grouped[i] = invervals_mids[entry_group]
    return grouped

# Simple hypothesis

## Homogeneity test for normal's expectation

$t = \frac{\overline{x_{n1}} - \overline{y_{n2}}}{s \sqrt{1/n1 + 1/n2}} \in t_{n1+n2-2}$

$p-value = P(|T| > |t|) = 1 - P(|T| \le |t|) = 1 - 2 F_{t_{n1+n2-2}}(t)$

In [15]:
mu_x = 5
sigma2_x = 7
n_x = 200

mu_y = 5
sigma2_y = 9
n_y = 300

x = stat.norm.rvs(loc=mu_x, scale=sigma2_x**0.5,size=n_x)
y = stat.norm.rvs(loc=mu_y, scale=sigma2_y**0.5,size=n_y)

s2 = ((n_x - 1) * np.var(x) + (n_y-2) * np.var(y)) / (n_x + n_y - 2)
t = (np.mean(x) - np.mean(y)) / (s2**0.5 * (1/n_x + 1/n_y)**0.5)
t_half_alpha = stat.chi2.ppf(1-alpha/2, df=n_x+n_y-2)
p_value = 1 - 2*stat.chi2.cdf(t, df=n_x+n_y-2)
print(f't statistic value: {t}')
print(f'critical values: [ -{t_half_alpha},{t_half_alpha}]')
print(f'p-value: {p_value}')


t statistic value: -0.8734691941832146
critical values: [ -561.7274004072037,561.7274004072037]
p-value: 1.0


In [22]:
x_grouped = group_data(x,k)
x_uniq = np.unique(x_grouped)
x_freq = np.array([np.count_nonzero(x_grouped == val) for val in x_uniq])
print(*x_freq)

7 15 40 50 43 31 10 4
