# Testování hypotéz

In [1]:
import scipy.stats as st
import numpy as np

Zdroj: [NI-VSM – přednáška 11, slidy (PDF)](https://courses.fit.cvut.cz/NI-VSM/lectures/files/NI-VSM-Lec-11-Slides.pdf)

### Jednovýběrový t-test

<img src="./img/normttest.png"  width="600"/>

In [50]:
# Test setup for the one-sample t-test.
alpha = 0.05       # significance level
alt = 'two-sided'  # forwarded to SciPy's `alternative=` argument further down
mu_0 = 0.5         # mean under the null hypothesis (used as `popmean`)

In [51]:
# Input variant A: the sample is known only through its summary statistics.
# The raw-data cell that follows assigns the same three names, so whichever
# of the two cells runs last determines the values used by the test.
n, bar_X, var_X = 20, 0.2, 0.168  # sample size, sample mean, sample variance

In [6]:
# Input variant B: raw observations; the summary statistics are derived from them.
X = np.array([5, 4])

n = X.size             # number of observations
bar_X = X.mean()       # sample mean
var_X = X.var(ddof=1)  # unbiased sample variance (divides by n - 1)

In [52]:
# One-sample t statistic for unknown variance: (bar_X - mu_0) / s_X * sqrt(n),
# where s_X is the sample standard deviation.
s_X = np.sqrt(var_X)
T = (bar_X - mu_0) / s_X * np.sqrt(n)

In [57]:
# Report the inputs, the statistic, both critical values and the two-sided p-value.
t_dist = st.t(df=n - 1)  # Student t distribution with n - 1 degrees of freedom

print(f'{bar_X=} {mu_0=} {np.sqrt(var_X)=} {n=}')
print(f'{T=}')

# Lower and upper alpha/2 quantiles, i.e. the two-sided critical values.
lcval, rcval = t_dist.ppf(alpha / 2), t_dist.ppf(1 - alpha / 2)
print(f'{lcval=} {rcval=}')

# NOTE: `pval` holds the CDF evaluated at |T|; the two-sided p-value is the
# expression printed below, (1 - pval) * 2.
pval = t_dist.cdf(np.abs(T))
print(f'{(1-pval)*2=}')

bar_X=0.2 mu_0=0.5 np.sqrt(var_X)=0.40987803063838396 n=20
T=-3.2732683535398857
lcval=-2.0930240544082634 rcval=2.093024054408263
(1-pval)*2=0.0039989125815405036


In [None]:
# Same one-sample test (unknown variance) done by SciPy on the raw sample X.
st.ttest_1samp(a=X, popmean=mu_0, alternative=alt)

### Párový t-test

<img src="./img/pttest.png"  width="600"/>

In [64]:
# Paired samples: X[i] and Y[i] are subtracted element-wise below.
X = np.array([178, 188, 177, 192, 193])
Y = np.array([172, 176, 180, 184, 186])

alt = 'greater'

# Element-wise differences require samples of equal length.
assert len(X) == len(Y)

# The paired test is carried out as a one-sample t-test of the differences
# against a mean of 0.
Z = X - Y
n = Z.size
bar_Z = Z.mean()
var_Z = Z.var(ddof=1)

T = bar_Z / np.sqrt(var_Z) * np.sqrt(n)  # unknown variance

# `alpha` is not assigned in this cell; it must already exist from an earlier cell.
t_dist = st.t(df=n - 1)

print(f'{bar_Z=} mu={0} {np.sqrt(var_Z)=} {n=}')
print(f'{T=}')
print(t_dist.ppf(1 - alpha))  # upper (1 - alpha) quantile: right-sided critical value
print(1 - t_dist.cdf(T))      # right-tail p-value


bar_Z=6.0 mu=0 np.sqrt(var_Z)=5.522680508593631 n=5
T=2.429328990536748
2.13184678133629
0.036016573856563094


In [61]:
# SciPy cross-check: one-sample t-test of the differences Z against mean 0.
st.ttest_1samp(a=Z, popmean=0, alternative=alt)

Ttest_1sampResult(statistic=2.429328990536748, pvalue=0.036016573856563025)

### Dvouvýběrový t-test

<img src="./img/samevar.png"  width="600"/>

<img src="./img/notsamevar.png"  width="600"/>

In [86]:
# Two independent samples (they may differ in length).
X = np.array([169, 178, 179, 186, 191])
Y = np.array([175, 182, 183, 189, 191, 192])

alpha = 0.05       # significance level
alt = 'two-sided'  # forwarded to SciPy's `alternative=` argument

In [87]:
# Summary statistics of both samples.
n, m = X.size, Y.size                        # sample sizes
bar_X, bar_Y = X.mean(), Y.mean()            # sample means
var_X, var_Y = X.var(ddof=1), Y.var(ddof=1)  # unbiased sample variances

In [93]:
# Two-sample t-test assuming equal (unknown) variances.
df = n + m - 2  # degrees of freedom of the pooled test

# Pooled standard deviation of the two samples.
s12 = np.sqrt(((n - 1) * var_X + (m - 1) * var_Y) / df)

T = (bar_X - bar_Y) / s12 * np.sqrt((n * m) / (n + m))  # equal var

print(f'{bar_X=} {bar_Y=} {s12=} {n=} {m=}')
print(f'{T=}')
print(st.t.ppf(1 - alpha / 2, df))  # c val (two-sided)
# Two-sided p-value: 2 * P(t_df > |T|). Taking |T| keeps the result valid for
# either sign of T, and df is derived from the sample sizes rather than being
# hard-coded, so the cell stays correct when the data change.
print(2 * st.t.sf(np.abs(T), df))   # p val

bar_X=180.6 bar_Y=185.33333333333334 s12=7.4127018116303685 n=5 m=6
T=-1.0545200563892545
2.2621571627409915
0.319130239321952


In [None]:
# Two-sample t-test without assuming equal variances (Welch's t-test).
sd = np.sqrt(var_X / n + var_Y / m)  # standard error of bar_X - bar_Y

# Welch-Satterthwaite approximation of the degrees of freedom. The factors
# 1/(n-1) and 1/(m-1) multiply the squared terms; without an explicit operator
# Python would try to call a float and raise TypeError.
nd = sd**4 / ((var_X / n)**2 / (n - 1) + (var_Y / m)**2 / (m - 1))

T = (bar_X - bar_Y) / sd  # non-equal var

print(f'{bar_X=} {bar_Y=} {sd=} {n=} {m=} {nd=}')
print(f'{T=}')
print(st.t.ppf(1 - alpha / 2, nd))  # crit val (two-sided)
# Two-sided p-value: 2 * P(t_nd > |T|); taking |T| makes it valid for either sign of T.
print(2 * st.t.sf(np.abs(T), nd))   # p val

In [70]:
# SciPy cross-check of the pooled (equal-variance) two-sample test.
st.ttest_ind(a=X, b=Y, equal_var=True, alternative=alt)

Ttest_indResult(statistic=-1.0545200563892545, pvalue=0.319130239321952)

### Testy dobré shody

<img src="./img/chisqknownp.png"  width="600"/>

In [94]:
# Goodness-of-fit input: observed counts per category and the category
# probabilities under the null hypothesis (same order as `obs`).
obs = np.array([638, 602])
p = np.array([1/2, 1/2])

alpha = 0.05  # significance level

In [28]:
# Alternative null distribution: geometric with success probability pk,
# binned into the four categories {1, 2}, {3}, {4} and {5, 6, ...}.
# NOTE: this replaces the two-category `p` defined above; `obs` must then
# contain four counts as well.
pk = 1/2
p = np.array([
    st.geom.cdf(2, pk),  # P(X <= 2)
    st.geom.pmf(3, pk),  # P(X = 3)
    st.geom.pmf(4, pk),  # P(X = 4)
    st.geom.sf(4, pk),   # P(X > 4)
])
print(p)

[0.75   0.125  0.0625 0.0625]


In [99]:
# Chi-square goodness-of-fit test with fully specified category probabilities p.

# The hypothesised probabilities must sum to one.
assert np.isclose(sum(p), 1)
# obs and p must describe the same categories. Without this check a length
# mismatch surfaces only as a broadcasting error inside SciPy, or is silently
# broadcast when one of them has length 1.
assert len(obs) == len(p), 'obs and p must have the same number of categories'

n = sum(obs)  # total number of observations
k = len(obs)  # number of categories
exp = n * p   # expected counts under the null hypothesis
print(f'{obs=}')
print(f'{exp=}')
print(f'{n=}')

chi, pval = st.chisquare(obs, exp)

print(f'{chi=} {pval=}')
print(st.chi2.ppf(1 - alpha, df=k - 1))  # c val
# Upper-tail probability via the survival function, which SciPy documents as
# more accurate than 1 - cdf.
print(st.chi2.sf(chi, df=k - 1))         # p val

obs=array([638, 602])
exp=array([620., 620.])
n=1240
chi=1.0451612903225806 pval=0.3066241230648308
3.841458820694124
0.30662412306483366


### Test nezávislosti z kontingenční tabulky

<img src="./img/contingency1.png"  width="600"/>

<img src="./img/contingency2.png"  width="600"/>

In [2]:
# Chi-square test of independence on a contingency table.
c_table = np.array([[15, 85],
                    [5, 95]])

alpha = 0.10  # significance level

r, c = c_table.shape
df_manual = (r - 1) * (c - 1)  # degrees of freedom computed by hand

# `correction=True` is SciPy's default, spelled out here: for tables with one
# degree of freedom (2x2) it applies Yates' continuity correction.
# NOTE(review): pass correction=False if the uncorrected Pearson statistic is
# the one expected - confirm against the course formula.
chi, p, dof, exp = st.chi2_contingency(c_table, correction=True)
print(f'{chi=} {p=} {dof=}')
print(st.chi2.ppf(1 - alpha, df=df_manual))    # c val
print(1 - st.chi2.cdf(chi, df=df_manual))      # p val

chi=4.5 p=0.033894853524689295 dof=1
2.705543454095404
0.03389485352468924
