# T-Testy

In [1]:
import numpy as np
import scipy.stats as st

## Příklad 5.1

In [2]:
# Změřené množství nápoje
x = np.array([0.510, 0.462, 0.451, 0.466, 0.491, 0.503, 0.475, 0.487, 0.512, 0.505])
n = len(x)

In [3]:
# Průměr
Xn = sum(x)/n
print(Xn, np.mean(x))

0.4862 0.4861999999999999


In [6]:
# Výběrový rozptyl
sn2 = sum((x-Xn)**2/(n-1))
sn = np.sqrt(sn2)
print(sn2, np.var(x, ddof = 1)) # , ddof = 1
print(sn, np.std(x, ddof = 1)) # , ddof = 1

0.0004721777777777776 0.0004721777777777777
0.021729652039960915 0.02172965203996092


$H_0: \mu \geq 0.5$ vs. $H_A: \mu < 0.5$

In [7]:
# testová statistika
mu0 = 0.5
T = (Xn - mu0)/sn*np.sqrt(n)
print(T)

-2.008289485265134


In [10]:
# kritická hodnota
alpha = 0.05
t = -st.t.ppf(1-alpha,n-1)
print(t, -st.t.isf(alpha,n-1))

-1.8331129326536335 -1.8331129326536337


In [11]:
# p-hodnota
p = st.t.cdf(T,n-1)
print(p)

0.03776810459633898


In [12]:
st.ttest_1samp(x, mu0, alternative = 'less')

Ttest_1sampResult(statistic=-2.00828948526515, pvalue=0.037768104596338024)

## Příklad 5.2

In [13]:
# data
n = 25
k = 10
# popisné hodnoty
Xn = k/n
sn = np.sqrt(k/(n-1)-k**2/n/(n-1))
print(Xn, sn)

0.4 0.5


In [14]:
# simulace téhož
x = np.random.permutation(np.append(np.zeros([1,n-k]),np.ones([1,k])))
Xn = np.mean(x)
sn = np.std(x, ddof = 1)
print(x)
print(Xn, sn)

[1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 0. 1.
 0.]
0.4 0.5


$H_0: \mu = 0.5$ vs. $H_A: \mu \neq 0.5$

In [15]:
# ručně
mu = 0.5
alpha = 0.05
T = (Xn - mu0)/sn*np.sqrt(n)
t = st.t.isf(alpha/2,n-1)
print(T,t)

-0.9999999999999998 2.063898561628021


In [16]:
# pomocí funkce ttest
st.ttest_1samp(x, mu0, alternative = 'two-sided')

Ttest_1sampResult(statistic=-0.9999999999999998, pvalue=0.32728688127978545)

## Příklad 5.3

In [18]:
# data
b = np.array([0.82, 1.08, 1.01, 0.63, 1.45, 1.12, 0.56, 0.83, 1.16, 1.38])
q = np.array([0.94, 0.79, 0.75, 0.74, 1.25, 0.79, 0.76, 0.75, 0.78, 0.78])
z = b - q
print(z)

[-0.12  0.29  0.26 -0.11  0.2   0.33 -0.2   0.08  0.38  0.6 ]


$H_0: \mu_B \leq \mu_Q$ vs. $H_A: \mu_B > \mu_Q$

$H_0: \mu_Z \leq 0$ vs. $H_A: \mu_Z > 0$

In [19]:
# pomocí funkce ttest
st.ttest_1samp(z, 0, alternative = 'greater')

Ttest_1sampResult(statistic=2.1201062395856716, pvalue=0.03151335300716899)

In [22]:
1-st.t.cdf(2.12,9)

0.03151879258974655

## Příklad 5.4

In [None]:
# data
x = np.array([0.510, 0.462, 0.451, 0.466])
y = np.array([0.491, 0.503, 0.475, 0.487, 0.512, 0.505])

n = len(x)
m = len(y)
Xn = np.mean(x)
Ym = np.mean(y)
sX2 = np.var(x, ddof=1)
sY2 = np.var(y, ddof=1)
sX = np.std(x, ddof=1)
sY = np.std(y, ddof=1)
print('Xn = ', Xn, ", sX = ", sX, ", n = ", n , sep = "")
print('Ym = ', Ym, ", sY = ", sY, ", m = ", m , sep = "")

### Různé rozptyly - není důvod předpokládat, že by byly stejné

a) $H_0: \mu_X = \mu_Y$ vs. $H_A: \mu_X \neq \mu_Y$

In [None]:
# ručně
alpha = 0.05
sd2 = (sX2/n + sY2/m)
nd = sd2**2/((sX2/n)**2/(n-1) + (sY2/m)**2/(m-1))
T = (Xn - Ym)/np.sqrt(sd2)
t = st.t.isf(alpha/2,nd)
p = 2*st.t.sf(np.abs(T),nd)
print("T = ", T, ", t = ", t , ", nd = ", nd, sep="")
print("p = ", p)

In [None]:
# pomocí funkce ttest
st.ttest_ind(x, y, alternative = 'two-sided', equal_var = False)

b) $H_0: \mu_X \geq \mu_Y$ vs. $H_A: \mu_X < \mu_Y$

In [None]:
t = -st.t.isf(alpha,nd)
p = st.t.cdf(T,nd)
print("T = ", T, ", t = ", t , ", nd = ", nd, sep="")
print("p = ", p)

In [None]:
st.ttest_ind(x, y, alternative = 'less', equal_var = False)

### Shodné rozptyly - je důvod předpokládat shodu, nebo si to otestuji

In [None]:
# Test shody rozptylů
## F - test
F = sX2 / sY2
p = 2*st.f.sf(F, n-1, m-1)
print("F = ", F, ", p = ", p, sep="")

In [None]:
# Test normality
st.normaltest(x)
st.normaltest(y)

In [None]:
## Levenův 
st.levene(x,y)

a) $H_0: \mu_X = \mu_Y$ vs. $H_A: \mu_X \neq \mu_Y$

In [None]:
# ručně
s12 = np.sqrt(((n-1)*sX2 + (m-1)*sY2)/(n+m-2))
T = (Xn - Ym)/s12*np.sqrt(n*m/(n+m))
t = st.t.isf(alpha/2,n + m - 2)
p = 2*st.t.sf(np.abs(T), df = n + m - 2)
print("T = ", T, ", t = ", t,", p = ", p, sep="")

In [None]:
# pomocí funkce ttest
st.ttest_ind(x, y, alternative = 'two-sided', equal_var = True)

b) $H_0: \mu_X \geq \mu_Y$ vs. $H_A: \mu_X < \mu_Y$

In [None]:
# pomocí funkce ttest
st.ttest_ind(x, y, alternative = 'less', equal_var = True)