# T-test_python
documentation
https://docs.scipy.org/doc/scipy/reference/stats.html

In [2]:
### import libraries ###
import numpy as np
import pandas as pd
import scipy.stats as stats

# data (Problem sheet 2.3)

In [47]:
data = np.array([54,38,55,51,44,66,50])
mean = np.mean(data)
std = np.std(data, ddof=1)
print('mean', mean)
print('std', std)

mean 51.142857142857146
std 8.839844859659989


# one sample t-test
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_1samp.html#scipy.stats.ttest_1samp

$H_0$: there is NO difference, true mean $=\mu$

$H_1$: there is a difference, true mean $\neq\mu$

In [48]:
popmean = 60 # population mean for null hypothesis
statistic, pvalue = stats.ttest_1samp(data, popmean)
print('statistic', statistic)
print('pvalue', pvalue)

statistic -2.6509285738158783
pvalue 0.037981457824963816


We reject $H_0$ and conclude that the true ean is not equal to 60

### 95% confidence interval
$\text{mean}\pm\dfrac{t\sigma}{\sqrt{n}}$
Confidence limits refer to sample mean!

In [50]:
t_val = stats.t.ppf(0.025, len(data)-1)
#t_val = statistic
print('t_val', t_val, 'df', len(data)-1)

print('95% interval', mean-t_val*std/np.sqrt(len(data)),
      mean+t_val*std/np.sqrt(len(data)),
     )

t_val -2.446911848791681 df 6
95% interval 59.31835006924209 42.9673642164722


In [51]:
level = 0.95
# Percent point function (inverse of cdf — percentiles).
t_val = stats.t.ppf((1-level)/2, len(data)-1, loc=0, scale=1)
print('t_val', t_val, 'df', len(data)-1)

t_val -2.4469118487916806 df 6


# Serum Calcium levels

In [52]:
prior_data = np.array([2.09, 1.8, 1.97, 2.35, 2.08, 1.9, 2.06, 2.3, 2.35])
post_data = np.array([2.15,2.13,2.27,2.52,2.05,2.24,2.26,2.33,2.34])
print('prior data: mean', np.mean(prior_data), 
      'std', np.std(prior_data, ddof=1),
      'median',np.median(prior_data),
     )
print('post data: mean', np.mean(post_data), 
      'std', np.std(post_data, ddof=1),
      'median',np.median(post_data),
     )

prior data: mean 2.1 std 0.1981161275615895 median 2.08
post data: mean 2.2544444444444443 std 0.13794121131039047 median 2.26


### one sample t-test
$\mu = 0$

In [53]:
popmean = 0 # population mean for null hypothesis
statistic, pvalue = stats.ttest_1samp(prior_data, popmean)
print('statistic', statistic)
print('pvalue', pvalue)
mean = np.mean(prior_data)
std = np.std(prior_data, ddof=1)
t_val = stats.t.ppf(0.025, len(prior_data)-1)
print('t_val', t_val, 'df', len(prior_data)-1)
print('95% interval', mean-t_val*std/np.sqrt(len(prior_data)),
      mean+t_val*std/np.sqrt(len(prior_data)),
     )

statistic 31.799531302879334
pvalue 1.0411926907581334e-09
t_val -2.306004135033371 df 8
95% interval 2.2522855364579413 1.9477144635420587


$\mu = 2.3$

In [55]:
popmean = 2.4 # population mean for null hypothesis
statistic, pvalue = stats.ttest_1samp(prior_data, popmean)
print('statistic', statistic)
print('pvalue', pvalue)

statistic -4.542790186125616
pvalue 0.0018922336027539291


### post data

In [56]:
popmean = 0 # population mean for null hypothesis
statistic, pvalue = stats.ttest_1samp(post_data, popmean)
print('statistic', statistic)
print('pvalue', pvalue)
mean = np.mean(post_data)
std = np.std(post_data, ddof=1)
t_val = stats.t.ppf(0.025, len(post_data)-1)
print('t_val', t_val, 'df', len(post_data)-1)
print('95% interval', mean-t_val*std/np.sqrt(len(post_data)),
      mean+t_val*std/np.sqrt(len(post_data)),
     )

statistic 49.030549094677134
pvalue 3.313519931995994e-11
t_val -2.306004135033371 df 8
95% interval 2.3604754456688686 2.14841344322002


# paired t-test (paired difference test)

In [57]:
diff = post_data - prior_data

popmean = 0 # population mean for null hypothesis
statistic, pvalue = stats.ttest_1samp(diff, popmean)
print('statistic', statistic)
print('pvalue', pvalue)
mean = np.mean(diff)
std = np.std(diff, ddof=1)
t_val = stats.t.ppf(0.025, len(diff)-1)
print('t_val', t_val, 'df', len(diff)-1)
print('95% interval', mean-t_val*std/np.sqrt(len(diff)),
      mean+t_val*std/np.sqrt(len(diff)),
     )


statistic 3.139689962595746
pvalue 0.013811693734581841
t_val -2.306004135033371 df 8
95% interval 0.26787906116289684 0.041009827725992015


# Fluorescence 

In [58]:
wildtype = np.array([2689,2494,2719,2465,2709,2324,2295,2041])
mutant = np.array([3090,3314,3175,3514,3478,3111,3222,3624,2868])
print('wildtype: mean =', np.mean(wildtype), 'std', np.std(wildtype, ddof=1))
print('mutant: mean =', np.mean(mutant), 'std', np.std(mutant, ddof=1))

wildtype: mean = 2467.0 std 240.11723327217837
mutant: mean = 3266.222222222222 std 239.93164535851548


# equal variance test
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html

In [59]:
statistic, pvalue = stats.ttest_ind(wildtype,mutant,equal_var=True)
print('statistic', statistic)
print('pvalue', pvalue)

statistic -6.852752324539301
pvalue 5.4807871370978115e-06


# Welch test (variance not assumed to be equal)

In [60]:
statistic, pvalue = stats.ttest_ind(wildtype,mutant,equal_var=False)
print('statistic', statistic)
print('pvalue', pvalue)

statistic -6.8524198719031695
pvalue 5.966840376473205e-06
