# <span style="color:magenta"> Functions summary </span>

In [None]:
# t-test functions -- quick reference only: the sections below import each
# function and build data1/data2/data3 before calling it
stat, p = ttest_ind(data1, data2) # Student's t-test for independent samples
stat, p = ttest_rel(data1, data2) # paired Student's t-test
stat, p = f_oneway(data1, data2, data3) # one-way ANOVA; stat is the F-value

# <span style="color:violet"> Calculate mean and standard deviation. </span>

In [1]:
# generate gaussian data samples
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import std

In [2]:
# fix the global NumPy RNG seed so the random samples drawn next are reproducible
seed(1)

In [3]:
# two Gaussian samples of 100 draws each: scale standard-normal noise by 5,
# then centre the first sample on 50 and the second on 51
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)

In [4]:
# report each sample's mean and standard deviation to 3 decimal places
# NOTE: numpy's std defaults to ddof=0 (population SD), not the ddof=1 sample SD
print('data1: mean=%.3f stdv=%.3f' % (mean(data1), std(data1)))
print('data2: mean=%.3f stdv=%.3f' % (mean(data2), std(data2)))

data1: mean=50.303 stdv=4.426
data2: mean=51.764 stdv=4.660


# <span style="color:magenta"> Student's t-test. </span>

In [5]:
# Student's t-test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import ttest_ind

In [6]:
# fix the RNG state so both samples are reproducible
seed(1)
# draw two samples independently: 100 values each, SD 5, centred on 50 and 51
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)

In [10]:
# compare samples: the result of ttest_ind unpacks into the t statistic and
# the two-sided p-value (equal_var defaults to True, i.e. the classic
# Student's test rather than Welch's)
stat, p = ttest_ind(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))

Statistics=-2.262, p=0.025


In [8]:
# interpret: compare the p-value against the 5% significance level
alpha = 0.05
# p above alpha -> not enough evidence against H0; otherwise H0 is rejected
print('Same distributions (fail to reject H0)' if p > alpha
      else 'Different distributions (reject H0)')

Different distributions (reject H0)


# <span style="color:violet"> Paired Student's t-test. </span>
- Compare means from the same sample or matched samples
- The samples are not independent
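- Because each observation in one sample is matched with one in the other, the test works on the pairwise differences and no longer has to assume there is variation between observations.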

In [11]:
# Paired Student's t-test on two seeded Gaussian samples
from numpy.random import seed, randn
from scipy.stats import ttest_rel

# fix the RNG state so the draws (and the printed result) are reproducible
seed(1)
# draw the samples: 100 values each, SD 5, centred on 50 and 51
# NOTE: the two samples are drawn independently here; ttest_rel nevertheless
# treats them element-wise as matched pairs
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)

# run the test; the result unpacks into (statistic, p-value)
stat, p = ttest_rel(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret against the 5% significance level:
# p above alpha -> fail to reject H0, otherwise reject H0
alpha = 0.05
print('Same distributions (fail to reject H0)' if p > alpha
      else 'Different distributions (reject H0)')

Statistics=-2.372, p=0.020
Different distributions (reject H0)


# <span style="color:magenta"> Analysis of Variance. </span>
- Multiple independent data samples.
- Null hypothesis (H0): the means of the 2 or more samples are equal.
- If at least one sample has a different mean (i.e. a different distribution under the assumptions below), the null hypothesis is rejected.
- Requirements: each sample is drawn from a Gaussian distribution, the samples are independent, and all samples have the same standard deviation (SD).

In [15]:
# One-way Analysis of Variance (ANOVA) on three seeded Gaussian samples
from numpy.random import seed, randn
from scipy.stats import f_oneway

# fix the RNG state so the draws (and the printed result) are reproducible
seed(1)
# draw three samples independently: 100 values each, SD 5;
# the first two are centred on 50, the third on 52
data1 = 50 + 5 * randn(100)
data2 = 50 + 5 * randn(100)
data3 = 52 + 5 * randn(100)

# run the test; the result unpacks into (F statistic, p-value)
stat, p = f_oneway(data1, data2, data3)
print('Statistics=%.3f, p=%.3f' % (stat, p))

# interpret against the 5% significance level:
# p above alpha -> fail to reject H0, otherwise reject H0
alpha = 0.05
print('Same distributions (fail to reject H0)' if p > alpha
      else 'Different distributions (reject H0)')

Statistics=3.655, p=0.027
Different distributions (reject H0)


# <span style="color:violet"> Repeated measures ANOVA Test. </span>
- Multiple dependent data samples
- Compare 2 or more measurements on the same subject at different intervals
- SciPy has no function for repeated measures ANOVA, so a third-party package is needed (see pyvttbl below)

https://www.marsja.se/repeated-measures-anova-using-python/

pyvttbl runs the repeated measures ANOVA test. Instead of returning values in the form `stat, p`, it returns an ANOVA table.

pip install pyvttbl