In [None]:
# Mathieu Blondel, February 2012
# License: BSD 3 clause

# Port to Python of examples in chapter 5 of
# "Introductory Statistics with R" by Peter Dalgaard

import numpy as np
import scipy.stats as stats

# daily intake of energy in kJ for 11 women
daily_intake = np.array([5260, 5470, 5640, 6180, 6390, 6515,
                         6805, 7515, 7515, 8230, 8770])

# normality test (Shapiro-Wilk)
print(stats.shapiro(daily_intake))

# one sample t-test
# null hypothesis: expected value = 7725
# The test is run once per alternative hypothesis. Each p-value is stored
# under its own key and printed with its own label, so that no result is
# silently overwritten by the next call:
#   two-sided: the mean differs from 7725
#   less:      the mean is below 7725
#   greater:   the mean is above 7725
t_test_p_values = {}
for alternative in ("two-sided", "less", "greater"):
    t_statistic, p_value = stats.ttest_1samp(daily_intake, 7725,
                                             alternative=alternative)
    t_test_p_values[alternative] = p_value
    # p_value < 0.05 => the null hypothesis is rejected in favour of this
    # alternative at the 5% level of significance
    print("one-sample t-test pval (%s)" % alternative, p_value)

# one sample wilcoxon-test
# NOTE(review): the first value is named z_statistic here, but per the scipy
# documentation stats.wilcoxon returns a signed-rank sum statistic -- confirm.
z_statistic, p_value = stats.wilcoxon(daily_intake - 7725)
print("one-sample wilcoxon-test pval", p_value)


ShapiroResult(statistic=0.952366054058075, pvalue=0.674299418926239)
one-sample t-test pval 0.9909313824119471
one-sample wilcoxon-test pval 0.0244140625


In [None]:
# energy expenditure in mJ (column 0) and stature (column 1: 0=obese, 1=lean)
energ = np.array([
    [9.21, 0], [7.53, 1], [7.48, 1], [8.08, 1], [8.09, 1], [10.15, 1],
    [8.40, 1], [10.88, 1], [6.13, 1], [7.90, 1], [11.51, 0], [12.79, 0],
    [7.05, 1], [11.85, 0], [9.97, 0], [7.48, 1], [8.79, 0], [9.69, 0],
    [9.68, 0], [7.58, 1], [9.19, 0], [8.11, 1],
])

# similar to expend ~ stature in R:
# build one boolean row mask per stature code, then keep only the
# expenditure column of the selected rows
is_obese = energ[:, 1] == 0
is_lean = energ[:, 1] == 1
group1 = energ[is_obese][:, 0]
group2 = energ[is_lean][:, 0]

# assumption checks: Shapiro-Wilk normality test on the second group and
# Levene test for equal variances between the two groups
print(stats.shapiro(group2))
print(stats.levene(group1, group2))

# two-sample t-test
# null hypothesis: both groups share the same mean
# the test assumes equal variances in the two groups
# (this can be checked with a test for equal variance)
# independent groups: e.g., how boys and girls fare at an exam
# dependent groups: e.g., how the same class fares at 2 different exams
t_statistic, p_value = stats.ttest_ind(group1, group2)

# p_value < 0.05 => alternative hypothesis:
# the means differ at the 5% significance level
print("two-sample t-test", p_value)

# two-sample wilcoxon test
# a.k.a Mann Whitney U
u, p_value = stats.mannwhitneyu(group1, group2)
print("two-sample wilcoxon-test", p_value)

ShapiroResult(statistic=0.8673301935195923, pvalue=0.048183370381593704)
LeveneResult(statistic=0.26770858376245593, pvalue=0.6105485798476267)
two-sample t-test 0.0007989982111700593
two-sample wilcoxon-test 0.002121613385880049


In [None]:
# pre- and post-menstrual energy intake, one row per subject:
# column 0 = pre, column 1 = post
intake = np.array([
    [5260, 3910], [5470, 4220], [5640, 3885], [6180, 5160],
    [6390, 5645], [6515, 4680], [6805, 5265], [7515, 5975],
    [7515, 6790], [8230, 6900], [8770, 7335],
])

pre, post = intake[:, 0], intake[:, 1]

# paired t-test: two measurements taken on the same experimental unit,
# e.g., before and after a treatment. It is computed here as a one-sample
# t-test of the per-subject differences against 0.
t_statistic, p_value = stats.ttest_1samp(post - pre, 0)

# p < 0.05 => alternative hypothesis:
# the mean difference is not equal to 0
print("paired t-test", p_value)

# alternative to the paired t-test when the data are on an ordinal scale or
# are not normally distributed
z_statistic, p_value = stats.wilcoxon(post - pre)

print("paired wilcoxon-test", p_value)

paired t-test 3.059020942934875e-07
paired wilcoxon-test 0.0009765625


In [None]:

# four groups of five performance scores each
performance1 = [89, 89, 88, 78, 79]
performance2 = [93, 92, 94, 89, 88]
performance3 = [89, 88, 89, 93, 90]
performance4 = [81, 78, 81, 92, 82]

# gather the groups once so every multi-sample test receives the same inputs
all_groups = (performance1, performance2, performance3, performance4)

# assumption checks: Shapiro-Wilk normality test on the fourth group and
# Levene test for equal variances across all groups
print(stats.shapiro(performance4))
print(stats.levene(*all_groups))

# one-way ANOVA followed by its rank-based counterpart (Kruskal-Wallis)
print(stats.f_oneway(*all_groups))
print(stats.kruskal(*all_groups))


ShapiroResult(statistic=0.7938330173492432, pvalue=0.07211019843816757)
LeveneResult(statistic=0.6666666666666667, pvalue=0.5846671108816857)
F_onewayResult(statistic=4.625000000000002, pvalue=0.016336459839780215)
KruskalResult(statistic=7.655453149001545, pvalue=0.05369589233558365)
