In [2]:
from scipy import stats


In [4]:
# Sample data: two groups of 10 observations each.
# (What quantity was measured is not stated in this notebook.)
nonsmokers = [
    18, 22, 21, 17, 20,
    17, 23, 20, 22, 21,
]
smokers = [
    16, 20, 14, 21, 20,
    18, 13, 15, 17, 21,
]

In [5]:
# Normality check: run the Shapiro-Wilk test on each group separately.
normal1, normal2 = (stats.shapiro(sample) for sample in (nonsmokers, smokers))

# Print both result objects, one per line.
print(normal1, normal2, sep="\n")

ShapiroResult(statistic=0.9119927883148193, pvalue=0.2949621379375458)
ShapiroResult(statistic=0.9194088578224182, pvalue=0.3520510196685791)


In [7]:
# Equal-variance check (Levene's test) across the two groups.
# center="median" is scipy's default, spelled out here for the reader.
levene = stats.levene(nonsmokers, smokers, center="median")
print(levene)

LeveneResult(statistic=1.945945945945946, pvalue=0.18000074963498305)


In [9]:
# Equal-variance check (Bartlett's test) on the same two groups.
groups = (nonsmokers, smokers)
bartlett = stats.bartlett(*groups)
print(bartlett)

BartlettResult(statistic=0.8904487989220573, pvalue=0.3453557166446204)


In [10]:
from __future__ import print_function

import numpy as np
from scipy.stats import ttest_ind, ttest_ind_from_stats
from scipy.special import stdtr

# Seed the legacy global RNG so the simulated samples are reproducible.
np.random.seed(1)

# Simulated samples: 40 standard-normal draws, then 50 draws scaled by 4.
# The draw order matters: `a` must be generated before `b`.
a = np.random.randn(40)
b = 4*np.random.randn(50)

# --- Route 1: unequal-variance (Welch) t-test on the raw samples. ---
t, p = ttest_ind(a, b, equal_var=False)
print("ttest_ind:            t = %g  p = %g" % (t, p))

# Summary statistics of both samples (sample variance uses ddof=1).
na, nb = a.size, b.size
adof, bdof = na - 1, nb - 1
abar, bbar = a.mean(), b.mean()
avar, bvar = a.var(ddof=1), b.var(ddof=1)

# --- Route 2: the same test driven only by the summary statistics. ---
t2, p2 = ttest_ind_from_stats(
    mean1=abar, std1=np.sqrt(avar), nobs1=na,
    mean2=bbar, std2=np.sqrt(bvar), nobs2=nb,
    equal_var=False,
)
print("ttest_ind_from_stats: t = %g  p = %g" % (t2, p2))

# --- Route 3: the closed-form expressions, evaluated by hand. ---
# Variance of the difference in sample means; shared by the t statistic
# and the numerator of the degrees-of-freedom formula.
mean_diff_var = avar/na + bvar/nb
tf = (abar - bbar) / np.sqrt(mean_diff_var)
# Welch-Satterthwaite approximation to the degrees of freedom.
dof = mean_diff_var**2 / (avar**2/(na**2*adof) + bvar**2/(nb**2*bdof))
# Two-sided p-value: twice the lower tail of Student's t at -|tf|.
pf = 2*stdtr(dof, -np.abs(tf))

print("formula:              t = %g  p = %g" % (tf, pf))

ttest_ind:            t = -1.5827  p = 0.118873
ttest_ind_from_stats: t = -1.5827  p = 0.118873
formula:              t = -1.5827  p = 0.118873


In [30]:
# Re-seed the legacy global RNG so this cell gives the same numbers on every run.
np.random.seed(1)

n_subjects = 20

# "Before" weights: 60 plus normal noise (mean 0, sd 5), one scalar draw per subject.
beforeWeights = [
    60 + np.random.normal(loc=0, scale=5)
    for _ in range(n_subjects)
]

# "After" weights: each before-weight times a normal factor (mean 0.99, sd 0.02).
afterWeights = [
    before * np.random.normal(loc=0.99, scale=0.02)
    for before in beforeWeights
]

# Paired (related-samples) t-test on the before/after measurements.
tTestResult = stats.ttest_rel(a=beforeWeights, b=afterWeights)
tTestResult

Ttest_relResult(statistic=2.9154993563693186, pvalue=0.008871163766572827)

In [31]:
# Echo the 20 simulated "before" weights so the notebook displays them.
beforeWeights

[68.1217268183162,
 56.94121793174962,
 57.359141238682724,
 54.63515688921915,
 64.3270381466234,
 48.492306515598585,
 68.7240588210824,
 56.19396549552449,
 61.59519548028549,
 58.75314812261295,
 67.31053968522487,
 49.69929645251173,
 58.38791397993246,
 58.07972822665792,
 65.66884721167719,
 54.500543663429845,
 59.13785896224782,
 55.61070791039314,
 60.21106873357797,
 62.914076068579114]

In [32]:
# Echo the 20 simulated "after" weights so the notebook displays them.
afterWeights

[65.94098797171107,
 57.67544499710449,
 57.819839215935254,
 54.63788246116392,
 64.84275566541605,
 47.34427263203588,
 67.86790793112998,
 54.58033393455883,
 60.64923115285458,
 58.78881770728494,
 65.70631311881824,
 48.80793606499072,
 57.001583230054315,
 56.517144665313666,
 64.13055954741337,
 53.94173367626836,
 57.224973536332286,
 55.315321289307555,
 61.60772730564428,
 63.218635763189475]