# Shapiro-Wilk test

In [1]:
from scipy import stats

data = [0.876,2.187,1.009,-2.44,-1.675,-1.980,-0.022,1.987]
stat, p = stats.shapiro(data)
print('stat=%.3f, p=%.3f'%(stat,p))

if p > 0.05:
    print('Normal distribution')
else:
    print('Not normal distribution')

stat=0.902, p=0.303
Normal distribution


# D' Agostino's k^2 test

In [2]:
from scipy import stats

data = [0.876,2.187,1.009,-2.44,-1.675,-1.980,-0.022,1.987]
stat, p = stats.normaltest(data)
print('stat=%.3f, p=%.3f'%(stat,p))

if p > 0.05:
    print('Normal distribution')
else:
    print('Not normal distribution')

stat=2.325, p=0.313
Normal distribution




# Anderson - Darling test

In [8]:
from scipy import stats

data = [1.876,-2.345,-0.0987,2.123,1.324,1.657,2.546]
result = stats.anderson(data)
print('stat=%.3f' %(result.statistic))

for i in range(len(result.critical_values)):
    s , c = result.significance_level[i], result.critical_values[i]
    if result.statistic < c:
        print('Normal at %.1f%% level' % (s))
    else:
        print('Not normal at %.1f%% level' %(s))

stat=0.579
Not normal at 15.0% level
Normal at 10.0% level
Normal at 5.0% level
Normal at 2.5% level
Normal at 1.0% level


# Pearson’s Correlation Coefficient

In [10]:
from scipy import stats

data1 = [0.234,0.987,0.876,-0.234,-1.987,-2.970,1.098,2.976]
data2 = [0.0256,-1.445,-2.997,1.902,0.054,0.198,-2.657,1.983]

stat, p = stats.pearsonr(data1, data2)
print('stat = %.3f, p = %.3f' %(stat,p))

if p > 0.05:
    print('Independed i.e; two samples are independent')
else:
    print('Dependent i.e; two samples are dependent')

stat = -0.075, p = 0.860
Independed i.e; two samples are independent


# Spearman's Rank Correlation 

In [11]:
from scipy import stats

data1 = [0.234,0.987,0.876,-0.234,-1.987,-2.970,1.098,2.976]
data2 = [0.0256,-1.445,-2.997,1.902,0.054,0.198,-2.657,1.983]

stat, p = stats.spearmanr(data1, data2)
print('stat = %.3f, p = %.3f' %(stat,p))

if p > 0.05:
    print('Independed i.e; two samples are independent')
else:
    print('Dependent i.e; two samples are dependent')

stat = -0.190, p = 0.651
Independed i.e; two samples are independent


# Kendall's Rank Correlation

In [12]:
from scipy import stats

data1 = [0.234,0.987,0.876,-0.234,-1.987,-2.970,1.098,2.976]
data2 = [0.0256,-1.445,-2.997,1.902,0.054,0.198,-2.657,1.983]

stat, p = stats.kendalltau(data1, data2)
print('stat = %.3f, p = %.3f' %(stat,p))

if p > 0.05:
    print('Independed i.e; two samples are independent')
else:
    print('Dependent i.e; two samples are dependent')

stat = -0.214, p = 0.548
Independed i.e; two samples are independent


# Chi-Squared Test

Tests whether two categorical variables are related or independent

In [15]:
from scipy import stats

table = [[10,20,30],[6,9,17]]
stat, p, dof, expected = stats.chi2_contingency(table)
print('stat= %.3f, p= %.3f' %(stat,p))

if p > 0.05:
    print('Independent')
else:
    print('Dependent')

stat= 0.272, p= 0.873
Independent


In [16]:
from scipy import stats

table = [[10,60,90],[6,9,13]]
stat, p, dof, expected = stats.chi2_contingency(table)
print('stat= %.3f, p= %.3f' %(stat,p))

if p > 0.05:
    print('Independent')
else:
    print('Dependent')

stat= 7.057, p= 0.029
Dependent


# Augmented Dickey - Fuller Unit Root Test

Tests whether a time series has a unit root, for example has a trend or is autoregressive

In [18]:
from statsmodels.tsa.stattools import adfuller

data = [0,1,2,3,4,5,6,7,8,9]
stat, p, lags, obs, crit, t = adfuller(data)
print('stat = %.3f, p = %.3f' %(stat, p))

if p > 0.05:
    print('Not stationary i.e; unit root is present')
else:
    print('Stationary i.e; unit root is not present')

stat = 0.992, p = 0.994
Not stationary i.e; unit root is present


# Kwiatkowski-Phillips-Schmidt-Shin

In [19]:
from statsmodels.tsa.stattools import kpss

data = [0,1,2,3,4,5,6,7,8,9]
stat, p, lags, crit = kpss(data)
print('stat = %.3f, p = %.3f' %(stat, p))

if p > 0.05:
    print('Not stationary')
else:
    print('Stationary')

stat = 0.410, p = 0.073
Not stationary




# student's t-test

In [21]:
from scipy.stats import ttest_ind
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = ttest_ind(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Same distribution i.e; the means of the samples are equal')
else:
    print('Different distributions i.e; the means of the samples are unequal')

stat=-0.326, p=0.748
Same distribution i.e; the means of the samples are equal


# Paired Student's t-test

In [22]:
from scipy.stats import ttest_rel
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = ttest_rel(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=-0.334, p=0.746
same distribution


# ANOVA

In [23]:
from scipy.stats import f_oneway
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = f_oneway(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=0.096, p=0.908
same distribution


# Mann-Whitney U test

In [24]:
from scipy.stats import mannwhitneyu
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = mannwhitneyu(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=40.000, p=0.236
same distribution


# Wilcoxon Signed-Rank test

In [25]:
from scipy.stats import wilcoxon
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = wilcoxon(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=21.000, p=0.557
same distribution


# Kruskal-Wallis H Test

Tests whether the distribution of two or more independent samples are equal or not

In [26]:
from scipy.stats import kruskal
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = kruskal(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=0.571, p=0.450
same distribution


# Friedman Test

Tests whether the distributions of two or more paired samples are equal or not

In [27]:
from scipy.stats import friedmanchisquare
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = friedmanchisquare(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('same distribution')
else:
    print('different distributions')

stat=0.800, p=0.670
same distribution
