### Gombosuren.A, MUST

<h1 class="list-group-item list-group-item-action active">Guide to Statistical Hypothesis Tests in Python</h1>


<img src = "https://d33wubrfki0l68.cloudfront.net/a5cb4bbe1b04d9099c6fc771724ea67ec087845b/cb16f/wp-content/uploads/2019/07/statistics-vs-machine-learning.png">

In [1]:
import warnings;
warnings.filterwarnings('ignore');

## Normality Tests

<h3 class="alert alert-info">Shapiro-Wilk Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [None]:
# CREATE DATA;
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];

In [5]:
from scipy.stats import shapiro;
stat, p = shapiro(data);
print('stat={0:.3f}, p={0:.3f}' .format(stat, p));

if p > 0.05:
    print('Probably Gaussian or Normal');
else:
    print('Probably not Gaussian or not Normal');

stat=0.895, p=0.895
Probably Gaussian or Normal


<h3 class="alert alert-info">D’Agostino’s K^2 Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [6]:
from scipy.stats import normaltest;
stat, p = normaltest(data);
print('stat={0:.3f}, p={0:.3g}'.format(stat, p));
if p > 0.05:
    print('Probably Gaussian or Normal');
else:
    print('Probably not Gaussian or not Normal');

stat=3.392, p=3.39
Probably Gaussian or Normal


<h3 class="alert alert-info">Anderson-Darling Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [7]:
from scipy.stats import anderson;
result = anderson(data);
print('stat={0:.3g}'.format(result.statistic));
for i in range(len(result.critical_values)):
    sl, cv = result.significance_level[i], result.critical_values[i];
    if result.statistic < cv:
        print('Probably Gaussian at the %.1f%% level' % (sl));
    else:
        print('Probably not Gaussian at the %.1f%% level' % (sl));

stat=0.424
Probably Gaussian at the 15.0% level
Probably Gaussian at the 10.0% level
Probably Gaussian at the 5.0% level
Probably Gaussian at the 2.5% level
Probably Gaussian at the 1.0% level


## Correlation 

<h3 class="alert alert-info">Pearson’s Correlation Coefficient</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html" class="btn btn-warning" role="button">Scipy Ref -></a>/a>

In [8]:
from scipy.stats import pearsonr;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579];
stat, p = pearsonr(data1, data2);
print('stat={0:.3f}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably independent');
else:
    print('Probably dependent');

stat=0.688, p=0.688
Probably dependent


<h3 class="alert alert-info">Spearman’s Rank Correlation</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [9]:
from scipy.stats import spearmanr;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579];
stat, p = spearmanr(data1, data2);
print('stat={0:.3g}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably independent');
else:
    print('Probably dependent');

stat=0.855, p=0.855
Probably dependent


<h3 class="alert alert-info">Kendall’s Rank Correlation</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [10]:
from scipy.stats import kendalltau;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579];
stat, p = kendalltau(data1, data2);
print('stat={0:.3f}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably independent');
else:
    print('Probably dependent');

stat=0.733, p=0.733
Probably dependent


<h3 class="alert alert-info">Chi-Squared Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2_contingency.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [11]:
from scipy.stats import chi2_contingency;
table = [[10, 20, 30],[6,  9,  17]];
stat, p, dof, expected = chi2_contingency(table);
print('stat={0:.3g}, p={0:.3f}' .format(stat, p));
if p > 0.05:
    print('Probably independent');
else:
    print('Probably dependent');

stat=0.272, p=0.272
Probably independent


##Stationary Tests

<h3 class="alert alert-info">Augmented Dickey-Fuller Unit Root Test</h3>

<a href="https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html" class="btn btn-warning" role="button">Stats-Model Ref -></a>

In [12]:
from statsmodels.tsa.stattools import adfuller;
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
stat, p, lags, obs, crit, t = adfuller(data);
print('stat={0:.3f}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably not Stationary');
else:
    print('Probably Stationary');

stat=0.175, p=0.175
Probably not Stationary


<h3 class="alert alert-info">Kwiatkowski-Phillips-Schmidt-Shin</h3>

<a href="https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.kpss.html#statsmodels.tsa.stattools.kpss" class="btn btn-warning" role="button">Stats-Model Ref -></a>

In [13]:
from statsmodels.tsa.stattools import kpss;
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
stat, p, lags, crit = kpss(data);
print('stat={0:.3g}, p={0:.3g}'.format(stat, p));
if p > 0.05:
    print('Probably not Stationary');
else:
    print('Probably Stationary');

stat=0.41, p=0.41
Probably not Stationary


## Parametric Statistical Hypothesis Tests

<h3 class="alert alert-info">Student’s t-test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [14]:
from scipy.stats import ttest_ind;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
stat, p = ttest_ind(data1, data2);
print('stat={0:.3f}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=-0.326, p=-0.326
Probably the same distribution


<h3 class="alert alert-info">Paired Student’s t-test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [15]:
from scipy.stats import ttest_rel;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
stat, p = ttest_rel(data1, data2);
print('stat={0:.3f}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=-0.334, p=-0.334
Probably the same distribution


<h3 class="alert alert-info">Analysis of Variance Test (ANOVA)</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [16]:
from scipy.stats import f_oneway;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204];
stat, p = f_oneway(data1, data2, data3);
print('stat={0:.3g}, p={0:.3g}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=0.0964, p=0.0964
Probably the same distribution


## Nonparametric Statistical Hypothesis Tests

<h3 class="alert alert-info">Mann-Whitney U Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html" class="btn btn-warning" role="button">Scipy Ref -></a>



In [17]:
from scipy.stats import mannwhitneyu;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
stat, p = mannwhitneyu(data1, data2);
print('stat={0:.3g}, p={0:.3g}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=40, p=40
Probably the same distribution


<h3 class="alert alert-info">Wilcoxon Signed-Rank Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [18]:
from scipy.stats import wilcoxon;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
stat, p = wilcoxon(data1, data2);
print('stat={0:.3g}, p={0:.3g}' .format (stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=21, p=21
Probably the same distribution


<h3 class="alert alert-info">Kruskal-Wallis H Test</h3>


<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html" class="btn btn-warning" role="button">Scipy Ref -></a>

In [19]:
from scipy.stats import kruskal;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
stat, p = kruskal(data1, data2);
print('stat={0:.3g}, p={0:.3g}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=0.571, p=0.571
Probably the same distribution


<h3 class="alert alert-info">Friedman Test</h3>

<a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.friedmanchisquare.html" class="btn btn-warning" role="button">Scipy Ref -></a>


In [20]:
from scipy.stats import friedmanchisquare;
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869];
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169];
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204];
stat, p = friedmanchisquare(data1, data2, data3);
print('stat={0:.3g}, p={0:.3f}'.format(stat, p));
if p > 0.05:
    print('Probably the same distribution');
else:
    print('Probably different distributions');

stat=0.8, p=0.800
Probably the same distribution


### TY;