In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
import random
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

### Normality Test

#### 1. Shapiro

$H_0 = \text{Normal Dist}$

$H_a = \text{Non-Normal Dist}$

In [2]:
# Fixed seeds make both samples -- and every test result computed from them --
# reproducible across kernel restarts.
data_norm = st.norm(0, 1).rvs(100, random_state=1)  # 100 draws from N(0, 1)
# scipy's uniform is parameterised as (loc, scale), so uniform(-1, 1) covers
# [-1, 0] (not [-1, 1]); it is used here only as a clearly non-normal sample.
data_uni = st.uniform(-1, 1).rvs(100, random_state=2)

In [3]:
# Shapiro-Wilk test on the sample drawn from the normal distribution
# (H0: the data are normally distributed).
stat, p = st.shapiro(data_norm)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.992, p=0.784


In [4]:
# Shapiro-Wilk test on the sample drawn from the uniform distribution
# (H0: the data are normally distributed).
stat, p = st.shapiro(data_uni)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.954, p=0.002


#### 2. D’Agostino’s $K^2$

$H_0 = \text{Normal Dist}$

$H_a = \text{Non-Normal Dist}$

In [5]:
# D'Agostino's K^2 test (scipy.stats.normaltest) on the normal sample
# (H0: the data are normally distributed).
stat, p = st.normaltest(data_norm)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.481, p=0.786


In [6]:
# D'Agostino's K^2 test (scipy.stats.normaltest) on the uniform sample
# (H0: the data are normally distributed).
stat, p = st.normaltest(data_uni)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=25.540, p=0.000


#### 3. Anderson-Darling

$H_0 = \text{Normal Dist}$

$H_a = \text{Non-Normal Dist}$

In [8]:
# Anderson-Darling test on the normal sample. The result carries one statistic
# plus parallel sequences of significance levels and critical values.
result = st.anderson(data_norm)
print('stat=%.3f' % (result.statistic))

# A statistic below the critical value means normality is not rejected at that
# level; the level is printed as 100 - significance level.
for sl, cv in zip(result.significance_level, result.critical_values):
    print(('Normal at the %.1f%% level' if result.statistic < cv
           else 'Non-Normal at the %.1f%% level') % (100 - sl))

stat=0.172
Normal at the 85.0% level
Normal at the 90.0% level
Normal at the 95.0% level
Normal at the 97.5% level
Normal at the 99.0% level


In [9]:
# Anderson-Darling test on the uniform sample. The result carries one statistic
# plus parallel sequences of significance levels and critical values.
result = st.anderson(data_uni)
print('stat=%.3f' % (result.statistic))

# A statistic below the critical value means normality is not rejected at that
# level; the level is printed as 100 - significance level.
for sl, cv in zip(result.significance_level, result.critical_values):
    print(('Normal at the %.1f%% level' if result.statistic < cv
           else 'Non-Normal at the %.1f%% level') % (100 - sl))

stat=1.068
Non-Normal at the 85.0% level
Non-Normal at the 90.0% level
Non-Normal at the 95.0% level
Non-Normal at the 97.5% level
Non-Normal at the 99.0% level


### Correlation

#### 1. Pearson’s Correlation Coefficient

$H_0 = \text{Independent Samples (no linear correlation)}$

$H_a = \text{Dependent Samples (linear correlation)}$

In [21]:
# Two independent uniform [0, 1) samples. Each comes from its own seeded
# generator so the correlation and t-test results below are reproducible.
sample_1 = np.random.default_rng(0).random(100)
sample_2 = np.random.default_rng(1).random(100)

# A perfectly dependent pair: the same ages expressed in months and in years
# (an exact linear rescaling by 1/12).
age_in_month = np.array([131, 32, 121, 1241, 124, 124, 134, 45, 656, 457, 74, 34, 65])
age_in_years = age_in_month / 12

In [22]:
# Pearson correlation between the two separately generated random samples.
stat, p = st.pearsonr(sample_1, sample_2)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.046, p=0.647


In [23]:
# Pearson correlation between the same ages in months and in years;
# age_in_years is age_in_month / 12, i.e. an exact linear function of it.
stat, p = st.pearsonr(age_in_month, age_in_years)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=1.000, p=0.000


#### 2. Spearman’s Rank Correlation

$H_0 = \text{Independent Samples (no monotonic association)}$

$H_a = \text{Dependent Samples (monotonic association)}$

In [24]:
# Spearman rank correlation between the two separately generated random samples.
stat, p = st.spearmanr(sample_1, sample_2)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.034, p=0.735


In [25]:
# Spearman rank correlation between the same ages in months and in years;
# dividing by 12 preserves the ordering of the values.
stat, p = st.spearmanr(age_in_month, age_in_years)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=1.000, p=0.000


#### 3. Kendall’s Rank Correlation

$H_0 = \text{Independent Samples (no monotonic association)}$

$H_a = \text{Dependent Samples (monotonic association)}$

In [26]:
# Kendall's tau between the two separately generated random samples.
stat, p = st.kendalltau(sample_1, sample_2)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.023, p=0.734


In [27]:
# Kendall's tau between the same ages in months and in years;
# dividing by 12 preserves the ordering of the values.
stat, p = st.kendalltau(age_in_month, age_in_years)

# Report the correlation coefficient and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=1.000, p=0.000


### Parametric

#### 1. Student’s t-test (Welch’s unequal-variance form for independent samples, and the paired test)


$H_0 = \text{Equality of Mean of Samples}$

$H_a = \text{Non-Equality of Mean of Samples}$

In [33]:
def t_test(x, y, alternative='both-sided'):
    """Return the p-value of Welch's (unequal-variance) t-test for two independent samples.

    Parameters
    ----------
    x, y : array_like
        The two independent samples to compare.
    alternative : {'both-sided', 'two-sided', 'greater', 'less'}, optional
        Alternative hypothesis. 'both-sided' (the default; 'two-sided' is
        accepted as an alias) tests mean(x) != mean(y), 'greater' tests
        mean(x) > mean(y) and 'less' tests mean(x) < mean(y).

    Returns
    -------
    float
        The p-value for the requested alternative.

    Raises
    ------
    ValueError
        If `alternative` is not one of the supported names.
    """
    # Validate first: previously an unknown name fell through every branch and
    # failed with an UnboundLocalError on `pval`.
    if alternative not in ('both-sided', 'two-sided', 'greater', 'less'):
        raise ValueError(
            "alternative must be 'both-sided', 'two-sided', 'greater' or 'less', "
            "got %r" % (alternative,)
        )

    _, double_p = st.ttest_ind(x, y, equal_var=False)
    if alternative in ('both-sided', 'two-sided'):
        return double_p

    # One-sided p-value from the two-sided one: half of it when the observed
    # mean difference points in the hypothesised direction, else the complement.
    if alternative == 'greater':
        in_expected_tail = np.mean(x) > np.mean(y)
    else:
        in_expected_tail = np.mean(x) < np.mean(y)
    return double_p / 2. if in_expected_tail else 1.0 - double_p / 2.

In [42]:
# One-sided test via t_test: the alternative is mean(sample_1) < mean(sample_2).
p = t_test(sample_1, sample_2, alternative='less')

# Report the p-value to three decimals.
print('p=%.3f' % (p))

p=0.375


In [40]:
# Paired (related-samples) t-test on sample_1 vs sample_2.
stat, p = st.ttest_rel(sample_1, sample_2)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=-0.334, p=0.746


#### 2. ANOVA


$H_0 = \text{Equality of Mean of Samples}$

$H_a = \text{Non-Equality of Mean of Samples}$

In [60]:
# Two fixed samples of ten values each for the ANOVA example.
# NOTE(review): this rebinds sample_1 / sample_2, which the correlation section
# bound to 100-element random arrays; cells run after this one see these lists.
sample_1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
sample_2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]

In [41]:
# One-way ANOVA (scipy.stats.f_oneway) across sample_1 and sample_2.
stat, p = st.f_oneway(sample_1, sample_2)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.106, p=0.748


### Non-Parametric

- Mann-Whitney U Test
- Wilcoxon Signed-Rank Test
- Kruskal-Wallis H Test
- Friedman Test


### Distribution Test
- Kolmogorov Smirnov

In [92]:
# Observed samples for the Kolmogorov-Smirnov examples (200 draws each).
# Fixed seeds keep the samples reproducible across kernel restarts.
data_norm = st.norm(0, 1).rvs(200, random_state=3)      # N(0, 1)
data_uni = st.uniform(0, 1).rvs(200, random_state=4)    # (loc, scale) = (0, 1), i.e. U[0, 1]
data_poisson = st.poisson(13).rvs(200, random_state=5)  # Poisson with mean 13

In [124]:
# Normal test: 1000 fresh draws from N(0, 1) compared against data_norm.
# NOTE(review): both arguments are sample arrays, so this looks like a
# two-sample KS comparison rather than a test against the exact CDF -- confirm
# against the scipy.stats.kstest docs. The reference draw passes no
# random_state, so the result can differ between runs.
st.kstest(st.norm(0, 1).rvs(1000), data_norm)

KstestResult(statistic=0.066, pvalue=0.44977339495083846)

In [125]:
# Binomial test: 1000 fresh draws from Binomial(n=100, p=0.5) compared against
# data_norm (a deliberately mismatched reference distribution).
# NOTE(review): the reference draw passes no random_state, so the result can
# differ between runs.
st.kstest(st.binom(n=100, p=0.5).rvs(1000), data_norm)

KstestResult(statistic=1.0, pvalue=3.3306690738754696e-16)

In [126]:
# Uniform test: 1000 fresh draws from uniform(0, 1) compared against data_uni.
# NOTE(review): the reference draw passes no random_state, so the result can
# differ between runs.
st.kstest(st.uniform(0, 1).rvs(1000), data_uni)

KstestResult(statistic=0.082, pvalue=0.20486733953842318)

In [127]:
# Uniform test with a mismatched reference: 1000 fresh draws from
# uniform(-1, 1) compared against data_uni.
# NOTE(review): scipy's uniform takes (loc, scale), so uniform(-1, 1) should
# span [-1, 0] rather than [-1, 1] -- confirm this is the intended reference.
st.kstest(st.uniform(-1, 1).rvs(1000), data_uni)

KstestResult(statistic=1.0, pvalue=3.3306690738754696e-16)

In [128]:
# Poisson test: 1000 fresh draws from Poisson(11) compared against data_poisson.
# NOTE(review): the reference draw passes no random_state, so the result can
# differ between runs.
st.kstest(st.poisson(11).rvs(1000), data_poisson)

KstestResult(statistic=0.229, pvalue=3.9603319401670944e-08)

In [129]:
# Poisson test: 1000 fresh draws from Poisson(12) compared against data_poisson.
# NOTE(review): the reference draw passes no random_state, so the result can
# differ between runs.
st.kstest(st.poisson(12).rvs(1000), data_poisson)

KstestResult(statistic=0.127, pvalue=0.008662065379592354)

In [130]:
# Poisson test: 1000 fresh draws from Poisson(13) compared against data_poisson.
# NOTE(review): the reference draw passes no random_state, so the result can
# differ between runs.
st.kstest(st.poisson(13).rvs(1000), data_poisson)

KstestResult(statistic=0.059, pvalue=0.5938458985569726)

### Placebo Example

In [131]:
# Two groups of eight observations each for the placebo example.
# NOTE(review): the notebook does not say which group received the placebo.
group_1 = [50, 40, 37, 60, 78, 90, 67, 45]
group_2 = [65, 55, 65, 56, 70, 100, 80, 60]

In [135]:
# Two-sided test (t_test's default alternative) of group_2 against group_1;
# the cell displays the returned p-value.
t_test(group_2, group_1)

0.2392636991410787

In [136]:
# Three groups of eight observations each; group_1 and group_2 repeat the
# values used in the two-group comparison.
group_1 = [50, 40, 37, 60, 78, 90, 67, 45]
group_2 = [65, 55, 65, 56, 70, 100, 80, 60]
# NOTE(review): group_3 contains a 0, far below every other value -- confirm it
# is intentional rather than a typo.
group_3 = [70, 0, 65, 56, 70, 100, 90, 60]

# One-way ANOVA (scipy.stats.f_oneway) across the three groups.
stat, p = st.f_oneway(group_1, group_2, group_3)

# Report the test statistic and p-value to three decimals.
print('stat=%.3f, p=%.3f' % (stat, p))

stat=0.450, p=0.643
