In [1]:
import pandas as pd
import numpy as np
import seaborn as sb


In [2]:
data = sb.load_dataset('tips')
data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


size is no of people in a party

In [3]:
data.describe(include="all")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
count,244.0,244.0,244,244,244,244,244.0
unique,,,2,2,4,2,
top,,,Male,No,Sat,Dinner,
freq,,,157,151,87,176,
mean,19.785943,2.998279,,,,,2.569672
std,8.902412,1.383638,,,,,0.9511
min,3.07,1.0,,,,,1.0
25%,13.3475,2.0,,,,,2.0
50%,17.795,2.9,,,,,2.0
75%,24.1275,3.5625,,,,,3.0


In [40]:
data.nunique()

total_bill    229
tip           123
sex             2
smoker          2
day             4
time            2
size            6
dtype: int64

### Quantiles

In [4]:
print("Q1 quantile of total_bill : ", np.quantile(data['total_bill'], .25)) 
print("Q2 quantile of total_bill : ", np.quantile(data['total_bill'], .50)) 
print("Q3 quantile of total_bill : ", np.quantile(data['total_bill'], .75)) 
print("100th quantile of total_bill : ", np.quantile(data['total_bill'], 1))  

Q1 quantile of total_bill :  13.3475
Q2 quantile of total_bill :  17.795
Q3 quantile of total_bill :  24.127499999999998
100th quantile of total_bill :  50.81


In [5]:
print("25th percentile of total_bill : ", np.percentile(data['total_bill'], 25))
print("50th percentile of total_bill : ",np.percentile(data['total_bill'], 50))
print("75th percentile of total_bill : ",np.percentile(data['total_bill'], 75))
print("75th percentile of total_bill : ",np.percentile(data['total_bill'], 100))

25th percentile of total_bill :  13.3475
50th percentile of total_bill :  17.795
75th percentile of total_bill :  24.127499999999998
75th percentile of total_bill :  50.81


### Hypothesis testing

![title](flowchart-for-choosing-a-statistical-test.png)

Tutorial Overview
This tutorial is divided into 5 parts; they are:

1. Normality Tests
   *  Shapiro-Wilk Test
   *  D’Agostino’s K^2 Test
   *  Anderson-Darling Test

2. Variance Equaltiy Tests
   *  Bartlette's Test
   *  Levene's Test
   *  F- Test

3. Correlation Tests
   *  Pearson’s Correlation Coefficient
   *  Spearman’s Rank Correlation
   *  Kendall’s Rank Correlation
   *  Chi-Squared Test
   
4. Stationary Tests
   *  Augmented Dickey-Fuller
   *  Kwiatkowski-Phillips-Schmidt-Shin
   
5. Parametric Statistical Hypothesis Tests
   *  Student’s t-test
   *  Paired Student’s t-test
   *  Analysis of Variance Test (ANOVA)
   *  Repeated Measures ANOVA Test
   
6. Nonparametric Statistical Hypothesis Tests
   *  Mann-Whitney U Test
   *  Wilcoxon Signed-Rank Test
   *  Kruskal-Wallis H Test
   *  Friedman Test

#### P-values are often interpreted as your risk of rejecting the null hypothesis of your test when the null hypothesis is actually true.

1. Risk of rejecting null hypothesis
2. Chance of Accepting Null hypothesis

#### Generally threshold is 0.05 i.e. 5%
#### If p value < 0.05 ;  we reject null hypothesis

### Normality Tests

Tests whether a data sample has a Gaussian distribution.

1. Assumptions

* Observations in each sample are independent and identically distributed (iid).
2. Interpretation

* H0: the sample has a Gaussian distribution.
* H1: the sample does not have a Gaussian distribution.

#### Shapiro Wilk test

In [6]:

# Example of the Shapiro-Wilk Normality Test
from scipy.stats import shapiro

stat, p = shapiro(data['total_bill'])
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')

stat=0.920, p=0.000
Probably not Gaussian


#### D’Agostino’s K^2 Test

In [7]:
# Example of the K^2 Normality Test
from scipy.stats import normaltest

stat, p = normaltest(data['total_bill'])
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')

stat=45.118, p=0.000
Probably not Gaussian


#### Anderson-Darling Test

In [8]:
# Example of the Anderson Normality Test
from scipy.stats import anderson

result = anderson(data['total_bill'])
print('stat=%.3f' % (result.statistic))
for i in range(len(result.critical_values)):
    sl, cv = result.significance_level[i], result.critical_values[i]

    if result.statistic < cv:
        print('Probably Gaussian at the %.1f%% level' % (sl))
    else:
        print('Probably not Gaussian at the %.1f%% level' % (sl))
        


stat=5.521
Probably not Gaussian at the 15.0% level
Probably not Gaussian at the 10.0% level
Probably not Gaussian at the 5.0% level
Probably not Gaussian at the 2.5% level
Probably not Gaussian at the 1.0% level


### Variance Tests

####  Bartlett’s test for equal variances.

* Bartlett’s test of homogeneity of variances is a test, much like Levene’s test, that measures whether the variances are equal for all samples.
* If your data is normally distributed you can use Bartlett’s test

In [53]:
# Example of the Barlett Variance test
from scipy.stats import bartlett

data2=[[2,25,14,8,7],[41,23,10,1,11],[25,13,3,23]]

stat, p = bartlett(*data2)
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably equal variance')
else:
    print('Probably NOT equal variance')




stat=1.231, p=0.540
Probably equal variance


In [55]:
# Example of the Barlett Variance  test
from scipy.stats import bartlett

data2=[[2,25,14,8,7],[41,23,10,1,11],[325,13,3,23]]

stat, p = bartlett(*data2)
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably equal variance')
else:
    print('Probably NOT equal variance')

stat=24.393, p=0.000
Probably NOT equal variance


In [69]:
# Example of the Barlett Variance  test
from scipy.stats import bartlett

a = [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
b = [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
c = [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
stat, p = bartlett(a, b, c)

print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably equal variance')
else:
    print('Probably NOT equal variance')

stat=22.789, p=0.000
Probably NOT equal variance


In [70]:
[np.var(x, ddof=1) for x in [a, b, c]]

[0.007054444444444413, 0.13073888888888888, 0.008890000000000002]

#### Levenes Test

* Levene’s test can be carried out to check that variances are equal for all samples. 
* The test can be used to check the assumption of equal variances before running a parametric test like One-Way ANOVA in Python. 
* If your data is not following a normal distribution Levene’s test is preferred before Barlett’s.

In [56]:
# Example of the Levene's Variance test
from scipy.stats import levene

data2=[[2,25,14,8,7],[41,23,10,1,11],[25,13,3,23]]

stat, p = levene(*data2)
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably equal variance')
else:
    print('Probably NOT equal variance')


stat=0.351, p=0.712
Probably equal variance


In [72]:
# Example of the Levene Variance  test
from scipy.stats import levene

a = [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
b = [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
c = [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
stat, p = levene(a, b, c)



print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably equal variance')
else:
    print('Probably NOT equal variance')

stat=7.585, p=0.002
Probably NOT equal variance


### F-Test
* The F-test is typically used to answer one of the following questions:

1. Do two samples come from populations with equal variances?

2. Does a new treatment or process reduce the variability of some current treatment or process?

In [96]:
#define F-test function
def f_test(x, y):
    import scipy.stats as st
    x = np.array(x)
    y = np.array(y)
    f = np.var(x, ddof=1)/np.var(y, ddof=1) #calculate F test statistic 
    dfn = x.size-1 #define degrees of freedom numerator 
    dfd = y.size-1 #define degrees of freedom denominator 
    p = 1 - st.f.cdf(f, dfn, dfd) #find p-value of F test statistic
    print('F-stat = %.3f, p = %.3f' % (f, p))
    if p > 0.05:
        print('Probably equal variance')
    else:
        print('Probably NOT equal variance')
   # return f, p



In [101]:
x = [18, 19, 22, 25, 27, 28, 41, 45, 51, 55]
y = [14, 15, 15, 17, 18, 22, 25, 25, 27, 34]

#perform F-test
f_test(x , y)

F-stat = 4.387, p = 0.019
Probably NOT equal variance


### Correlation tests

1. Assumptions

*  Observations in each sample are independent and identically distributed (iid).
*  Observations in each sample are normally distributed.
*  Observations in each sample have the same variance.

2. Interpretation

*  H0: the two samples are independent.
*  H1: there is a dependency between the samples.

#### Pearsons correlation coeff

In [9]:

# Example of the Pearson's Correlation test
from scipy.stats import pearsonr

stat, p = pearsonr(data['total_bill'], data['tip'])
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')

stat=0.676, p=0.000
Probably dependent


In [10]:
# Example of the Pearson's Correlation test
from scipy.stats import pearsonr

stat, p = pearsonr(data['total_bill'], data['size'])
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')

stat=0.598, p=0.000
Probably dependent


#### Spearman’s Rank Correlation

1. Assumptions

*  Observations in each sample are independent and identically distributed (iid).
*  Observations in each sample can be ranked.


2. Interpretation

*  H0: the two samples are independent.
*  H1: there is a dependency between the samples.



In [11]:

# Example of the Spearman's Rank Correlation Test
from scipy.stats import spearmanr

stat, p = spearmanr(data['total_bill'], data.index)
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')

stat=-0.003, p=0.965
Probably independent


#### Kendall’s Rank Correlation

In [12]:
# Example of the Kendalls Rank Correlation Test
from scipy.stats import kendalltau

stat, p = kendalltau(data['total_bill'], data['size'])
print('stat=%.3f, p=%.3f' % (stat, p))

if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')

stat=0.484, p=0.000
Probably dependent


#### Chi-squared test

1. Assumptions

*  Observations in each sample are independent and identically distributed (iid).
*  25 or more examples in each cell of the contingency table.


2. Interpretation

*  H0: the two samples are independent.
*  H1: there is a dependency between the samples.



In [13]:

# Example of the Chi-Squared Test
from scipy.stats import chi2_contingency

# table = [[10, 20, 30],[20,  40,  60]]

table = pd.crosstab(data['sex'] , data['smoker'])
print(table)

stat, p, dof, expected = chi2_contingency(table)
print("\ndegree of freedom : = " ,dof ,"\n\n exected values :- \n" , expected)

print('\nstat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')

smoker  Yes  No
sex            
Male     60  97
Female   33  54

degree of freedom : =  1 

 exected values :- 
 [[59.84016393 97.15983607]
 [33.15983607 53.84016393]]

stat=0.009, p=0.925
Probably independent


## Stationary Tests
*  This section lists statistical tests that you can use to check if a time series is stationary or not.



#### Augmented Dickey-Fuller Unit Root Test

* Tests whether a time series has a unit root, e.g. has a trend or more generally is autoregressive.

1. Assumptions

* Observations in are temporally ordered.
2. Interpretation

* H0: a unit root is present (series is non-stationary).
* H1: a unit root is not present (series is stationary).

In [14]:

# Example of the Augmented Dickey-Fuller unit root test
from statsmodels.tsa.stattools import adfuller

timeseries = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
stat, p, lags, obs, crit, t = adfuller(timeseries)

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably not Stationary')
else:
    print('Probably Stationary')

stat=0.992, p=0.994
Probably not Stationary


#### Kwiatkowski-Phillips-Schmidt-Shin

In [15]:

# Example of the Kwiatkowski-Phillips-Schmidt-Shin test
from statsmodels.tsa.stattools import kpss
timeseries = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

stat, p, lags, crit = kpss(timeseries)

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably not Stationary')
else:
    print('Probably Stationary')

stat=0.410, p=0.073
Probably not Stationary




### Parametric Statistical Hypothesis Tests

*  This section lists statistical tests that you can use to compare data samples.

### Student’s t-test

* Tests whether the means of two independent samples are significantly different.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample are normally distributed.
    * Observations in each sample have the same variance.
2. Interpretation

    * H0: the means of the samples are equal.
    * H1: the means of the samples are unequal.

In [16]:
# Example of the Student's t-test

from scipy.stats import ttest_ind


stat, p = ttest_ind(data['total_bill'], data['tip'])

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=29.107, p=0.000
Probably different distributions


In [20]:
# Example of one sample test

from scipy.stats import ttest_1samp

tstat, p = ttest_1samp(data['total_bill'], popmean=np.mean(data['tip']))

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=29.107, p=0.000
Probably different distributions


###  Paired Student’s t-test

* Tests whether the means of two paired samples are significantly different.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample are normally distributed. (shapiro)
    * Observations in each sample have the same variance.(F test)
    * Observations across each sample are paired. (sample having two values e.g. Before & After)
    
2. Interpretation

    * H0: the means of the samples are equal.
    * H1: the means of the samples are unequal.

In [38]:
# Example of the Paired Student's t-test
from scipy.stats import ttest_rel

before = np.array([0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869])
after = before*10 + 0.145

stat, p = ttest_rel(before, after)
# stat, p = ttest_rel(data['total_bill'], data['tip'])

print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=0.670, p=0.520
Probably the same distribution


### Analysis of Variance Test (ANOVA)

* Tests whether the means of two or more independent samples are significantly different.


1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample are normally distributed.
    * Observations in each sample have the same variance.
2. Interpretation

    * H0: the means of the samples are equal.
    * H1: the means of the samples are unequal.

In [42]:
# Example of the Analysis of Variance Test

from scipy.stats import f_oneway

stat, p = f_oneway(data['total_bill'] , data['tip'] , data['size'])
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=859.805, p=0.000
Probably different distributions


### Nonparametric Statistical Hypothesis Tests

#### Mann-Whitney U Test

* Tests whether the distributions of two independent samples are equal or not.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample can be ranked.
2. Interpretation

    * H0: the distributions of both samples are equal.
    * H1: the distributions of both samples are not equal.

In [44]:
# Example of the Mann-Whitney U Test

from scipy.stats import mannwhitneyu
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]

stat, p = mannwhitneyu(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=40.000, p=0.236
Probably the same distribution


#### Wilcoxon Signed-Rank Test

* Tests whether the means of two paired samples are significantly different.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample can be ranked.)
    * Observations across each sample are paired. (sample having two values e.g. Before & After)
    
2. Interpretation

    * H0: the distributions of both samples are equal.
    * H1: the distributions of both samples are not equal.

In [45]:
# Example of the Wilcoxon Signed-Rank Test
from scipy.stats import wilcoxon
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = wilcoxon(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')


stat=21.000, p=0.557
Probably the same distribution


### Kruskal-Wallis H Test

* Tests whether the distributions of two or more independent samples are equal or not.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample can be ranked.
2. Interpretation

    * H0: the distributions of all samples are equal.
    * H1: the distributions of one or more samples are not equal.

In [46]:
# Example of the Kruskal-Wallis H Test
from scipy.stats import kruskal

data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = kruskal(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=0.571, p=0.450
Probably the same distribution


#### Friedman Test

* Tests whether the means of two or more paired samples are significantly different.

1. Assumptions

    * Observations in each sample are independent and identically distributed (iid).
    * Observations in each sample can be ranked.)
    * Observations across each sample are paired. (sample having two values e.g. Before & After)
    
2. Interpretation

    * H0: the distributions of all samples are equal.
    * H1: the distributions of one or more samples are not equal.

In [48]:
# Example of the Friedman Test
from scipy.stats import friedmanchisquare

data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]

stat, p = friedmanchisquare(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')

stat=0.800, p=0.670
Probably the same distribution
