# Dependencies

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import skewnorm

# Shapiro-Wilk Test Function

In [2]:
def shapiro_wilk_test(data, alpha = 0.5, print_result=False):
    """
    Run a Shapiro-Wilk test for normality on a data series and summarise it.

    The Shapiro-Wilk test, proposed in 1965, calculates a W statistic that tests
    whether a random sample x1, x2, ..., xn comes from (specifically) a normal
    distribution. Small values of W are evidence of departure from normality.

    Source: US Department of Commerce, National Institute of Standards and Technology (NIST)
    https://www.itl.nist.gov/div898/handbook/prc/section2/prc213.htm

    Parameters:

        data: array of sample data; handed unchanged to scipy.stats.shapiro
        alpha: test threshold. The null hypothesis (normality) is kept only
            when the p-value is strictly greater than alpha.
            NOTE(review): the default of 0.5 is preserved for backward
            compatibility, but it is ten times the conventional 0.05 --
            confirm whether 0.05 was intended.
        print_result: if truthy, the summary is printed as well as returned

    Returns: a three-line string -- the W statistic, the p-value with its
        comparison against alpha, and the conclusion. When the test yields
        NaN for W or p (e.g. the data contain NaN), the comparison is left
        out and the conclusion line reports the result as inconclusive.
    """

    # library files (imported locally so the function is self-contained)
    from math import isnan
    from scipy.stats import shapiro

    # U+2092 is LATIN SUBSCRIPT SMALL LETTER O; it is kept so the printed
    # text stays identical to earlier output (subscript zero is U+2080).
    null_subscript = '\u2092'

    # normality test
    w_statistic, p_value = shapiro(data)

    # interpret
    if isnan(w_statistic) or isnan(p_value):
        # NaN never compares greater than alpha, so without this guard an
        # undefined test result would be reported as a rejection of normality.
        result = ''
        conclusion = 'Inconclusive (test result is NaN; check the data for missing values)'

    elif p_value > alpha:
        result = ' > alpha = {:0.5f}'.format(alpha)
        conclusion = "Gaussian (Don't reject H{})".format(null_subscript)

    else:
        result = ' < alpha = {:0.5f}'.format(alpha)
        conclusion = "Not Gaussian (Reject H{})".format(null_subscript)

    result_string = 'W statistic = {:0.5f}\np = {:0.5f}{}\n{}'.format(w_statistic, p_value, result, conclusion)

    if print_result:
        print(result_string)

    return result_string

# Test on normally distributed data

In [3]:
# Draw a sample from a skew-normal distribution and test it for normality.
# With skewness = 0 the skew-normal is an ordinary normal distribution.
numValues = 10000
maxValue = 100   # used as `loc` below, i.e. the location parameter (not a maximum)
skewness = 0     # Negative values are left skewed, positive values are right skewed.

# random_state pins the draw so a fresh Restart & Run All gives the same sample.
data = skewnorm.rvs(a=skewness, loc=maxValue, size=numValues, random_state=42)  # Skewnorm function

# NOTE(review): shapiro_wilk_test defaults to alpha = 0.5, at which a truly
# normal sample is rejected about half the time; pass alpha=0.05 for the
# conventional threshold. SciPy also documents that the Shapiro-Wilk p-value
# may be inaccurate for N > 5000 -- consider a smaller numValues.
print(shapiro_wilk_test(data))

W statistic = 0.99989
p = 0.95784 > alpha = 0.50000
Gaussian (Don't reject Hₒ)




# Test on non-normally distributed data

In [4]:
# Draw a right-skewed sample from a skew-normal distribution and test it for
# normality; the test is expected to reject the null hypothesis here.
numValues = 10000
maxValue = 100   # used as `loc` below, i.e. the location parameter (not a maximum)
skewness = 5     # Negative values are left skewed, positive values are right skewed.

# random_state pins the draw so a fresh Restart & Run All gives the same sample.
data = skewnorm.rvs(a=skewness, loc=maxValue, size=numValues, random_state=42)  # Skewnorm function

# NOTE(review): SciPy documents that the Shapiro-Wilk p-value may be
# inaccurate for N > 5000 -- consider a smaller numValues.
print(shapiro_wilk_test(data))

W statistic = 0.95529
p = 0.00000 < alpha = 0.50000
Not Gaussian (Reject Hₒ)
