# Tests for normality

###  Initial informations

##https://docs.scipy.org/doc/scipy/reference/generated/

###   Import the library to analyse

In [1]:
import scipy.stats as stats
import pandas as pd

# Test Shapiro-wilk (sw)

In [None]:
#It is recommended to use the tests: Shapiro-Wilk, Shapiro-Francia, and the Rian-Joiner for small samples (4<= n <=30). 
#I will demonstrate the Shapiro-Wilk test (n=30).

In [14]:
sw = pd.read_csv('spreadsheetone.csv')

In [15]:
sw.head()

Unnamed: 0,A_30
0,24
1,74
2,62
3,91
4,76


In [8]:
shapiro_stats, shapiro_p_value = stats.shapiro(sw)

In [10]:
print ("The test value Shapiro-Wilk = " + str(shapiro_stats))
print ("p value of test Shapiro-Wilk = " + str(shapiro_p_value))

The test value Shapiro-Wilk = 0.9262720346450806
p value of test Shapiro-Wilk = 0.039127614349126816


### Conclusion

In [15]:
if shapiro_p_value > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


#  Test Anderson-Darling (ad)

In [16]:
#It is recommended to use the tests: Anderson-Darling, Jaque-Bera, and lilliefors for small samples (31<= n <=100). 
#I will demonstrate the Anderson-Darling test (n=30).

In [2]:
ad = pd.read_csv('spreadsheetthree.csv')

In [3]:
ad.head()

Unnamed: 0,C
0,86
1,67
2,12
3,42
4,66


In [None]:
# OBS.: It is necessary to convert the data into an array.

In [19]:
adx=ad['C']

In [37]:
value_test_anderson, critical_value_anderson, p_value_anderson = stats.anderson(adx, 'norm')
print(value_test_anderson)
print(critical_value_anderson)
print(p_value_anderson)

0.9264628345369346
[0.555 0.632 0.758 0.885 1.052]
[15.  10.   5.   2.5  1. ]


### Conclusion

In [38]:
if value_test_anderson < critical_value_anderson[2]:
    print("With " + str(100 - p_value_anderson[2]) + "% reliable, the data are presented as a normal distribution.")
else:
    print("With " + str(100 - p_value_anderson[2]) + "% reliable, data do not presented as a normal distribution.")

With 95.0% reliable, data do not presented as a normal distribution.


#  Test Kolmogorov-Smirnov

In [24]:
#This test is recommended for samples with many values (n > 100).

In [26]:
import numpy as np

In [25]:
K = pd.read_csv('spreadsheetfour.csv')

In [27]:
ks = np.array([K])

In [39]:
average_ks = np.mean(ks)
print(average_ks)
standard_deviation_ks = np.std(ks-1)
print(standard_deviation_ks)

54.542
26.64988247628871


In [40]:
test_value_ks, p_value_ks = stats.kstest(ks, cdf='norm', args=(average_ks, standard_deviation_ks), N=len(ks))
print(test_value_ks)
print(p_value_ks)

0.9559725525745327
0.08805489485093454


### Conclusion

In [41]:
if p_value_ks > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.
