# ANOVA for independent samples normal and homoscedastic

### First - Import the libraries

In [1]:
import scipy.stats as stats
import pandas as pd
import numpy as np
from scipy.stats import bartlett

### Second - import the data

In [2]:
df = pd.read_csv('Samples_independenty_Normal.csv')
print (df)

      A    B    C    D    E
0   134  131  127  128   83
1   139   93   85   96  135
2    91  132   85  112  121
3   122  115   95   88   90
4   102  138  135   96   92
..  ...  ...  ...  ...  ...
95   96   98  143  132  122
96  144  136   87  120  123
97  139  136  130   98  136
98   97  108   82   80  135
99  132  122  120  101  132

[100 rows x 5 columns]


### Third - Test the Normality of samples

In [3]:
a = df['A']
b = df['B']
c = df['C']
d = df['D']
e = df['E']

### Column A

In [4]:
average_a = np.mean(a)
print(average_a)
standard_deviation_a = np.std(a-1)
print(standard_deviation_a)

110.64
18.201933963180945


In [5]:
test_value_a, p_value_a = stats.kstest(a, cdf='norm', args=(average_a, standard_deviation_a), N=len(a))
print(test_value_a)
print(p_value_a)

0.11249062132279214
0.14737783608101995


In [6]:
if p_value_a > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Column B

In [7]:
average_b = np.mean(b)
print(average_b)
standard_deviation_b = np.std(b-1)
print(standard_deviation_b)

111.85
18.405637723262945


In [8]:
test_value_b, p_value_b = stats.kstest(b, cdf='norm', args=(average_b, standard_deviation_b), N=len(b))
print(test_value_b)
print(p_value_b)

0.10542205969538854
0.2015784358191075


In [9]:
if p_value_b > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Column C

In [10]:
average_c = np.mean(c)
print(average_c)
standard_deviation_c = np.std(c-1)
print(standard_deviation_c)

111.29
18.694007061087788


In [11]:
test_value_c, p_value_c = stats.kstest(c, cdf='norm', args=(average_c, standard_deviation_c), N=len(c))
print(test_value_c)
print(p_value_c)

0.11143403069002594
0.1546449426271117


In [12]:
if p_value_c > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Column D

In [13]:
average_d = np.mean(d)
print(average_d)
standard_deviation_d = np.std(d-1)
print(standard_deviation_d)

112.65
17.604757879618795


In [14]:
test_value_d, p_value_d = stats.kstest(d, cdf='norm', args=(average_d, standard_deviation_d), N=len(d))
print(test_value_d)
print(p_value_d)

0.07593520961381156
0.5847215648222708


In [15]:
if p_value_d > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Column E

In [16]:
average_e = np.mean(e)
print(average_e)
standard_deviation_e = np.std(e-1)
print(standard_deviation_e)

113.97
18.600782241615537


In [17]:
test_value_e, p_value_e = stats.kstest(e, cdf='norm', args=(average_e, standard_deviation_e), N=len(e))
print(test_value_e)
print(p_value_e)

0.10850141399131141
0.17632024154557702


In [18]:
if p_value_e > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Fourth - Do the Bartlett test to Homoscedasticity

In [19]:
res = stats.bartlett(a, b, c, d, e)
print ("The test value Wilcoxon to samples related = " + str(res.statistic))
print ("The p_value Wilcoxon to samples related = " + str(res.pvalue))

The test value Wilcoxon to samples related = 0.44424039938665455
The p_value Wilcoxon to samples related = 0.9786971988245933


In [20]:
if res.pvalue > 0.05:
    print("the samples present a homogeneous distribution in their variances.")
else:
    print("the samples do not present a homogeneous distribution in their variances.")

the samples present a homogeneous distribution in their variances.


### Fifth - ANOVA Variance Analysis

In [23]:
res = stats.f_oneway(a, b, c, d, e, axis=0)
print ("The value of F oneway in the variance analysis ANOVA = " + str(res.statistic))
print ("The p_value of F oneway in the variance analysis ANOVA = " + str(res.pvalue))

The value of F oneway in the variance analysis ANOVA = 0.49099369940345944
The p_value of F oneway in the variance analysis ANOVA = 0.742372371385499


### Conclusion

In [24]:
if res.pvalue < 0.05:
    print("As the p-value was below 0.05, we must reject the null hypothesis and accept the hypothesis alternative.")
else:
    print("since the p-value was above 0.05, we must accept the null hypothesis and reject the alternative hypothesis.")

since the p-value was above 0.05, we must accept the null hypothesis and reject the alternative hypothesis.
