# Kruskal-Wallis - Samples independent, Non-normal, Non-homoscedasticity

### First - Import the libraries

In [1]:
import scipy.stats as stats
import pandas as pd
import numpy as np
from scipy.stats import bartlett

### Second - import the data

In [2]:
df = pd.read_csv('Kruskal-Wallis_independet_NNormal_Nhomo.csv')
print (df)

     A   B    C    D     E
0   12  50  104  256  1002
1   13  50  101  256  1002
2   12  50  109  255  1002
3   13  51  110  250  1003
4   13  51  100  255  1000
..  ..  ..  ...  ...   ...
95  11  50  100  252  1001
96  12  51  109  253  1002
97  14  50  108  253  1001
98  11  50  103  250  1002
99  14  50  104  251  1001

[100 rows x 5 columns]


### Third - Test the Normality of samples

In [3]:
a = df['A']
b = df['B']
c = df['C']
d = df['D']
e = df['E']

### Column A

In [4]:
average_a = np.mean(a)
print(average_a)
standard_deviation_a = np.std(a-1)
print(standard_deviation_a)

12.69
1.6167560112769035


In [5]:
test_value_a, p_value_a = stats.kstest(a, cdf='norm', args=(average_a, standard_deviation_a), N=len(a))
print(test_value_a)
print(p_value_a)

0.171105912863719
0.004977817046637578


In [6]:
if p_value_a > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Column B

In [7]:
average_b = np.mean(b)
print(average_b)
standard_deviation_b = np.std(b-1)
print(standard_deviation_b)

50.5
0.5


In [8]:
test_value_b, p_value_b = stats.kstest(b, cdf='norm', args=(average_b, standard_deviation_b), N=len(b))
print(test_value_b)
print(p_value_b)

0.3413447460685429
6.645269104308234e-11


In [9]:
if p_value_b > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Column C

In [10]:
average_c = np.mean(c)
print(average_c)
standard_deviation_c = np.std(c-1)
print(standard_deviation_c)

105.2
3.3436506994600976


In [12]:
test_value_c, p_value_c = stats.kstest(c, cdf='norm', args=(average_c, standard_deviation_c), N=len(c))
print(test_value_c)
print(p_value_c)

0.16471919606500263
0.00771247619986859


In [13]:
if p_value_c > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Column D

In [14]:
average_d = np.mean(d)
print(average_d)
standard_deviation_d = np.std(d-1)
print(standard_deviation_d)

253.29
2.1876699933947976


In [16]:
test_value_d, p_value_d = stats.kstest(d, cdf='norm', args=(average_d, standard_deviation_d), N=len(d))
print(test_value_d)
print(p_value_d)

0.16272976143760798
0.00880899372032895


In [17]:
if p_value_d > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Column E

In [18]:
average_e = np.mean(e)
print(average_e)
standard_deviation_e = np.std(e-1)
print(standard_deviation_e)

1001.43
1.176902714755982


In [19]:
test_value_e, p_value_e = stats.kstest(e, cdf='norm', args=(average_e, standard_deviation_e), N=len(e))
print(test_value_e)
print(p_value_e)

0.19782719490012346
0.0006627743351410587


In [20]:
if p_value_e > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Fourth - Do the Bartlett test to Homoscedasticity and Non-Homoscedasticity

In [21]:
res = stats.bartlett(a, b, c, d, e)
print ("The test value Wilcoxon to samples related = " + str(res.statistic))
print ("The p_value Wilcoxon to samples related = " + str(res.pvalue))

The test value Wilcoxon to samples related = 306.13712594018585
The p_value Wilcoxon to samples related = 5.139037432286974e-65


### Fifth - Kruskal-Wallis Variance Analysis

In [25]:
res = stats.kruskal(a, b, c, d, e,)
print ("The test value Kruskal Wallis to samples independent = " + str(res.statistic))
print ("The p_value Kruskal Wallis to samples independent = " + str(res.pvalue))

The test value Kruskal Wallis to samples independent = 480.525060778235
The p_value Kruskal Wallis to samples independent = 1.0909346158541378e-102


### Conclusion

In [28]:
if res.pvalue > 0.05:
    print("The test presented with 95% confidence that the data behave with homogeneous variances, which demonstrate that the null hypothesis can be accepted and the alternative hypothesis rejected.")
else:
    print("The test presented with 95% confidence that the data behave with heteromogeneous variances, which demonstrate that the null hypothesis can be rejected and the alternative hypothesis accepted.")

The test presented with 95% confidence that the data behave with heteromogeneous variances, which demonstrate that the null hypothesis can be rejected and the alternative hypothesis accepted.
