# Test for two samples not - normal and independent

### First - Import the libraries

In [1]:
import scipy.stats as stats
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu

### Second - import the data

In [2]:
df = pd.read_csv('Two_Samples_NNormal.csv')
print(df)

     A    B
0   81  129
1   86  126
2   82  130
3   80  126
4   80  124
..  ..  ...
95  85  124
96  84  126
97  89  120
98  84  128
99  85  129

[100 rows x 2 columns]


### Trird - Test the Normality of samples

In [3]:
a = df['A']
b = df['B']

### Column A

In [4]:
average_a = np.mean(a)
print(average_a)
standard_deviation_a = np.std(a-1)
print(standard_deviation_a)

85.62
3.116985723419345


In [5]:
test_value_a, p_value_a = stats.kstest(a, cdf='norm', args=(average_a, standard_deviation_a), N=len(a))
print(test_value_a)
print(p_value_a)

0.12743471570442477
0.07103053479758281


In [6]:
if p_value_a > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Column B

In [7]:
average_b = np.mean(b)
print(average_b)
standard_deviation_b = np.std(b-1)
print(standard_deviation_b)

124.35
3.099596747965772


In [8]:
test_value_b, p_value_b = stats.kstest(b, cdf='norm', args=(average_b, standard_deviation_b), N=len(b))
print(test_value_b)
print(p_value_b)

0.11841493542151565
0.11156453032856506


In [9]:
if p_value_b > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data behaved like a normal distribution.


### Fourth - do the Mann-Whitney test for two samples independent

In [10]:
res = mannwhitneyu(a, b, alternative="two-sided", axis=0, method="asymptotic")
print ("The test value Wilcoxon to samples related = " + str(res.statistic))
print ("The p_value Wilcoxon to samples related = " + str(res.pvalue))

The test value Wilcoxon to samples related = 0.0
The p_value Wilcoxon to samples related = 2.1193240158895103e-34


### Conclusion

In [12]:
if res.pvalue < 0.05:
    print("As the p-value was below 0.05, we must reject the null hypothesis and accept the hypothesis alternative.")
else:
    print("Since the p-value was above 0.05, we must accept the null hypothesis and reject the alternative hypothesis.")

As the p-value was below 0.05, we must reject the null hypothesis and accept the hypothesis alternative.
