# Test for two samples non-normal and independent

### First - Import the libraries

In [1]:
import scipy.stats as stats
import pandas as pd
import numpy as np
from scipy.stats import mstats

### Second - import the data

In [19]:
df = pd.read_csv('data_NNormal.csv')
print(df)

      A    B
0    50  959
1    59  955
2    55  953
3    54  952
4    53  957
..   ..  ...
151  56  950
152  58  960
153  56  958
154  52  951
155  50  952

[156 rows x 2 columns]


### Third - Test the Normality of samples

In [20]:
a = df['A']
b = df['B']

### Column A

In [21]:
average_a = np.mean(a)
print(average_a)
standard_deviation_a = np.std(a-1)
print(standard_deviation_a)

54.36538461538461
3.297408730996277


In [22]:
test_value_a, p_value_a = stats.kstest(a, cdf='norm', args=(average_a, standard_deviation_a), N=len(a))
print(test_value_a)
print(p_value_a)

0.12239419829554571
0.017049911825335846


In [23]:
if p_value_a > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Column B

In [24]:
average_b = np.mean(b)
print(average_b)
standard_deviation_b = np.std(b-1)
print(standard_deviation_b)

955.224358974359
3.2494057009857844


In [25]:
test_value_b, p_value_b = stats.kstest(b, cdf='norm', args=(average_b, standard_deviation_b), N=len(b))
print(test_value_b)
print(p_value_b)

0.10814097246430154
0.04821200315092489


In [26]:
if p_value_b > 0.05:
    print("The test presented with 95% confidence that the data behaved like a normal distribution.")
else:
    print("The test presented with 95% confidence that the data did not behave like a normal distribution.")

The test presented with 95% confidence that the data did not behave like a normal distribution.


### Fourth - Do the Spearman test 

In [28]:
rs_value, p_value_Spearman = stats.mstats.spearmanr(a, b)
print ("The test value Pearson Correlation = " + str(rs_value))
print ("p value of Pearson Correlation = " + str(p_value_Spearman))

The test value Pearson Correlation = 0.020931930959359666
p value of Pearson Correlation = 0.7953532304701982


### Conclusion

In [29]:
if p_value_Spearman < 0.05:
    print("As the p-value was below 0.05, we must reject the null hypothesis and accept the hypothesis alternative.")
else:
    print("Since the p-value was above 0.05, we must accept the null hypothesis and reject the alternative hypothesis.")

Since the p-value was above 0.05, we must accept the null hypothesis and reject the alternative hypothesis.
