In [13]:
from scipy import stats
import numpy as np
from scipy.stats import mannwhitneyu


# DATA

In [2]:
import numpy as np

# Data for each column
patients_1 = [0.619251434, 0.54, 0.665067597, 0.133333333, 0.379857797, 
          1.017760926, 0.296033156, 0.912635858, 2.633674282, 0.993549344]

patients_2 = [0.081959749, 0.586666667, 7.762756215, 0.826666667, 0.087659492, 
          7.333865496, 0.118413262, 1.908238613, 9.370980584, 2.031585972]

patients_3 = [1.502595392, 0.333333333, 8.057130397, 1.233333333, 0.185058927, 
          7.48353622, 0.485494375, 2.007798888, 9.450603295, 2.67665159]


In [3]:
healthies_1 = [2.019018743, 0.390952248, 1.605411132, 0.173658715, 0.167582162,
          1.007049345, 1.753859991, 1.627604167, 2.399723375, 0.342442758]

healthies_2 = [1.460859978, 0.223401285, 1.735799853, 3.244675989, 0.325854204,
          0.161127895, 0.134912307, 1.085069444, 2.55186722, 0.235009736]

healthies_3 = [1.660694598, 0.432839989, 2.094368837, 3.793071931, 0.744809608,
          0.322255791, 1.379103583, 0.29296875, 2.85615491, 0.597596186]

# ALL TOGETHER

## CHECK NORMALITIES

### PATIENTS

In [4]:
# Generate some sample data
data = patients_1+patients_2+patients_3

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.703513503074646
p-value: 1.7558179479237879e-06
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5326606393150366
p-value: 1.780892786110672e-08
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 9.89482246524114
p-value: 0.007101769983017378
Sample does not look Gaussian (reject H0)


In [5]:
# Generate some sample data
data = healthies_1+healthies_2+healthies_3

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.8911949992179871
p-value: 0.005149817559868097
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5536593955314734
p-value: 3.5580347502742338e-09
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 3.463257936629363
p-value: 0.17699585437112134
Sample looks Gaussian (fail to reject H0)


THEY ARE NOT GAUSSIAN BUT WE HAVE MORE THAN 10 SAMPLES IN EACH CASE SO WE CAN AVOID THIS ASSUMTION

## VARIANCE CHECK

In [6]:
patients = patients_1+patients_2+patients_3
healthy = healthies_1+healthies_2+healthies_3

In [7]:
# Perform Levene's test
levene_stat, levene_p = stats.levene(patients, healthy)

print("Levene's Test:")
print("Test Statistic:", levene_stat)
print("p-value:", levene_p)

if levene_p > 0.05:
    print("The variances are equal (fail to reject H0)")
else:
    print("The variances are not equal (reject H0)")


Levene's Test:
Test Statistic: 4.6987795466192654
p-value: 0.03430148771630319
The variances are not equal (reject H0)


SINCE VARIANCES ARE NOT THE SAME, WE HAVE TO USE THE WELTCH'S T-TEST

## T-TEST

In [8]:


# Perform two-sample t-test assuming equal variances
t_stat, t_p = stats.ttest_ind(patients, healthy, equal_var=False)

print("Two-Sample T-Test:")
print("Test Statistic:", t_stat)
print("p-value:", t_p)

if t_p > 0.05:
    print("There is no significant difference between the means (fail to reject H0)")
else:
    print("There is a significant difference between the means (reject H0)")


Two-Sample T-Test:
Test Statistic: 1.9588050455841428
p-value: 0.05805208077110886
There is no significant difference between the means (fail to reject H0)


# MODEL 1

## CHECK NORMALITIES

### PATIENTS

In [9]:
# Generate some sample data
data = patients_1

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.7700233459472656
p-value: 0.006257914938032627
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.55303511649181
p-value: 0.0021063292500086254
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 15.924287766059955
p-value: 0.0003484053758381269
Sample does not look Gaussian (reject H0)




In [10]:
# Generate some sample data
data = healthies_1

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.8908300399780273
p-value: 0.1732628345489502
Sample looks Gaussian (fail to reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5665439973065065
p-value: 0.0014679829320498427
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 2.9996954120349737
p-value: 0.2231641441168489
Sample looks Gaussian (fail to reject H0)


### NON PARAMETRIC TEST

In [11]:
patients = patients_1
healthy = healthies_1

In [14]:


# Perform the Mann-Whitney U test
statistic, p_value = mannwhitneyu(patients, healthy)

# Print the test statistic and p-value
print("Mann-Whitney U statistic:", statistic)
print("p-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: the samples do not come from the same distribution.")
else:
    print("Fail to reject the null hypothesis: the samples may come from the same distribution.")


Mann-Whitney U statistic: 40.0
p-value: 0.47267559351158717
Fail to reject the null hypothesis: the samples may come from the same distribution.


THEY ARE NOT GAUSSIAN BUT WE HAVE MORE THAN 10 SAMPLES IN EACH CASE SO WE CAN AVOID THIS ASSUMTION

## VARIANCE CHECK

In [15]:
patients = patients_1
healthy = healthies_1

In [16]:
# Perform Levene's test
levene_stat, levene_p = stats.levene(patients, healthy)

print("Levene's Test:")
print("Test Statistic:", levene_stat)
print("p-value:", levene_p)

if levene_p > 0.05:
    print("The variances are equal (fail to reject H0)")
else:
    print("The variances are not equal (reject H0)")


Levene's Test:
Test Statistic: 2.0577975896976946
p-value: 0.16857523391623
The variances are equal (fail to reject H0)


SINCE VARIANCES ARE THE SAME, WE CAN USE REGULAR T-TEST

## T-TEST

In [17]:


# Perform two-sample t-test assuming equal variances
t_stat, t_p = stats.ttest_ind(patients, healthy, equal_var=True)

print("Two-Sample T-Test:")
print("Test Statistic:", t_stat)
print("p-value:", t_p)

if t_p > 0.05:
    print("There is no significant difference between the means (fail to reject H0)")
else:
    print("There is a significant difference between the means (reject H0)")


Two-Sample T-Test:
Test Statistic: -0.954435899375925
p-value: 0.35250605596631157
There is no significant difference between the means (fail to reject H0)


# MODEL 2

## CHECK NORMALITIES

### PATIENTS

In [18]:
# Generate some sample data
data = patients_2

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.7712432146072388
p-value: 0.0064757647924125195
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5326606393150366
p-value: 0.003550394781684693
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 2.55175617748711
p-value: 0.279185710728292
Sample looks Gaussian (fail to reject H0)


In [19]:
# Generate some sample data
data = healthies_2

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.8470377326011658
p-value: 0.05355590209364891
Sample looks Gaussian (fail to reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5536593955314734
p-value: 0.002072032255850155
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 1.86187085219576
p-value: 0.3941848071016772
Sample looks Gaussian (fail to reject H0)


THEY ARE NOT GAUSSIAN BUT WE HAVE MORE THAN 10 SAMPLES IN EACH CASE SO WE CAN AVOID THIS ASSUMTION

### NON PARAMETRIC TEST

In [20]:
patients = patients_2
healthy = healthies_2

In [21]:


# Perform the Mann-Whitney U test
statistic, p_value = mannwhitneyu(patients, healthy)

# Print the test statistic and p-value
print("Mann-Whitney U statistic:", statistic)
print("p-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: the samples do not come from the same distribution.")
else:
    print("Fail to reject the null hypothesis: the samples may come from the same distribution.")


Mann-Whitney U statistic: 56.0
p-value: 0.6775849579524755
Fail to reject the null hypothesis: the samples may come from the same distribution.


## VARIANCE CHECK

In [22]:
patients = patients_2
healthy = healthies_2

In [23]:
# Perform Levene's test
levene_stat, levene_p = stats.levene(patients, healthy)

print("Levene's Test:")
print("Test Statistic:", levene_stat)
print("p-value:", levene_p)

if levene_p > 0.05:
    print("The variances are equal (fail to reject H0)")
else:
    print("The variances are not equal (reject H0)")


Levene's Test:
Test Statistic: 3.506889024449369
p-value: 0.07745014401593901
The variances are equal (fail to reject H0)


SINCE VARIANCES ARE THE SAME, WE CAN USE REGULAR T-TEST

## T-TEST

In [24]:


# Perform two-sample t-test assuming equal variances
t_stat, t_p = stats.ttest_ind(patients, healthy, equal_var=True)

print("Two-Sample T-Test:")
print("Test Statistic:", t_stat)
print("p-value:", t_p)

if t_p > 0.05:
    print("There is no significant difference between the means (fail to reject H0)")
else:
    print("There is a significant difference between the means (reject H0)")


Two-Sample T-Test:
Test Statistic: 1.5695226685666448
p-value: 0.13393796088893353
There is no significant difference between the means (fail to reject H0)


# MODEL 3

## CHECK NORMALITIES

## PATIENTS

In [25]:
# Generate some sample data
data = patients_3

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.8012109398841858
p-value: 0.01499820128083229
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5912742867252527
p-value: 0.0007334478420161856
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 2.4454585721753297
p-value: 0.2944254979222238
Sample looks Gaussian (fail to reject H0)


In [26]:
# Generate some sample data
data = healthies_3

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.8793904185295105
p-value: 0.12840606272220612
Sample looks Gaussian (fail to reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.61522698096772
p-value: 0.00035902772438812646
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 2.22623294782666
p-value: 0.32853349791466496
Sample looks Gaussian (fail to reject H0)


THEY ARE NOT GAUSSIAN BUT WE HAVE MORE THAN 10 SAMPLES IN EACH CASE SO WE CAN AVOID THIS ASSUMTION

### NON PARAMETRIC TEST

In [27]:
patients = patients_3
healthy = healthies_3

In [28]:


# Perform the Mann-Whitney U test
statistic, p_value = mannwhitneyu(patients, healthy)

# Print the test statistic and p-value
print("Mann-Whitney U statistic:", statistic)
print("p-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: the samples do not come from the same distribution.")
else:
    print("Fail to reject the null hypothesis: the samples may come from the same distribution.")


Mann-Whitney U statistic: 61.0
p-value: 0.4273553138978077
Fail to reject the null hypothesis: the samples may come from the same distribution.


## VARIANCE CHECK

In [29]:
# Perform Levene's test
levene_stat, levene_p = stats.levene(patients, healthy)

print("Levene's Test:")
print("Test Statistic:", levene_stat)
print("p-value:", levene_p)

if levene_p > 0.05:
    print("The variances are equal (fail to reject H0)")
else:
    print("The variances are not equal (reject H0)")


Levene's Test:
Test Statistic: 3.196273868475992
p-value: 0.090654368500147
The variances are equal (fail to reject H0)


SINCE VARIANCES ARE THE SAME, WE CAN USE REGULAR T-TEST

## T-TEST

In [30]:


# Perform two-sample t-test assuming equal variances
t_stat, t_p = stats.ttest_ind(patients, healthy, equal_var=True)

print("Two-Sample T-Test:")
print("Test Statistic:", t_stat)
print("p-value:", t_p)

if t_p > 0.05:
    print("There is no significant difference between the means (fail to reject H0)")
else:
    print("There is a significant difference between the means (reject H0)")


Two-Sample T-Test:
Test Statistic: 1.6215868656817691
p-value: 0.12227674372245088
There is no significant difference between the means (fail to reject H0)
