In [None]:
from scipy import stats
import numpy as np

# DATA

In [2]:
generic_model_1_patients= [0.29141244,
        0.42,
        0.65416485,
        0.346666667,
        0.886334859,
        1.7561365,
        0.509177028,
        2.497303576,
        3.350278679,
        1.468080374]

generic_model_2_patients = [0.692104544,
                                 0.746666667,
                                 1.362843437,
                                 0.46,
                                 0.097399435,
                                 0.299341449,
                                 0.047365305,
                                 0.149340413,
                                 2.05181601,
                                 1.075109365]
generic_model_3_patients = [0.546398324,
                                 0.4,
                                 6.43262102,
                                 0.393333333,
                                 0.321418136,
                                 4.589902215,
                                 0.224985198,
                                 1.012196134,
                                 4.850860538,
                                 0.94906206]


In [3]:
generic_model_1_healthy = [0.075799338,
                                0.01396258,
                                0.138538016,
                                0.127959053,
                                0.446885765,
                                0.483383686,
                                2.353470244,
                                0.716145833,
                                0.760719225,
                                0.248438864]
generic_model_2_healthy = [2.191289967,
                                1.242669645,
                                3.414554641,
                                4.222648752,
                                0.204822642,
                                0.181268882,
                                0.119922051,
                                0.1953125,
                                4.170124481,
                                0.812462231]
generic_model_3_healthy= [0.441014333,
                                0.209438704,
                                0.081492951,
                                2.129604241,
                                0.363094684,
                                0.402819738,
                                0.119922051,
                                0.726996528,
                                0.062240664,
                                0.053716511]

## CHECK NORMALITIES

### PATIENTS

In [4]:
# Generate some sample data
data = generic_model_1_patients+generic_model_2_patients+generic_model_3_patients

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.7201294898986816
p-value: 3.1048914479470113e-06
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5188889597156018
p-value: 4.890785406302395e-08
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 21.931555348905125
p-value: 1.728316460568932e-05
Sample does not look Gaussian (reject H0)


In [5]:
# Generate some sample data
data = generic_model_1_healthy+generic_model_2_healthy+generic_model_3_healthy

# Shapiro-Wilk Test
shapiro_stat, shapiro_p = stats.shapiro(data)
print("Shapiro-Wilk Test:")
print("Test Statistic:", shapiro_stat)
print("p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# Kolmogorov-Smirnov Test
kstest_stat, kstest_p = stats.kstest(data, 'norm')
print("\nKolmogorov-Smirnov Test:")
print("Test Statistic:", kstest_stat)
print("p-value:", kstest_p)
if kstest_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")

# D'Agostino and Pearson Test
dagostino_stat, dagostino_p = stats.normaltest(data)
print("\nD'Agostino and Pearson Test:")
print("Test Statistic:", dagostino_stat)
print("p-value:", dagostino_p)
if dagostino_p > 0.05:
    print("Sample looks Gaussian (fail to reject H0)")
else:
    print("Sample does not look Gaussian (reject H0)")


Shapiro-Wilk Test:
Test Statistic: 0.6964221000671387
p-value: 1.384528104608762e-06
Sample does not look Gaussian (reject H0)

Kolmogorov-Smirnov Test:
Test Statistic: 0.5055700825202555
p-value: 1.2565140607180196e-07
Sample does not look Gaussian (reject H0)

D'Agostino and Pearson Test:
Test Statistic: 17.105343802012317
p-value: 0.00019302865646150515
Sample does not look Gaussian (reject H0)


THEY ARE NOT GAUSSIAN BUT WE HAVE MORE THAN 10 SAMPLES IN EACH CASE SO WE CAN AVOID THIS ASSUMTION

## VARIANCE CHECK

In [6]:
patients = generic_model_1_patients+generic_model_2_patients+generic_model_3_patients
healthy = generic_model_1_healthy+generic_model_2_healthy+generic_model_3_healthy

In [7]:
# Perform Levene's test
levene_stat, levene_p = stats.levene(patients, healthy)

print("Levene's Test:")
print("Test Statistic:", levene_stat)
print("p-value:", levene_p)

if levene_p > 0.05:
    print("The variances are equal (fail to reject H0)")
else:
    print("The variances are not equal (reject H0)")


Levene's Test:
Test Statistic: 0.4183325584847387
p-value: 0.5203217321959388
The variances are equal (fail to reject H0)


SINCE VARIANCES ARE THE SAME, WE CAN USE A T-TEST

## T-TEST

In [8]:


# Perform two-sample t-test assuming equal variances
t_stat, t_p = stats.ttest_ind(patients, healthy, equal_var=True)

print("Two-Sample T-Test:")
print("Test Statistic:", t_stat)
print("p-value:", t_p)

if t_p > 0.05:
    print("There is no significant difference between the means (fail to reject H0)")
else:
    print("There is a significant difference between the means (reject H0)")


Two-Sample T-Test:
Test Statistic: 1.118627359547584
p-value: 0.2679108211373674
There is no significant difference between the means (fail to reject H0)
