In [29]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats
import numpy as np

## The following tests use arcsine square-root transformed data, except the "Tms vs MPV" t-tests that are run on the untransformed values

## CG context; genome wide methylation level

In [21]:
# Genome-wide CG methylation proportions: two values per species.
CG_Tdu_genome_wide = [0.9019257154942122, 0.8914492423657467]
CG_Tpr_genome_wide = [0.8449125121524577, 0.8592915816350719]
CG_Tms_genome_wide = [0.8705786657582499, 0.8659178491131588]

# Arcsine square-root transform of each group. The results are converted back
# to plain lists because later cells join groups with list `+` concatenation.
(
    Trans_CG_Tdu_genome_wide,
    Trans_CG_Tpr_genome_wide,
    Trans_CG_Tms_genome_wide,
) = (
    np.arcsin(np.sqrt(proportions)).tolist()
    for proportions in (CG_Tdu_genome_wide, CG_Tpr_genome_wide, CG_Tms_genome_wide)
)

### Variance homogeneity test (Bartlett)

In [22]:
# Bartlett's test for equal variances across the three transformed CG groups.
cg_variance_test = bartlett(
    Trans_CG_Tdu_genome_wide,
    Trans_CG_Tpr_genome_wide,
    Trans_CG_Tms_genome_wide,
)
statistic, p_value = cg_variance_test
print(statistic, p_value)

0.7008091531696429 0.7044030470826868


### ANOVA and post hoc Tukey

In [23]:
# Long-format table: one row per transformed value, labelled by species.
# The label order must match the concatenation order of the value lists.
cg_species_labels = ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms']
cg_transformed_values = [
    *Trans_CG_Tdu_genome_wide,
    *Trans_CG_Tpr_genome_wide,
    *Trans_CG_Tms_genome_wide,
]
CG_genome_wide = pd.DataFrame({
    'species': cg_species_labels,
    'methylation': cg_transformed_values,
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CG_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    CG_genome_wide['methylation'],
    CG_genome_wide['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004706  0.002353  18.719661  0.020206
Residual  3.0  0.000377  0.000126        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0443 0.0577 -0.0912  0.0025  False
   Tdu    Tpr  -0.0675 0.0187 -0.1144 -0.0207   True
   Tms    Tpr  -0.0232 0.2435   -0.07  0.0237  False
----------------------------------------------------


### Tms vs MPV (untransformed values)

In [30]:
# Tms values on the raw (untransformed) scale. They are taken from the shared
# CG data list rather than re-typed, so this test cannot drift from the values
# defined in the CG data cell.
Tms = np.array(CG_Tms_genome_wide)

# MPV: mean of all Tdu and Tpr values pooled together (list `+` concatenates).
# With equal numbers of values per species this equals the average of the two
# species means.
MPV = np.mean(CG_Tdu_genome_wide + CG_Tpr_genome_wide)

# One-sample t-test of the Tms values against MPV.
# NOTE(review): ttest_1samp treats MPV as a fixed constant, so the uncertainty
# in the Tdu/Tpr values is not reflected in the p-value; with two Tms values
# the test has a single degree of freedom.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.6375229682726733
P-Value: 0.23071003336777324


### Tms vs MPV transformed

In [43]:
# MPV on the transformed scale: pool the raw Tdu and Tpr values, average them,
# then apply the arcsine square-root transform to that single mean.
cg_pooled_mean = np.mean(CG_Tdu_genome_wide + CG_Tpr_genome_wide)
cg_mpv_transformed = np.arcsin(np.sqrt(cg_pooled_mean))

# One-sample t-test of the transformed Tms values against the transformed MPV.
cg_ttest_result = stats.ttest_1samp(Trans_CG_Tms_genome_wide, cg_mpv_transformed)
t_statistic, p_value = cg_ttest_result

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.66045070672953
P-Value: 0.2288893757495026


## CHG context; genome wide methylation level

In [44]:
# Genome-wide CHG methylation proportions: two values per species.
CHG_Tdu_genome_wide = [0.7400100633331924, 0.7242402876397463]
CHG_Tpr_genome_wide = [0.6740839300527761, 0.6929469612134965]
CHG_Tms_genome_wide = [0.6892210409240979, 0.6816075744576451]

# Arcsine square-root transform of each group. The results are converted back
# to plain lists because later cells join groups with list `+` concatenation.
(
    Trans_CHG_Tdu_genome_wide,
    Trans_CHG_Tpr_genome_wide,
    Trans_CHG_Tms_genome_wide,
) = (
    np.arcsin(np.sqrt(proportions)).tolist()
    for proportions in (CHG_Tdu_genome_wide, CHG_Tpr_genome_wide, CHG_Tms_genome_wide)
)

### Variance homogeneity test (Bartlett)

In [45]:
# Bartlett's test for equal variances across the three transformed CHG groups.
chg_variance_test = bartlett(
    Trans_CHG_Tdu_genome_wide,
    Trans_CHG_Tpr_genome_wide,
    Trans_CHG_Tms_genome_wide,
)
statistic, p_value = chg_variance_test
print(statistic, p_value)

0.5227690509698711 0.7699847839104391


### ANOVA and post hoc Tukey

In [46]:
# Long-format table: one row per transformed value, labelled by species.
# The label order must match the concatenation order of the value lists.
chg_species_labels = ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms']
chg_transformed_values = [
    *Trans_CHG_Tdu_genome_wide,
    *Trans_CHG_Tpr_genome_wide,
    *Trans_CHG_Tms_genome_wide,
]
CHG_genome_wide = pd.DataFrame({
    'species': chg_species_labels,
    'methylation': chg_transformed_values,
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHG_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    CHG_genome_wide['methylation'],
    CHG_genome_wide['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.003677  0.001838  13.864136  0.030505
Residual  3.0  0.000398  0.000133        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0515 0.0419 -0.0996 -0.0034   True
   Tdu    Tpr  -0.0535 0.0379 -0.1016 -0.0054   True
   Tms    Tpr   -0.002 0.9834 -0.0501  0.0461  False
----------------------------------------------------


### Tms vs MPV (untransformed values)

In [16]:
# Tms values on the raw (untransformed) scale. They are taken from the shared
# CHG data list rather than re-typed, so this test cannot drift from the values
# defined in the CHG data cell.
Tms = np.array(CHG_Tms_genome_wide)

# MPV: mean of all Tdu and Tpr values pooled together (list `+` concatenates).
# With equal numbers of values per species this equals the average of the two
# species means.
MPV = np.mean(CHG_Tdu_genome_wide + CHG_Tpr_genome_wide)

# One-sample t-test of the Tms values against MPV.
# NOTE(review): ttest_1samp treats MPV as a fixed constant, so the uncertainty
# in the Tdu/Tpr values is not reflected in the p-value; with two Tms values
# the test has a single degree of freedom.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.885887320226351
P-Value: 0.10713734027481443


### Tms vs MPV transformed

In [47]:
# MPV on the transformed scale: pool the raw Tdu and Tpr values, average them,
# then apply the arcsine square-root transform to that single mean.
chg_pooled_mean = np.mean(CHG_Tdu_genome_wide + CHG_Tpr_genome_wide)
chg_mpv_transformed = np.arcsin(np.sqrt(chg_pooled_mean))

# One-sample t-test of the transformed Tms values against the transformed MPV.
chg_ttest_result = stats.ttest_1samp(Trans_CHG_Tms_genome_wide, chg_mpv_transformed)
t_statistic, p_value = chg_ttest_result

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.944363045201038
P-Value: 0.10610291424356669


## CHH context; genome wide methylation level

In [48]:
# Genome-wide CHH methylation proportions: two values per species.
CHH_Tdu_genome_wide = [0.10027441958845365, 0.11646725561245197]
CHH_Tpr_genome_wide = [0.09374047934202714, 0.1010612785565267]
CHH_Tms_genome_wide = [0.10643238846709101, 0.10262629538960003]

# Arcsine square-root transform of each group. The results are converted back
# to plain lists because later cells join groups with list `+` concatenation.
(
    Trans_CHH_Tdu_genome_wide,
    Trans_CHH_Tpr_genome_wide,
    Trans_CHH_Tms_genome_wide,
) = (
    np.arcsin(np.sqrt(proportions)).tolist()
    for proportions in (CHH_Tdu_genome_wide, CHH_Tpr_genome_wide, CHH_Tms_genome_wide)
)

### Variance homogeneity test (Bartlett)

In [49]:
# Bartlett's test for equal variances across the three transformed CHH groups.
chh_variance_test = bartlett(
    Trans_CHH_Tdu_genome_wide,
    Trans_CHH_Tpr_genome_wide,
    Trans_CHH_Tms_genome_wide,
)
statistic, p_value = chh_variance_test
print(statistic, p_value)

1.251385370103141 0.5348907893117119


### ANOVA and post hoc Tukey

In [50]:
# Long-format table: one row per transformed value, labelled by species.
# The label order must match the concatenation order of the value lists.
chh_species_labels = ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms']
chh_transformed_values = [
    *Trans_CHH_Tdu_genome_wide,
    *Trans_CHH_Tpr_genome_wide,
    *Trans_CHH_Tms_genome_wide,
]
CHH_genome_wide = pd.DataFrame({
    'species': chh_species_labels,
    'methylation': chh_transformed_values,
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHH_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    CHH_genome_wide['methylation'],
    CHH_genome_wide['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000332  0.000166  1.143262  0.427491
Residual  3.0  0.000435  0.000145       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms   -0.006 0.8764 -0.0564 0.0443  False
   Tdu    Tpr  -0.0179 0.4113 -0.0682 0.0324  False
   Tms    Tpr  -0.0119 0.6332 -0.0622 0.0385  False
---------------------------------------------------


### Tms vs MPV (untransformed values)

In [20]:
# Tms values on the raw (untransformed) scale. They are taken from the shared
# CHH data list rather than re-typed, so this test cannot drift from the values
# defined in the CHH data cell.
Tms = np.array(CHH_Tms_genome_wide)

# MPV: mean of all Tdu and Tpr values pooled together (list `+` concatenates).
# With equal numbers of values per species this equals the average of the two
# species means.
MPV = np.mean(CHH_Tdu_genome_wide + CHH_Tpr_genome_wide)

# One-sample t-test of the Tms values against MPV.
# NOTE(review): ttest_1samp treats MPV as a fixed constant, so the uncertainty
# in the Tdu/Tpr values is not reflected in the p-value; with two Tms values
# the test has a single degree of freedom.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: 0.8636066538677756
P-Value: 0.5465099016975564


### Tms vs MPV transformed

In [51]:
# MPV on the transformed scale: pool the raw Tdu and Tpr values, average them,
# then apply the arcsine square-root transform to that single mean.
chh_pooled_mean = np.mean(CHH_Tdu_genome_wide + CHH_Tpr_genome_wide)
chh_mpv_transformed = np.arcsin(np.sqrt(chh_pooled_mean))

# One-sample t-test of the transformed Tms values against the transformed MPV.
chh_ttest_result = stats.ttest_1samp(Trans_CHH_Tms_genome_wide, chh_mpv_transformed)
t_statistic, p_value = chh_ttest_result

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: 0.8625761243446046
P-Value: 0.5468858805147349
