In [9]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats
import numpy as np

## CG context; genome-wide methylation level

### Variance homogeneity test (Bartlett)

In [10]:
# Genome-wide CG methylation levels, two values per species group.
CG_Tdu_genome_wide = [
    0.9019257154942122,
    0.8914492423657467,
]
CG_Tpr_genome_wide = [
    0.8449125121524577,
    0.8592915816350719,
]
CG_Tms_genome_wide = [
    0.8705786657582499,
    0.8659178491131588,
]

# Bartlett's test: the null hypothesis is that all three groups share the
# same variance (checked before running the ANOVA on these groups).
statistic, p_value = bartlett(
    CG_Tdu_genome_wide,
    CG_Tpr_genome_wide,
    CG_Tms_genome_wide,
)
print(statistic, p_value)

0.7388403468985676 0.6911349528762308


### ANOVA and post hoc Tukey

In [8]:
# Long-format table (one row per measurement) built from the per-species lists
# defined in the variance-homogeneity cell, instead of re-typing the values,
# so the ANOVA always runs on exactly the data the Bartlett test used.
# Row order is Tdu, Tdu, Tpr, Tpr, Tms, Tms.
CG_levels_by_species = {
    'Tdu': CG_Tdu_genome_wide,
    'Tpr': CG_Tpr_genome_wide,
    'Tms': CG_Tms_genome_wide,
}
CG_genome_wide = pd.DataFrame(
    [(species, level)
     for species, levels in CG_levels_by_species.items()
     for level in levels],
    columns=['species', 'methylation'],
)

# One-way ANOVA: fit methylation ~ species by OLS, then derive the ANOVA table.
model = ols('methylation ~ species', data=CG_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc test across all pairwise species comparisons.
posthoc = pairwise_tukeyhsd(CG_genome_wide['methylation'], CG_genome_wide['species'])

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.002038  0.001019  18.078135  0.021207
Residual  3.0  0.000169  0.000056        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0284 0.0643 -0.0598  0.0029  False
   Tdu    Tpr  -0.0446 0.0194  -0.076 -0.0132   True
   Tms    Tpr  -0.0161 0.2267 -0.0475  0.0152  False
----------------------------------------------------


### Tms vs MPV

In [11]:
# Tms values come from the list defined in the variance-homogeneity cell
# rather than a re-typed copy, so both analyses share one source of data.
Tms = np.array(CG_Tms_genome_wide)

# MPV (presumably the mid-parent value — confirm): midpoint of the Tdu and Tpr
# group means. Each group is averaged separately instead of joining the lists
# with `+`, which would silently become element-wise addition if the inputs
# were NumPy arrays and would weight by group size if the groups were unequal.
MPV = (np.mean(CG_Tdu_genome_wide) + np.mean(CG_Tpr_genome_wide)) / 2

# One-sample t-test of the Tms values against MPV.
# NOTE(review): MPV is passed as a fixed constant, so its own sampling
# variability is not part of the test — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.6375229682726733
P-Value: 0.23071003336777324


## CHG context; genome-wide methylation level

### Variance homogeneity test (Bartlett)

In [14]:
# Genome-wide CHG methylation levels, two values per species group.
CHG_Tdu_genome_wide = [
    0.7400100633331924,
    0.7242402876397463,
]
CHG_Tpr_genome_wide = [
    0.6740839300527761,
    0.6929469612134965,
]
CHG_Tms_genome_wide = [
    0.6892210409240979,
    0.6816075744576451,
]

# Bartlett's test: the null hypothesis is that all three groups share the
# same variance (checked before running the ANOVA on these groups).
statistic, p_value = bartlett(
    CHG_Tdu_genome_wide,
    CHG_Tpr_genome_wide,
    CHG_Tms_genome_wide,
)
print(statistic, p_value)

0.5134356022084625 0.7735864882183526


### ANOVA and post hoc Tukey

In [15]:
# Long-format table (one row per measurement) built from the per-species lists
# defined in the variance-homogeneity cell, instead of re-typing the values,
# so the ANOVA always runs on exactly the data the Bartlett test used.
# Row order is Tdu, Tdu, Tpr, Tpr, Tms, Tms.
CHG_levels_by_species = {
    'Tdu': CHG_Tdu_genome_wide,
    'Tpr': CHG_Tpr_genome_wide,
    'Tms': CHG_Tms_genome_wide,
}
CHG_genome_wide = pd.DataFrame(
    [(species, level)
     for species, levels in CHG_levels_by_species.items()
     for level in levels],
    columns=['species', 'methylation'],
)

# One-way ANOVA: fit methylation ~ species by OLS, then derive the ANOVA table.
model = ols('methylation ~ species', data=CHG_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc test across all pairwise species comparisons.
posthoc = pairwise_tukeyhsd(CHG_genome_wide['methylation'], CHG_genome_wide['species'])

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.003032  0.001516  13.731802  0.030904
Residual  3.0  0.000331  0.000110        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0467 0.0425 -0.0906 -0.0028   True
   Tdu    Tpr  -0.0486 0.0383 -0.0925 -0.0047   True
   Tms    Tpr  -0.0019 0.9823 -0.0458   0.042  False
----------------------------------------------------


### Tms vs MPV

In [16]:
# Tms values come from the list defined in the variance-homogeneity cell
# rather than a re-typed copy, so both analyses share one source of data.
Tms = np.array(CHG_Tms_genome_wide)

# MPV (presumably the mid-parent value — confirm): midpoint of the Tdu and Tpr
# group means. Each group is averaged separately instead of joining the lists
# with `+`, which would silently become element-wise addition if the inputs
# were NumPy arrays and would weight by group size if the groups were unequal.
MPV = (np.mean(CHG_Tdu_genome_wide) + np.mean(CHG_Tpr_genome_wide)) / 2

# One-sample t-test of the Tms values against MPV.
# NOTE(review): MPV is passed as a fixed constant, so its own sampling
# variability is not part of the test — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.885887320226351
P-Value: 0.10713734027481443


## CHH context; genome-wide methylation level

### Variance homogeneity test (Bartlett)

In [18]:
# Genome-wide CHH methylation levels, two values per species group.
CHH_Tdu_genome_wide = [
    0.10027441958845365,
    0.11646725561245197,
]
CHH_Tpr_genome_wide = [
    0.09374047934202714,
    0.1010612785565267,
]
CHH_Tms_genome_wide = [
    0.10643238846709101,
    0.10262629538960003,
]

# Bartlett's test: the null hypothesis is that all three groups share the
# same variance (checked before running the ANOVA on these groups).
statistic, p_value = bartlett(
    CHH_Tdu_genome_wide,
    CHH_Tpr_genome_wide,
    CHH_Tms_genome_wide,
)
print(statistic, p_value)

1.3017311463750418 0.5215941034403512


### ANOVA and post hoc Tukey

In [19]:
# Long-format table (one row per measurement) built from the per-species lists
# defined in the variance-homogeneity cell, instead of re-typing the values,
# so the ANOVA always runs on exactly the data the Bartlett test used.
# Row order is Tdu, Tdu, Tpr, Tpr, Tms, Tms.
CHH_levels_by_species = {
    'Tdu': CHH_Tdu_genome_wide,
    'Tpr': CHH_Tpr_genome_wide,
    'Tms': CHH_Tms_genome_wide,
}
CHH_genome_wide = pd.DataFrame(
    [(species, level)
     for species, levels in CHH_levels_by_species.items()
     for level in levels],
    columns=['species', 'methylation'],
)

# One-way ANOVA: fit methylation ~ species by OLS, then derive the ANOVA table.
model = ols('methylation ~ species', data=CHH_genome_wide).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc test across all pairwise species comparisons.
posthoc = pairwise_tukeyhsd(CHH_genome_wide['methylation'], CHH_genome_wide['species'])

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000124  0.000062  1.125756  0.431773
Residual  3.0  0.000165  0.000055       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0038 0.8686 -0.0348 0.0272  False
   Tdu    Tpr   -0.011  0.414  -0.042   0.02  False
   Tms    Tpr  -0.0071 0.6458 -0.0381 0.0239  False
---------------------------------------------------


### Tms vs MPV

In [20]:
# Tms values come from the list defined in the variance-homogeneity cell
# rather than a re-typed copy, so both analyses share one source of data.
Tms = np.array(CHH_Tms_genome_wide)

# MPV (presumably the mid-parent value — confirm): midpoint of the Tdu and Tpr
# group means. Each group is averaged separately instead of joining the lists
# with `+`, which would silently become element-wise addition if the inputs
# were NumPy arrays and would weight by group size if the groups were unequal.
MPV = (np.mean(CHH_Tdu_genome_wide) + np.mean(CHH_Tpr_genome_wide)) / 2

# One-sample t-test of the Tms values against MPV.
# NOTE(review): MPV is passed as a fixed constant, so its own sampling
# variability is not part of the test — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(Tms, MPV)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: 0.8636066538677756
P-Value: 0.5465099016975564
