In [4]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import numpy as np

## The following analyses use arcsine square-root transformed data

## CG proportion

In [5]:
# Raw CG proportions: one list per species (Tdu, Tpr, Tms), two values each.
CG_Tdu_proportion = [0.4663, 0.4403]
CG_Tpr_proportion = [0.4548, 0.4489]
CG_Tms_proportion = [0.4455, 0.4477]

# Arcsine square-root transform, arcsin(sqrt(p)), applied to each list in turn.
# Results are converted to plain Python lists so they can be concatenated later.
Trans_CG_Tdu_proportion, Trans_CG_Tpr_proportion, Trans_CG_Tms_proportion = (
    np.arcsin(np.sqrt(raw_values)).tolist()
    for raw_values in (CG_Tdu_proportion, CG_Tpr_proportion, CG_Tms_proportion)
)

### Variance homogeneity test (Bartlett's test)

In [6]:
# Bartlett's test for equal variances across the three transformed CG groups.
CG_bartlett_result = bartlett(
    Trans_CG_Tdu_proportion,
    Trans_CG_Tpr_proportion,
    Trans_CG_Tms_proportion,
)
# The result unpacks into the test statistic followed by its p-value.
statistic, p_value = CG_bartlett_result
print(statistic, p_value)

3.307853979524127 0.19129720948399365


### ANOVA and post hoc Tukey

In [7]:
# Long-format table: one row per transformed value, labelled with its species.
# The label order has to follow the order in which the three lists are joined.
CG_proportion = pd.DataFrame(
    {
        'species': ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms'],
        'methylation': [
            *Trans_CG_Tdu_proportion,
            *Trans_CG_Tpr_proportion,
            *Trans_CG_Tms_proportion,
        ],
    }
)

# Fit an OLS model with species as the only factor and derive its ANOVA table.
model = ols('methylation ~ species', data=CG_proportion).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparison of the transformed values between species.
posthoc = pairwise_tukeyhsd(
    CG_proportion['methylation'],
    CG_proportion['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000050  0.000025  0.207859  0.823112
Residual  3.0  0.000361  0.000120       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0067 0.8241 -0.0526 0.0391  False
   Tdu    Tpr  -0.0014 0.9906 -0.0473 0.0444  False
   Tms    Tpr   0.0053 0.8849 -0.0406 0.0511  False
---------------------------------------------------


## CHG proportion

In [8]:
# Raw CHG proportions: one list per species (Tdu, Tpr, Tms), two values each.
CHG_Tdu_proportion = [0.3220, 0.3029]
CHG_Tpr_proportion = [0.3154, 0.3129]
CHG_Tms_proportion = [0.3029, 0.3042]

# Arcsine square-root transform, arcsin(sqrt(p)), applied to each list in turn.
# Results are converted to plain Python lists so they can be concatenated later.
Trans_CHG_Tdu_proportion, Trans_CHG_Tpr_proportion, Trans_CHG_Tms_proportion = (
    np.arcsin(np.sqrt(raw_values)).tolist()
    for raw_values in (CHG_Tdu_proportion, CHG_Tpr_proportion, CHG_Tms_proportion)
)

### Variance homogeneity test (Bartlett's test)

In [9]:
# Bartlett's test for equal variances across the three transformed CHG groups.
CHG_bartlett_result = bartlett(
    Trans_CHG_Tdu_proportion,
    Trans_CHG_Tpr_proportion,
    Trans_CHG_Tms_proportion,
)
# The result unpacks into the test statistic followed by its p-value.
statistic, p_value = CHG_bartlett_result
print(statistic, p_value)

4.290639326005211 0.11703062070452404


### ANOVA and post hoc Tukey

In [10]:
# Long-format table: one row per transformed value, labelled with its species.
# The label order has to follow the order in which the three lists are joined.
CHG_proportion = pd.DataFrame(
    {
        'species': ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms'],
        'methylation': [
            *Trans_CHG_Tdu_proportion,
            *Trans_CHG_Tpr_proportion,
            *Trans_CHG_Tms_proportion,
        ],
    }
)

# Fit an OLS model with species as the only factor and derive its ANOVA table.
model = ols('methylation ~ species', data=CHG_proportion).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparison of the transformed values between species.
posthoc = pairwise_tukeyhsd(
    CHG_proportion['methylation'],
    CHG_proportion['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000151  0.000076  1.047271  0.451881
Residual  3.0  0.000217  0.000072       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0096  0.563 -0.0451 0.0259  False
   Tdu    Tpr   0.0019 0.9738 -0.0337 0.0374  False
   Tms    Tpr   0.0115 0.4649 -0.0241  0.047  False
---------------------------------------------------


## CHH proportion

In [13]:
# Raw CHH proportions: one list per species (Tdu, Tpr, Tms), two values each.
CHH_Tdu_proportion = [0.2116, 0.2568]
CHH_Tpr_proportion = [0.2298, 0.2381]
CHH_Tms_proportion = [0.2517, 0.2482]

# Arcsine square-root transform, arcsin(sqrt(p)), applied to each list in turn.
# Results are converted to plain Python lists so they can be concatenated later.
Trans_CHH_Tdu_proportion, Trans_CHH_Tpr_proportion, Trans_CHH_Tms_proportion = (
    np.arcsin(np.sqrt(raw_values)).tolist()
    for raw_values in (CHH_Tdu_proportion, CHH_Tpr_proportion, CHH_Tms_proportion)
)

### Variance homogeneity test (Bartlett's test)

In [14]:
# Bartlett's test for equal variances across the three transformed CHH groups.
CHH_bartlett_result = bartlett(
    Trans_CHH_Tdu_proportion,
    Trans_CHH_Tpr_proportion,
    Trans_CHH_Tms_proportion,
)
# The result unpacks into the test statistic followed by its p-value.
statistic, p_value = CHH_bartlett_result
print(statistic, p_value)

3.72052137056524 0.15563205409340222


### ANOVA and post hoc Tukey

In [15]:
# Long-format table: one row per transformed value, labelled with its species.
# The label order has to follow the order in which the three lists are joined.
CHH_proportion = pd.DataFrame(
    {
        'species': ['Tdu', 'Tdu', 'Tpr', 'Tpr', 'Tms', 'Tms'],
        'methylation': [
            *Trans_CHH_Tdu_proportion,
            *Trans_CHH_Tpr_proportion,
            *Trans_CHH_Tms_proportion,
        ],
    }
)

# Fit an OLS model with species as the only factor and derive its ANOVA table.
model = ols('methylation ~ species', data=CHH_proportion).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparison of the transformed values between species.
posthoc = pairwise_tukeyhsd(
    CHH_proportion['methylation'],
    CHH_proportion['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000469  0.000235  0.474826  0.661978
Residual  3.0  0.001483  0.000494       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms   0.0188 0.7044 -0.0741 0.1117  False
   Tdu    Tpr   0.0001    1.0 -0.0928  0.093  False
   Tms    Tpr  -0.0187 0.7076 -0.1116 0.0742  False
---------------------------------------------------
