In [1]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats
import numpy as np

## The Bartlett, ANOVA and Tukey HSD tests below use arcsine square-root-transformed data; the one-sample t-tests use the untransformed values
## The methylation levels are taken from the Excel files: CG/CHG/CHH_anova_new_LINE_DNA.xlsx

## CG context; DNA; upstream

In [2]:
# CG-context methylation levels for the upstream region, two values per species.
CG_Tdu_DNA_upstream = [0.892359313, 0.876552317]
CG_Tpr_DNA_upstream = [0.823259096, 0.847373947]
CG_Tms_DNA_upstream = [0.856445535, 0.845975067]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CG_Tdu_DNA_upstream, Trans_CG_Tpr_DNA_upstream, Trans_CG_Tms_DNA_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_DNA_upstream, CG_Tpr_DNA_upstream, CG_Tms_DNA_upstream)
)

### Homogeneity-of-variance test (Bartlett)

In [3]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CG_Tdu_DNA_upstream,
    Trans_CG_Tpr_DNA_upstream,
    Trans_CG_Tms_DNA_upstream,
)
print(statistic, p_value)

0.39692414795633096 0.8199908691422889


### ANOVA and post hoc Tukey

In [4]:
# Long-format table: one row per transformed value, labelled with its species.
CG_DNA_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_DNA_upstream,
        *Trans_CG_Tpr_DNA_upstream,
        *Trans_CG_Tms_DNA_upstream,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CG_DNA_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CG_DNA_upstream['methylation'],
    groups=CG_DNA_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.005293  0.002646  8.416933  0.058826
Residual  3.0  0.000943  0.000314       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0493 0.1338 -0.1234 0.0248  False
   Tdu    Tpr   -0.071 0.0558 -0.1451 0.0031  False
   Tms    Tpr  -0.0217 0.5194 -0.0958 0.0524  False
---------------------------------------------------


## CG context; DNA; body

In [5]:
# CG-context methylation levels for the body region, two values per species.
CG_Tdu_DNA_body = [0.940752831, 0.940052927]
CG_Tpr_DNA_body = [0.907191895, 0.914563048]
CG_Tms_DNA_body = [0.925654546, 0.925261758]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CG_Tdu_DNA_body, Trans_CG_Tpr_DNA_body, Trans_CG_Tms_DNA_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_DNA_body, CG_Tpr_DNA_body, CG_Tms_DNA_body)
)

In [6]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CG_Tdu_DNA_body,
    Trans_CG_Tpr_DNA_body,
    Trans_CG_Tms_DNA_body,
)
print(statistic, p_value)

4.703095436886438 0.0952216717705572


In [7]:
# Long-format table: one row per transformed value, labelled with its species.
CG_DNA_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_DNA_body,
        *Trans_CG_Tpr_DNA_body,
        *Trans_CG_Tms_DNA_body,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CG_DNA_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CG_DNA_body['methylation'],
    groups=CG_DNA_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.003194  0.001597  56.313246  0.004179
Residual  3.0  0.000085  0.000028        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0299 0.0226 -0.0522 -0.0077   True
   Tdu    Tpr  -0.0565 0.0037 -0.0787 -0.0342   True
   Tms    Tpr  -0.0266 0.0313 -0.0488 -0.0043   True
----------------------------------------------------


In [8]:
# One-sample t-test of the Tms values against the mean of the pooled Tdu and Tpr values.
# NOTE(review): this uses the untransformed values, unlike the Bartlett/ANOVA cells — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(
    a=CG_Tms_DNA_body,
    popmean=np.mean([*CG_Tdu_DNA_body, *CG_Tpr_DNA_body]),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -0.9268269397223964
P-Value: 0.5241646209166707


## CG context; DNA; downstream

In [9]:
# CG-context methylation levels for the downstream region, two values per species.
CG_Tdu_DNA_down = [0.899022624, 0.882461451]
CG_Tpr_DNA_down = [0.832479779, 0.853148736]
CG_Tms_DNA_down = [0.862705425, 0.854376165]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CG_Tdu_DNA_down, Trans_CG_Tpr_DNA_down, Trans_CG_Tms_DNA_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_DNA_down, CG_Tpr_DNA_down, CG_Tms_DNA_down)
)

In [10]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CG_Tdu_DNA_down,
    Trans_CG_Tpr_DNA_down,
    Trans_CG_Tms_DNA_down,
)
print(statistic, p_value)

0.5022590966904905 0.7779215865765629


In [11]:
# Long-format table: one row per transformed value, labelled with its species.
CG_DNA_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_DNA_down,
        *Trans_CG_Tpr_DNA_down,
        *Trans_CG_Tms_DNA_down,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CG_DNA_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CG_DNA_down['methylation'],
    groups=CG_DNA_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.005258  0.002629  9.529145  0.050156
Residual  3.0  0.000828  0.000276       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0489 0.1177 -0.1183  0.0205  False
   Tdu    Tpr  -0.0708 0.0475 -0.1402 -0.0014   True
   Tms    Tpr  -0.0219 0.4772 -0.0913  0.0475  False
----------------------------------------------------


## CHG; DNA; upstream

In [12]:
# CHG-context methylation levels for the upstream region, two values per species.
CHG_Tdu_DNA_upstream = [0.732091786, 0.719637537]
CHG_Tpr_DNA_upstream = [0.649912599, 0.676107095]
CHG_Tms_DNA_upstream = [0.677726007, 0.666931299]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHG_Tdu_DNA_upstream, Trans_CHG_Tpr_DNA_upstream, Trans_CHG_Tms_DNA_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_DNA_upstream, CHG_Tpr_DNA_upstream, CHG_Tms_DNA_upstream)
)

In [13]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHG_Tdu_DNA_upstream,
    Trans_CHG_Tpr_DNA_upstream,
    Trans_CHG_Tms_DNA_upstream,
)
print(statistic, p_value)

0.6225316161068299 0.7325191405025384


In [14]:
# Long-format table: one row per transformed value, labelled with its species.
CHG_DNA_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_DNA_upstream,
        *Trans_CHG_Tpr_DNA_upstream,
        *Trans_CHG_Tms_DNA_upstream,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHG_DNA_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_DNA_upstream['methylation'],
    groups=CHG_DNA_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.005447  0.002724  14.921237  0.027608
Residual  3.0  0.000548  0.000183        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0584 0.0457 -0.1149  -0.002   True
   Tdu    Tpr  -0.0683 0.0302 -0.1247 -0.0118   True
   Tms    Tpr  -0.0098 0.7659 -0.0663  0.0466  False
----------------------------------------------------


In [16]:
# One-sample t-test of the Tms values against the mean of the pooled Tdu and Tpr values.
# NOTE(review): this uses the untransformed values, unlike the Bartlett/ANOVA cells — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(
    a=CHG_Tms_DNA_upstream,
    popmean=np.mean([*CHG_Tdu_DNA_upstream, *CHG_Tpr_DNA_upstream]),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -4.096192551016675
P-Value: 0.15243578365125482


## CHG; DNA; body

In [17]:
# CHG-context methylation levels for the body region, two values per species.
CHG_Tdu_DNA_body = [0.768672099, 0.771243756]
CHG_Tpr_DNA_body = [0.71515398, 0.723998008]
CHG_Tms_DNA_body = [0.730676782, 0.732300465]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHG_Tdu_DNA_body, Trans_CHG_Tpr_DNA_body, Trans_CHG_Tms_DNA_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_DNA_body, CHG_Tpr_DNA_body, CHG_Tms_DNA_body)
)

In [18]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHG_Tdu_DNA_body,
    Trans_CHG_Tpr_DNA_body,
    Trans_CHG_Tms_DNA_body,
)
print(statistic, p_value)

1.9222266227429134 0.3824668441737024


In [19]:
# Long-format table: one row per transformed value, labelled with its species.
CHG_DNA_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_DNA_body,
        *Trans_CHG_Tpr_DNA_body,
        *Trans_CHG_Tms_DNA_body,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHG_DNA_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_DNA_body['methylation'],
    groups=CHG_DNA_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq           F    PR(>F)
species   2.0  0.003668  0.001834  100.400676  0.001786
Residual  3.0  0.000055  0.000018         NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0445 0.0039 -0.0624 -0.0266   True
   Tdu    Tpr  -0.0578 0.0018 -0.0757   -0.04   True
   Tms    Tpr  -0.0133 0.1029 -0.0312  0.0045  False
----------------------------------------------------


### Tms vs MPV (mean of the Tdu and Tpr values): one-sample t-test

In [20]:
# One-sample t-test of the Tms values against the mean of the pooled Tdu and Tpr values.
# NOTE(review): this uses the untransformed values, unlike the Bartlett/ANOVA cells — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(
    a=CHG_Tms_DNA_body,
    popmean=np.mean([*CHG_Tdu_DNA_body, *CHG_Tpr_DNA_body]),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -16.35582468991742
P-Value: 0.03887473128106246


In [21]:
# Same one-sample t-test of Tms against the pooled Tdu/Tpr mean, but one-sided
# (alternative='less': the Tms mean is below the reference mean).
# NOTE(review): this uses the untransformed values, unlike the Bartlett/ANOVA cells — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(
    a=CHG_Tms_DNA_body,
    popmean=np.mean([*CHG_Tdu_DNA_body, *CHG_Tpr_DNA_body]),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -16.35582468991742
P-Value: 0.01943736564053123


## CHG; DNA; downstream

In [22]:
# CHG-context methylation levels for the downstream region, two values per species.
CHG_Tdu_DNA_down = [0.740366581, 0.726231438]
CHG_Tpr_DNA_down = [0.653732997, 0.677189013]
CHG_Tms_DNA_down = [0.681464518, 0.672751555]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHG_Tdu_DNA_down, Trans_CHG_Tpr_DNA_down, Trans_CHG_Tms_DNA_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_DNA_down, CHG_Tpr_DNA_down, CHG_Tms_DNA_down)
)

In [23]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHG_Tdu_DNA_down,
    Trans_CHG_Tpr_DNA_down,
    Trans_CHG_Tms_DNA_down,
)
print(statistic, p_value)

0.6040801220114028 0.7393084468563671


In [24]:
# Long-format table: one row per transformed value, labelled with its species.
CHG_DNA_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_DNA_down,
        *Trans_CHG_Tpr_DNA_down,
        *Trans_CHG_Tms_DNA_down,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHG_DNA_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_DNA_down['methylation'],
    groups=CHG_DNA_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.006297  0.003148  19.671891  0.018858
Residual  3.0  0.000480  0.000160        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0617 0.0332 -0.1146 -0.0088   True
   Tdu    Tpr  -0.0741 0.0202 -0.1269 -0.0212   True
   Tms    Tpr  -0.0124 0.6379 -0.0652  0.0405  False
----------------------------------------------------


In [25]:
# One-sample t-test of the Tms values against the mean of the pooled Tdu and Tpr values.
# NOTE(review): this uses the untransformed values, unlike the Bartlett/ANOVA cells — confirm this is intended.
t_statistic, p_value = stats.ttest_1samp(
    a=CHG_Tms_DNA_down,
    popmean=np.mean([*CHG_Tdu_DNA_down, *CHG_Tpr_DNA_down]),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.112375835866638
P-Value: 0.1229725733454722


## CHH; DNA; upstream

In [26]:
# CHH-context methylation levels for the upstream region, two values per species.
CHH_Tdu_DNA_upstream = [0.120363014, 0.138694359]
CHH_Tpr_DNA_upstream = [0.107084651, 0.117643478]
CHH_Tms_DNA_upstream = [0.123793978, 0.117858304]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHH_Tdu_DNA_upstream, Trans_CHH_Tpr_DNA_upstream, Trans_CHH_Tms_DNA_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_DNA_upstream, CHH_Tpr_DNA_upstream, CHH_Tms_DNA_upstream)
)

In [27]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHH_Tdu_DNA_upstream,
    Trans_CHH_Tpr_DNA_upstream,
    Trans_CHH_Tms_DNA_upstream,
)
print(statistic, p_value)

0.7410459764694506 0.6903731791563299


In [28]:
# Long-format table: one row per transformed value, labelled with its species.
CHH_DNA_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_DNA_upstream,
        *Trans_CHH_Tpr_DNA_upstream,
        *Trans_CHH_Tms_DNA_upstream,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHH_DNA_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_DNA_upstream['methylation'],
    groups=CHH_DNA_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq        F    PR(>F)
species   2.0  0.000687  0.000344  1.86016  0.298261
Residual  3.0  0.000554  0.000185      NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms   -0.013 0.6491 -0.0698 0.0438  False
   Tdu    Tpr  -0.0262 0.2758  -0.083 0.0306  False
   Tms    Tpr  -0.0132 0.6387   -0.07 0.0436  False
---------------------------------------------------


## CHH; DNA; body

In [29]:
# CHH-context methylation levels for the body region, two values per species.
CHH_Tdu_DNA_body = [0.130368364, 0.156397682]
CHH_Tpr_DNA_body = [0.123949954, 0.134141027]
CHH_Tms_DNA_body = [0.139464909, 0.134059826]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHH_Tdu_DNA_body, Trans_CHH_Tpr_DNA_body, Trans_CHH_Tms_DNA_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_DNA_body, CHH_Tpr_DNA_body, CHH_Tms_DNA_body)
)

In [30]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHH_Tdu_DNA_body,
    Trans_CHH_Tpr_DNA_body,
    Trans_CHH_Tms_DNA_body,
)
print(statistic, p_value)

1.5059602768749236 0.47096093053530563


In [31]:
# Long-format table: one row per transformed value, labelled with its species.
CHH_DNA_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_DNA_body,
        *Trans_CHH_Tpr_DNA_body,
        *Trans_CHH_Tms_DNA_body,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHH_DNA_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_DNA_body['methylation'],
    groups=CHH_DNA_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000427  0.000213  0.764463  0.539125
Residual  3.0  0.000837  0.000279       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0092 0.8534  -0.079 0.0606  False
   Tdu    Tpr  -0.0206 0.5142 -0.0904 0.0492  False
   Tms    Tpr  -0.0114 0.7888 -0.0812 0.0584  False
---------------------------------------------------


## CHH; DNA; downstream

In [32]:
# CHH-context methylation levels for the downstream region, two values per species.
CHH_Tdu_DNA_down = [0.121291693, 0.139360785]
CHH_Tpr_DNA_down = [0.106447767, 0.11697079]
CHH_Tms_DNA_down = [0.124131386, 0.11830005]

# Arcsine square-root transform of each species' values, stored as plain lists.
Trans_CHH_Tdu_DNA_down, Trans_CHH_Tpr_DNA_down, Trans_CHH_Tms_DNA_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_DNA_down, CHH_Tpr_DNA_down, CHH_Tms_DNA_down)
)

In [33]:
# Bartlett's test for equal variances across the three species (transformed values).
statistic, p_value = bartlett(
    Trans_CHH_Tdu_DNA_down,
    Trans_CHH_Tpr_DNA_down,
    Trans_CHH_Tms_DNA_down,
)
print(statistic, p_value)

0.7380361687906523 0.6914129065529105


In [34]:
# Long-format table: one row per transformed value, labelled with its species.
CHH_DNA_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_DNA_down,
        *Trans_CHH_Tpr_DNA_down,
        *Trans_CHH_Tms_DNA_down,
    ],
})

# Fit methylation ~ species by OLS and build the ANOVA table from the fit.
model = ols('methylation ~ species', data=CHH_DNA_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_DNA_down['methylation'],
    groups=CHH_DNA_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df   sum_sq   mean_sq         F    PR(>F)
species   2.0  0.00081  0.000405  2.249901  0.252992
Residual  3.0  0.00054  0.000180       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0136 0.6204 -0.0696 0.0425  False
   Tdu    Tpr  -0.0284 0.2327 -0.0845 0.0276  False
   Tms    Tpr  -0.0149 0.5721 -0.0709 0.0412  False
---------------------------------------------------
