In [1]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats
import numpy as np

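## The Bartlett, ANOVA and Tukey tests below use arcsine square-root transformed data;
## the "Tms vs MPV" one-sample t-tests use the untransformed values unless the cell passes the Trans_* variables.
## The methylation levels are from the Excel files: CG/CHG/CHH_anova_new_Copia_Gypsy.xlsx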

## CG context; Copia; upstream

In [2]:
# Untransformed CG methylation levels for Copia upstream, two values per species.
CG_Tdu_Copia_upstream = [0.903119136, 0.891431991]
CG_Tpr_Copia_upstream = [0.83669493, 0.853086323]
CG_Tms_Copia_upstream = [0.868074974, 0.862105898]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CG_Tdu_Copia_upstream, Trans_CG_Tpr_Copia_upstream, Trans_CG_Tms_Copia_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Copia_upstream, CG_Tpr_Copia_upstream, CG_Tms_Copia_upstream)
)

### Variance homogeneity test (Bartlett)

In [3]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Copia_upstream,
    Trans_CG_Tpr_Copia_upstream,
    Trans_CG_Tms_Copia_upstream,
)
print(statistic, p_value)

0.5636695780044587 0.7543983091354969


### ANOVA and post hoc Tukey

In [4]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CG_Copia_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Copia_upstream,
        *Trans_CG_Tpr_Copia_upstream,
        *Trans_CG_Tms_Copia_upstream,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CG_Copia_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CG_Copia_upstream['methylation'],
    groups=CG_Copia_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df   sum_sq  mean_sq          F    PR(>F)
species   2.0  0.00632  0.00316  19.751886  0.018752
Residual  3.0  0.00048  0.00016        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0499 0.0578 -0.1028  0.0029  False
   Tdu    Tpr  -0.0785 0.0171 -0.1314 -0.0257   True
   Tms    Tpr  -0.0286 0.2059 -0.0815  0.0243  False
----------------------------------------------------


### Tms vs MPV

In [5]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Copia_upstream,
    popmean=np.mean(CG_Tdu_Copia_upstream + CG_Tpr_Copia_upstream),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.0079017254932086
P-Value: 0.2941643270783814


## CG context; Copia; body

In [2]:
# Untransformed CG methylation levels for Copia body, two values per species.
CG_Tdu_Copia_body = [0.928795993, 0.926723414]
CG_Tpr_Copia_body = [0.875911121, 0.883135738]
CG_Tms_Copia_body = [0.899822095, 0.900261217]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CG_Tdu_Copia_body, Trans_CG_Tpr_Copia_body, Trans_CG_Tms_Copia_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Copia_body, CG_Tpr_Copia_body, CG_Tms_Copia_body)
)

In [3]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Copia_body,
    Trans_CG_Tpr_Copia_body,
    Trans_CG_Tms_Copia_body,
)
print(statistic, p_value)

3.1566456795611804 0.20632084129801867


In [4]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CG_Copia_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Copia_body,
        *Trans_CG_Tpr_Copia_body,
        *Trans_CG_Tms_Copia_body,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CG_Copia_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CG_Copia_body['methylation'],
    groups=CG_Copia_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq           F   PR(>F)
species   2.0  0.006871  0.003436  147.525697  0.00101
Residual  3.0  0.000070  0.000023         NaN      NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0496  0.004 -0.0697 -0.0294   True
   Tdu    Tpr  -0.0823 0.0009 -0.1025 -0.0622   True
   Tms    Tpr  -0.0328 0.0133 -0.0529 -0.0126   True
----------------------------------------------------


In [9]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Copia_body,
    popmean=np.mean(CG_Tdu_Copia_body + CG_Tpr_Copia_body),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -16.39594691224954
P-Value: 0.038779837078097484


In [11]:
# Same comparison as the two-sided test, but one-sided: alternative='less' tests whether
# the mean of the untransformed Tms values is below the pooled Tdu/Tpr mean.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Copia_body,
    popmean=np.mean(CG_Tdu_Copia_body + CG_Tpr_Copia_body),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -16.39594691224954
P-Value: 0.019389918539048742


### The result above means the T. miscellus Copia body methylation level is significantly lower than the MPV

In [10]:
# Transformed variant: the Tms values are the arcsine square-root transformed ones, and the
# reference is the transform of the pooled Tdu/Tpr mean (mean taken first, then transformed).
t_statistic, p_value = stats.ttest_1samp(
    Trans_CG_Tms_Copia_body,
    popmean=np.arcsin(np.sqrt(np.mean(CG_Tdu_Copia_body + CG_Tpr_Copia_body))),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -16.529227845753947
P-Value: 0.03846790538391967


## CG context; Copia; downstream

In [12]:
# Untransformed CG methylation levels for Copia downstream, two values per species.
CG_Tdu_Copia_down = [0.904247749, 0.892058444]
CG_Tpr_Copia_down = [0.837105468, 0.854459954]
CG_Tms_Copia_down = [0.868804366, 0.862680547]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CG_Tdu_Copia_down, Trans_CG_Tpr_Copia_down, Trans_CG_Tms_Copia_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Copia_down, CG_Tpr_Copia_down, CG_Tms_Copia_down)
)

In [13]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Copia_down,
    Trans_CG_Tpr_Copia_down,
    Trans_CG_Tms_Copia_down,
)
print(statistic, p_value)

0.5946439201861151 0.7428048203497813


In [14]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CG_Copia_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Copia_down,
        *Trans_CG_Tpr_Copia_down,
        *Trans_CG_Tms_Copia_down,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CG_Copia_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CG_Copia_down['methylation'],
    groups=CG_Copia_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.006365  0.003183  17.933749  0.021444
Residual  3.0  0.000532  0.000177        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0504 0.0643 -0.1061  0.0052  False
   Tdu    Tpr  -0.0788 0.0196 -0.1344 -0.0231   True
   Tms    Tpr  -0.0283 0.2318  -0.084  0.0274  False
----------------------------------------------------


In [15]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Copia_down,
    popmean=np.mean(CG_Tdu_Copia_down + CG_Tpr_Copia_down),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.0331911344864433
P-Value: 0.29099663997789593


## CHG; Copia; upstream

In [16]:
# Untransformed CHG methylation levels for Copia upstream, two values per species.
CHG_Tdu_Copia_upstream = [0.745737733, 0.731069643]
CHG_Tpr_Copia_upstream = [0.667292501, 0.688658192]
CHG_Tms_Copia_upstream = [0.689851464, 0.682041371]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHG_Tdu_Copia_upstream, Trans_CHG_Tpr_Copia_upstream, Trans_CHG_Tms_Copia_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Copia_upstream, CHG_Tpr_Copia_upstream, CHG_Tms_Copia_upstream)
)

In [17]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Copia_upstream,
    Trans_CHG_Tpr_Copia_upstream,
    Trans_CHG_Tms_Copia_upstream,
)
print(statistic, p_value)

0.6013265434643725 0.7403270198150838


In [18]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHG_Copia_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Copia_upstream,
        *Trans_CHG_Tpr_Copia_upstream,
        *Trans_CHG_Tms_Copia_upstream,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHG_Copia_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Copia_upstream['methylation'],
    groups=CHG_Copia_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.005243  0.002622  18.035696  0.021276
Residual  3.0  0.000436  0.000145        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms   -0.058 0.0345 -0.1084 -0.0076   True
   Tdu    Tpr  -0.0665 0.0238 -0.1169 -0.0162   True
   Tms    Tpr  -0.0085 0.7771 -0.0589  0.0419  False
----------------------------------------------------


### Tms vs MPV

In [19]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Copia_upstream,
    popmean=np.mean(CHG_Tdu_Copia_upstream + CHG_Tpr_Copia_upstream),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.695988447256646
P-Value: 0.11063882746345653


### Tms vs MPV transformed

In [20]:
# Transformed variant: the Tms values are the arcsine square-root transformed ones, and the
# reference is the transform of the pooled Tdu/Tpr mean (mean taken first, then transformed).
t_statistic, p_value = stats.ttest_1samp(
    Trans_CHG_Tms_Copia_upstream,
    popmean=np.arcsin(np.sqrt(np.mean(CHG_Tdu_Copia_upstream + CHG_Tpr_Copia_upstream))),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.752238808656239
P-Value: 0.1095782501682041


## CHG; Copia; body

In [21]:
# Untransformed CHG methylation levels for Copia body, two values per species.
CHG_Tdu_Copia_body = [0.821439146, 0.817313519]
CHG_Tpr_Copia_body = [0.763143268, 0.773038406]
CHG_Tms_Copia_body = [0.777663571, 0.776843105]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHG_Tdu_Copia_body, Trans_CHG_Tpr_Copia_body, Trans_CHG_Tms_Copia_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Copia_body, CHG_Tpr_Copia_body, CHG_Tms_Copia_body)
)

In [22]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Copia_body,
    Trans_CHG_Tpr_Copia_body,
    Trans_CHG_Tms_Copia_body,
)
print(statistic, p_value)

2.6359506381539606 0.267676713623581


In [23]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHG_Copia_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Copia_body,
        *Trans_CHG_Tpr_Copia_body,
        *Trans_CHG_Tms_Copia_body,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHG_Copia_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Copia_body['methylation'],
    groups=CHG_Copia_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004606  0.002303  82.672189  0.002379
Residual  3.0  0.000084  0.000028        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0526 0.0044 -0.0746 -0.0305   True
   Tdu    Tpr  -0.0635 0.0025 -0.0855 -0.0414   True
   Tms    Tpr  -0.0109 0.2439  -0.033  0.0111  False
----------------------------------------------------


### Tms vs MPV

In [24]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Copia_body,
    popmean=np.mean(CHG_Tdu_Copia_body + CHG_Tpr_Copia_body),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -40.172893818874954
P-Value: 0.01584372622316915


In [26]:
# Same comparison as the two-sided test, but one-sided: alternative='less' tests whether
# the mean of the untransformed Tms values is below the pooled Tdu/Tpr mean.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Copia_body,
    popmean=np.mean(CHG_Tdu_Copia_body + CHG_Tpr_Copia_body),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -40.172893818874954
P-Value: 0.007921863111584574


## CHG; Copia; downstream

In [27]:
# Untransformed CHG methylation levels for Copia downstream, two values per species.
CHG_Tdu_Copia_down = [0.74766884, 0.733640324]
CHG_Tpr_Copia_down = [0.667451824, 0.688657798]
CHG_Tms_Copia_down = [0.691833883, 0.684096469]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHG_Tdu_Copia_down, Trans_CHG_Tpr_Copia_down, Trans_CHG_Tms_Copia_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Copia_down, CHG_Tpr_Copia_down, CHG_Tms_Copia_down)
)

In [28]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Copia_down,
    Trans_CHG_Tpr_Copia_down,
    Trans_CHG_Tms_Copia_down,
)
print(statistic, p_value)

0.604540325988146 0.739138350083279


In [29]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHG_Copia_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Copia_down,
        *Trans_CHG_Tpr_Copia_down,
        *Trans_CHG_Tms_Copia_down,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHG_Copia_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Copia_down['methylation'],
    groups=CHG_Copia_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.005524  0.002762  19.703346  0.018816
Residual  3.0  0.000421  0.000140        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0584 0.0322 -0.1079 -0.0089   True
   Tdu    Tpr   -0.069 0.0204 -0.1185 -0.0195   True
   Tms    Tpr  -0.0106  0.679 -0.0601  0.0389  False
----------------------------------------------------


In [30]:
# One-sample t-test of the untransformed Tms values against the MPV.
# The `+` concatenates the Tdu and Tpr lists, so the reference is the mean of all four values.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Copia_down,
    popmean=np.mean(CHG_Tdu_Copia_down + CHG_Tpr_Copia_down),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -5.528855118777368
P-Value: 0.11391343282853528


## CHH; Copia; upstream

In [31]:
# Untransformed CHH methylation levels for Copia upstream, two values per species.
CHH_Tdu_Copia_upstream = [0.106638489, 0.123647298]
CHH_Tpr_Copia_upstream = [0.097503003, 0.105718738]
CHH_Tms_Copia_upstream = [0.112049017, 0.108028287]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHH_Tdu_Copia_upstream, Trans_CHH_Tpr_Copia_upstream, Trans_CHH_Tms_Copia_upstream = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Copia_upstream, CHH_Tpr_Copia_upstream, CHH_Tms_Copia_upstream)
)

In [32]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Copia_upstream,
    Trans_CHH_Tpr_Copia_upstream,
    Trans_CHH_Tms_Copia_upstream,
)
print(statistic, p_value)

1.1946481476905886 0.5502821821639055


In [33]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHH_Copia_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Copia_upstream,
        *Trans_CHH_Tpr_Copia_upstream,
        *Trans_CHH_Tms_Copia_upstream,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHH_Copia_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Copia_upstream['methylation'],
    groups=CHH_Copia_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000479  0.000240  1.533776  0.347665
Residual  3.0  0.000468  0.000156       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0079 0.8154 -0.0601 0.0443  False
   Tdu    Tpr  -0.0216 0.3298 -0.0738 0.0306  False
   Tms    Tpr  -0.0137 0.5765  -0.066 0.0385  False
---------------------------------------------------


## CHH; Copia; body

In [34]:
# Untransformed CHH methylation levels for Copia body, two values per species.
CHH_Tdu_Copia_body = [0.095655382, 0.117189578]
CHH_Tpr_Copia_body = [0.094727815, 0.099611501]
CHH_Tms_Copia_body = [0.107353653, 0.105608699]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHH_Tdu_Copia_body, Trans_CHH_Tpr_Copia_body, Trans_CHH_Tms_Copia_body = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Copia_body, CHH_Tpr_Copia_body, CHH_Tms_Copia_body)
)

In [35]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Copia_body,
    Trans_CHH_Tpr_Copia_body,
    Trans_CHH_Tms_Copia_body,
)
print(statistic, p_value)

3.325083190244443 0.1896563373981315


In [36]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHH_Copia_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Copia_body,
        *Trans_CHH_Tpr_Copia_body,
        *Trans_CHH_Tms_Copia_body,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHH_Copia_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Copia_body['methylation'],
    groups=CHH_Copia_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000307  0.000154  0.710471  0.558997
Residual  3.0  0.000649  0.000216       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms   0.0005 0.9994  -0.061 0.0619  False
   Tdu    Tpr  -0.0149 0.6181 -0.0764 0.0465  False
   Tms    Tpr  -0.0154 0.6018 -0.0769  0.046  False
---------------------------------------------------


## CHH; Copia; downstream

In [37]:
# Untransformed CHH methylation levels for Copia downstream, two values per species.
CHH_Tdu_Copia_down = [0.107737857, 0.12424714]
CHH_Tpr_Copia_down = [0.098153554, 0.106506922]
CHH_Tms_Copia_down = [0.112863317, 0.108802279]

# Arcsine square-root transform of each species' values, stored as plain Python lists.
Trans_CHH_Tdu_Copia_down, Trans_CHH_Tpr_Copia_down, Trans_CHH_Tms_Copia_down = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Copia_down, CHH_Tpr_Copia_down, CHH_Tms_Copia_down)
)

In [38]:
# Bartlett's test across the three species' transformed values; prints statistic then p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Copia_down,
    Trans_CHH_Tpr_Copia_down,
    Trans_CHH_Tms_Copia_down,
)
print(statistic, p_value)

1.1220398911302822 0.5706267587076275


In [39]:
# Long-format table: one row per measurement, pairing a species label with its transformed value.
CHH_Copia_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Copia_down,
        *Trans_CHH_Tpr_Copia_down,
        *Trans_CHH_Tms_Copia_down,
    ],
})

# ANOVA table from an OLS fit of methylation on species.
model = ols('methylation ~ species', data=CHH_Copia_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD pairwise comparisons between the species groups.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Copia_down['methylation'],
    groups=CHH_Copia_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000486  0.000243  1.624603  0.332617
Residual  3.0  0.000449  0.000150       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0079 0.8055  -0.059 0.0431  False
   Tdu    Tpr  -0.0218  0.315 -0.0729 0.0293  False
   Tms    Tpr  -0.0138 0.5618 -0.0649 0.0373  False
---------------------------------------------------
