In [1]:
from scipy.stats import bartlett
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scipy.stats as stats
import numpy as np

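## The Bartlett, ANOVA and Tukey tests below use arcsine square-root-transformed data; the Tms-vs-MPV t-tests use untransformed values except where the transformed (Trans_*) variables are passed
## The methylation levels come from the Excel files CG/CHG/CHH_anova_new_Copia_Gypsy.xlsx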

## CG context; Gypsy; upstream

In [2]:
# CG-context methylation levels for the Gypsy upstream region:
# two values per species (Tdu, Tpr, Tms).
CG_Tdu_Gypsy_upstream = [0.913656164, 0.906735642]
CG_Tpr_Gypsy_upstream = [0.860383291, 0.872440118]
CG_Tms_Gypsy_upstream = [0.885644814, 0.882409619]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CG_Tdu_Gypsy_upstream,
    Trans_CG_Tpr_Gypsy_upstream,
    Trans_CG_Tms_Gypsy_upstream,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Gypsy_upstream, CG_Tpr_Gypsy_upstream, CG_Tms_Gypsy_upstream)
)

### Variance homogeneity test (Bartlett)

In [3]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CG / Gypsy / upstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Gypsy_upstream,
    Trans_CG_Tpr_Gypsy_upstream,
    Trans_CG_Tms_Gypsy_upstream,
)
print(statistic, p_value)

0.8912086795125518 0.6404371174449723


### ANOVA and post hoc Tukey

In [4]:
# Long-format table: one row per transformed measurement, labelled by species.
CG_Gypsy_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Gypsy_upstream,
        *Trans_CG_Tpr_Gypsy_upstream,
        *Trans_CG_Tms_Gypsy_upstream,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CG_Gypsy_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CG_Gypsy_upstream['methylation'],
    groups=CG_Gypsy_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004962  0.002481  30.616278  0.010094
Residual  3.0  0.000243  0.000081        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0432 0.0347 -0.0808 -0.0056   True
   Tdu    Tpr  -0.0698 0.0091 -0.1074 -0.0322   True
   Tms    Tpr  -0.0266 0.1168 -0.0642   0.011  False
----------------------------------------------------


### Tms vs MPV

In [5]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Gypsy_upstream,
    popmean=np.mean(CG_Tdu_Gypsy_upstream + CG_Tpr_Gypsy_upstream),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.6437894779140194
P-Value: 0.2302096752385309


## CG context; Gypsy; body

In [6]:
# CG-context methylation levels for the Gypsy body:
# two values per species (Tdu, Tpr, Tms).
CG_Tdu_Gypsy_body = [0.926584645, 0.925598166]
CG_Tpr_Gypsy_body = [0.885020325, 0.893161071]
CG_Tms_Gypsy_body = [0.906023602, 0.906410912]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CG_Tdu_Gypsy_body,
    Trans_CG_Tpr_Gypsy_body,
    Trans_CG_Tms_Gypsy_body,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Gypsy_body, CG_Tpr_Gypsy_body, CG_Tms_Gypsy_body)
)

In [7]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CG / Gypsy / body values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Gypsy_body,
    Trans_CG_Tpr_Gypsy_body,
    Trans_CG_Tms_Gypsy_body,
)
print(statistic, p_value)

4.550759160365327 0.102757896170615


In [8]:
# Long-format table: one row per transformed measurement, labelled by species.
CG_Gypsy_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Gypsy_body,
        *Trans_CG_Tpr_Gypsy_body,
        *Trans_CG_Tms_Gypsy_body,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CG_Gypsy_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CG_Gypsy_body['methylation'],
    groups=CG_Gypsy_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004133  0.002067  72.067461  0.002911
Residual  3.0  0.000086  0.000029        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0359 0.0138 -0.0583 -0.0135   True
   Tdu    Tpr  -0.0641 0.0026 -0.0865 -0.0418   True
   Tms    Tpr  -0.0282 0.0269 -0.0506 -0.0058   True
----------------------------------------------------


In [9]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Gypsy_body,
    popmean=np.mean(CG_Tdu_Gypsy_body + CG_Tpr_Gypsy_body),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.094031912421556
P-Value: 0.0891527768523871


In [11]:
# One-sample t-test of the raw (untransformed) Tms values against the MPV
# (mean of the pooled raw Tdu and Tpr values), using the one-sided
# alternative 'less' (H1: the Tms mean is below the MPV).
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Gypsy_body,
    popmean=np.mean(CG_Tdu_Gypsy_body + CG_Tpr_Gypsy_body),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.094031912421556
P-Value: 0.04457638842619355


### The result above means the T. miscellus Gypsy body CG methylation level is significantly lower than the MPV (one-sided test, p = 0.045)

In [12]:
# Two-sided one-sample t-test on the arcsine square-root-transformed Tms values.
# The reference value is the transform of the raw MPV (mean of the pooled raw
# Tdu and Tpr values), not the mean of the transformed parental values.
t_statistic, p_value = stats.ttest_1samp(
    Trans_CG_Tms_Gypsy_body,
    popmean=np.arcsin(np.sqrt(np.mean(CG_Tdu_Gypsy_body + CG_Tpr_Gypsy_body))),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.11703738241522
P-Value: 0.0888683300564721


## CG context; Gypsy; downstream

In [13]:
# CG-context methylation levels for the Gypsy downstream region:
# two values per species (Tdu, Tpr, Tms).
CG_Tdu_Gypsy_down = [0.91409032, 0.907505183]
CG_Tpr_Gypsy_down = [0.863781818, 0.875495954]
CG_Tms_Gypsy_down = [0.887787449, 0.885105909]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CG_Tdu_Gypsy_down,
    Trans_CG_Tpr_Gypsy_down,
    Trans_CG_Tms_Gypsy_down,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CG_Tdu_Gypsy_down, CG_Tpr_Gypsy_down, CG_Tms_Gypsy_down)
)

In [14]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CG / Gypsy / downstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CG_Tdu_Gypsy_down,
    Trans_CG_Tpr_Gypsy_down,
    Trans_CG_Tms_Gypsy_down,
)
print(statistic, p_value)

1.0866681592296343 0.5808085586486501


In [15]:
# Long-format table: one row per transformed measurement, labelled by species.
CG_Gypsy_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CG_Tdu_Gypsy_down,
        *Trans_CG_Tpr_Gypsy_down,
        *Trans_CG_Tms_Gypsy_down,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CG_Gypsy_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CG_Gypsy_down['methylation'],
    groups=CG_Gypsy_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004438  0.002219  29.323175  0.010735
Residual  3.0  0.000227  0.000076        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0405 0.0378 -0.0768 -0.0041   True
   Tdu    Tpr  -0.0661 0.0097 -0.1024 -0.0297   True
   Tms    Tpr  -0.0256 0.1176  -0.062  0.0107  False
----------------------------------------------------


In [16]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CG_Tms_Gypsy_down,
    popmean=np.mean(CG_Tdu_Gypsy_down + CG_Tpr_Gypsy_down),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -2.813040081445879
P-Value: 0.21744059431165902


## CHG; Gypsy; upstream

In [17]:
# CHG-context methylation levels for the Gypsy upstream region:
# two values per species (Tdu, Tpr, Tms).
CHG_Tdu_Gypsy_upstream = [0.740215078, 0.728185381]
CHG_Tpr_Gypsy_upstream = [0.671559782, 0.688658726]
CHG_Tms_Gypsy_upstream = [0.689155175, 0.683644349]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHG_Tdu_Gypsy_upstream,
    Trans_CHG_Tpr_Gypsy_upstream,
    Trans_CHG_Tms_Gypsy_upstream,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Gypsy_upstream, CHG_Tpr_Gypsy_upstream, CHG_Tms_Gypsy_upstream)
)

In [18]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHG / Gypsy / upstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Gypsy_upstream,
    Trans_CHG_Tpr_Gypsy_upstream,
    Trans_CHG_Tms_Gypsy_upstream,
)
print(statistic, p_value)

0.7389935324042115 0.691082018974821


In [19]:
# Long-format table: one row per transformed measurement, labelled by species.
CHG_Gypsy_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Gypsy_upstream,
        *Trans_CHG_Tpr_Gypsy_upstream,
        *Trans_CHG_Tms_Gypsy_upstream,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHG_Gypsy_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Gypsy_upstream['methylation'],
    groups=CHG_Gypsy_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004244  0.002122  22.869324  0.015271
Residual  3.0  0.000278  0.000093        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0527 0.0243  -0.093 -0.0125   True
   Tdu    Tpr  -0.0595 0.0174 -0.0997 -0.0192   True
   Tms    Tpr  -0.0067 0.7809  -0.047  0.0335  False
----------------------------------------------------


### Tms vs MPV

In [20]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_upstream,
    popmean=np.mean(CHG_Tdu_Gypsy_upstream + CHG_Tpr_Gypsy_upstream),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.532438785038786
P-Value: 0.08402573362864017


In [22]:
# One-sample t-test of the raw (untransformed) Tms values against the MPV
# (mean of the pooled raw Tdu and Tpr values), using the one-sided
# alternative 'less' (H1: the Tms mean is below the MPV).
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_upstream,
    popmean=np.mean(CHG_Tdu_Gypsy_upstream + CHG_Tpr_Gypsy_upstream),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.532438785038786
P-Value: 0.042012866814320086


### Tms vs MPV transformed

In [21]:
# Two-sided one-sample t-test on the arcsine square-root-transformed Tms values.
# The reference value is the transform of the raw MPV (mean of the pooled raw
# Tdu and Tpr values), not the mean of the transformed parental values.
t_statistic, p_value = stats.ttest_1samp(
    Trans_CHG_Tms_Gypsy_upstream,
    popmean=np.arcsin(np.sqrt(np.mean(CHG_Tdu_Gypsy_upstream + CHG_Tpr_Gypsy_upstream))),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -7.602733392748446
P-Value: 0.0832577033489889


## CHG; Gypsy; body

In [23]:
# CHG-context methylation levels for the Gypsy body:
# two values per species (Tdu, Tpr, Tms).
CHG_Tdu_Gypsy_body = [0.742895807, 0.74005215]
CHG_Tpr_Gypsy_body = [0.686054372, 0.697535611]
CHG_Tms_Gypsy_body = [0.701892291, 0.698806131]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHG_Tdu_Gypsy_body,
    Trans_CHG_Tpr_Gypsy_body,
    Trans_CHG_Tms_Gypsy_body,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Gypsy_body, CHG_Tpr_Gypsy_body, CHG_Tms_Gypsy_body)
)

In [24]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHG / Gypsy / body values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Gypsy_body,
    Trans_CHG_Tpr_Gypsy_body,
    Trans_CHG_Tms_Gypsy_body,
)
print(statistic, p_value)

1.6603097045424706 0.43598176832697055


In [25]:
# Long-format table: one row per transformed measurement, labelled by species.
CHG_Gypsy_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Gypsy_body,
        *Trans_CHG_Tpr_Gypsy_body,
        *Trans_CHG_Tms_Gypsy_body,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHG_Gypsy_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Gypsy_body['methylation'],
    groups=CHG_Gypsy_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.003488  0.001744  59.300256  0.003875
Residual  3.0  0.000088  0.000029        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0459 0.0071 -0.0685 -0.0232   True
   Tdu    Tpr  -0.0552 0.0042 -0.0778 -0.0325   True
   Tms    Tpr  -0.0093 0.3353 -0.0319  0.0134  False
----------------------------------------------------


### Tms vs MPV

In [26]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_body,
    popmean=np.mean(CHG_Tdu_Gypsy_body + CHG_Tpr_Gypsy_body),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -10.553745755242947
P-Value: 0.06014212989726989


In [27]:
# One-sample t-test of the raw (untransformed) Tms values against the MPV
# (mean of the pooled raw Tdu and Tpr values), using the one-sided
# alternative 'less' (H1: the Tms mean is below the MPV).
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_body,
    popmean=np.mean(CHG_Tdu_Gypsy_body + CHG_Tpr_Gypsy_body),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -10.553745755242947
P-Value: 0.030071064948634946


## CHG; Gypsy; downstream

In [28]:
# CHG-context methylation levels for the Gypsy downstream region:
# two values per species (Tdu, Tpr, Tms).
CHG_Tdu_Gypsy_down = [0.748681831, 0.739122509]
CHG_Tpr_Gypsy_down = [0.679376641, 0.696385177]
CHG_Tms_Gypsy_down = [0.698080503, 0.693155135]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHG_Tdu_Gypsy_down,
    Trans_CHG_Tpr_Gypsy_down,
    Trans_CHG_Tms_Gypsy_down,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHG_Tdu_Gypsy_down, CHG_Tpr_Gypsy_down, CHG_Tms_Gypsy_down)
)

In [29]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHG / Gypsy / downstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHG_Tdu_Gypsy_down,
    Trans_CHG_Tpr_Gypsy_down,
    Trans_CHG_Tms_Gypsy_down,
)
print(statistic, p_value)

0.8985771197540879 0.6380819472774413


In [30]:
# Long-format table: one row per transformed measurement, labelled by species.
CHG_Gypsy_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHG_Tdu_Gypsy_down,
        *Trans_CHG_Tpr_Gypsy_down,
        *Trans_CHG_Tms_Gypsy_down,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHG_Gypsy_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHG_Gypsy_down['methylation'],
    groups=CHG_Gypsy_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq          F    PR(>F)
species   2.0  0.004553  0.002276  28.134532  0.011388
Residual  3.0  0.000243  0.000081        NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Tdu    Tms  -0.0538  0.019 -0.0914 -0.0162   True
   Tdu    Tpr  -0.0622 0.0127 -0.0997 -0.0246   True
   Tms    Tpr  -0.0083 0.6626 -0.0459  0.0292  False
----------------------------------------------------


In [31]:
# Two-sided one-sample t-test of the raw (untransformed) Tms values against the
# MPV, taken here as the mean of the pooled raw Tdu and Tpr values.
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_down,
    popmean=np.mean(CHG_Tdu_Gypsy_down + CHG_Tpr_Gypsy_down),
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -8.232367814953088
P-Value: 0.0769542893982327


In [32]:
# One-sample t-test of the raw (untransformed) Tms values against the MPV
# (mean of the pooled raw Tdu and Tpr values), using the one-sided
# alternative 'less' (H1: the Tms mean is below the MPV).
# NOTE(review): the MPV is estimated from the data but enters the test as a
# fixed constant.
t_statistic, p_value = stats.ttest_1samp(
    CHG_Tms_Gypsy_down,
    popmean=np.mean(CHG_Tdu_Gypsy_down + CHG_Tpr_Gypsy_down),
    alternative='less',
)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -8.232367814953088
P-Value: 0.03847714469911635


## CHH; Gypsy; upstream

In [33]:
# CHH-context methylation levels for the Gypsy upstream region:
# two values per species (Tdu, Tpr, Tms).
CHH_Tdu_Gypsy_upstream = [0.09850718, 0.116944827]
CHH_Tpr_Gypsy_upstream = [0.095574438, 0.101768258]
CHH_Tms_Gypsy_upstream = [0.107710501, 0.104652609]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHH_Tdu_Gypsy_upstream,
    Trans_CHH_Tpr_Gypsy_upstream,
    Trans_CHH_Tms_Gypsy_upstream,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Gypsy_upstream, CHH_Tpr_Gypsy_upstream, CHH_Tms_Gypsy_upstream)
)

In [34]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHH / Gypsy / upstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Gypsy_upstream,
    Trans_CHH_Tpr_Gypsy_upstream,
    Trans_CHH_Tms_Gypsy_upstream,
)
print(statistic, p_value)

1.9455011680049032 0.3780417710659126


In [35]:
# Long-format table: one row per transformed measurement, labelled by species.
CHH_Gypsy_upstream = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Gypsy_upstream,
        *Trans_CHH_Tpr_Gypsy_upstream,
        *Trans_CHH_Tms_Gypsy_upstream,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHH_Gypsy_upstream).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Gypsy_upstream['methylation'],
    groups=CHH_Gypsy_upstream['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F   PR(>F)
species   2.0  0.000249  0.000124  0.733564  0.55035
Residual  3.0  0.000509  0.000170       NaN      NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0022 0.9841 -0.0567 0.0522  False
   Tdu    Tpr  -0.0146 0.5652 -0.0691 0.0398  False
   Tms    Tpr  -0.0124 0.6497 -0.0669  0.042  False
---------------------------------------------------


## CHH; Gypsy; body

In [36]:
# CHH-context methylation levels for the Gypsy body:
# two values per species (Tdu, Tpr, Tms).
CHH_Tdu_Gypsy_body = [0.091892474, 0.111690271]
CHH_Tpr_Gypsy_body = [0.093399427, 0.097345653]
CHH_Tms_Gypsy_body = [0.104367164, 0.102952177]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHH_Tdu_Gypsy_body,
    Trans_CHH_Tpr_Gypsy_body,
    Trans_CHH_Tms_Gypsy_body,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Gypsy_body, CHH_Tpr_Gypsy_body, CHH_Tms_Gypsy_body)
)

In [37]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHH / Gypsy / body values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Gypsy_body,
    Trans_CHH_Tpr_Gypsy_body,
    Trans_CHH_Tms_Gypsy_body,
)
print(statistic, p_value)

3.674068505277699 0.15928913761212687


In [38]:
# Long-format table: one row per transformed measurement, labelled by species.
CHH_Gypsy_body = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Gypsy_body,
        *Trans_CHH_Tpr_Gypsy_body,
        *Trans_CHH_Tms_Gypsy_body,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHH_Gypsy_body).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Gypsy_body['methylation'],
    groups=CHH_Gypsy_body['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000208  0.000104  0.555878  0.623219
Residual  3.0  0.000562  0.000187       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms   0.0034 0.9664 -0.0538 0.0606  False
   Tdu    Tpr  -0.0104 0.7486 -0.0676 0.0468  False
   Tms    Tpr  -0.0139 0.6198 -0.0711 0.0434  False
---------------------------------------------------


## CHH; Gypsy; downstream

In [39]:
# CHH-context methylation levels for the Gypsy downstream region:
# two values per species (Tdu, Tpr, Tms).
CHH_Tdu_Gypsy_down = [0.098936251, 0.116702487]
CHH_Tpr_Gypsy_down = [0.095376117, 0.101562317]
CHH_Tms_Gypsy_down = [0.107371233, 0.104423806]

# Arcsine square-root transform of each species' values. The results are kept
# as plain lists because later cells concatenate them with `+`.
(
    Trans_CHH_Tdu_Gypsy_down,
    Trans_CHH_Tpr_Gypsy_down,
    Trans_CHH_Tms_Gypsy_down,
) = (
    np.arcsin(np.sqrt(levels)).tolist()
    for levels in (CHH_Tdu_Gypsy_down, CHH_Tpr_Gypsy_down, CHH_Tms_Gypsy_down)
)

In [40]:
# Bartlett's test of equal variances across the three species, run on the
# transformed CHH / Gypsy / downstream values; prints the statistic and p-value.
statistic, p_value = bartlett(
    Trans_CHH_Tdu_Gypsy_down,
    Trans_CHH_Tpr_Gypsy_down,
    Trans_CHH_Tms_Gypsy_down,
)
print(statistic, p_value)

1.9084276240832034 0.3851147981830022


In [41]:
# Long-format table: one row per transformed measurement, labelled by species.
CHH_Gypsy_down = pd.DataFrame({
    'species': ['Tdu'] * 2 + ['Tpr'] * 2 + ['Tms'] * 2,
    'methylation': [
        *Trans_CHH_Tdu_Gypsy_down,
        *Trans_CHH_Tpr_Gypsy_down,
        *Trans_CHH_Tms_Gypsy_down,
    ],
})

# One-way ANOVA of transformed methylation on species (OLS fit, then ANOVA table).
model = ols(formula='methylation ~ species', data=CHH_Gypsy_down).fit()
anova_table = sm.stats.anova_lm(model)

# Tukey HSD post hoc comparison of every pair of species.
posthoc = pairwise_tukeyhsd(
    endog=CHH_Gypsy_down['methylation'],
    groups=CHH_Gypsy_down['species'],
)

print("ANOVA results:\n", anova_table)
print("\nPost hoc test results:\n", posthoc)

ANOVA results:
            df    sum_sq   mean_sq         F    PR(>F)
species   2.0  0.000259  0.000130  0.816609  0.521024
Residual  3.0  0.000476  0.000159       NaN       NaN

Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   Tdu    Tms  -0.0029 0.9723 -0.0555 0.0498  False
   Tdu    Tpr  -0.0152 0.5285 -0.0678 0.0375  False
   Tms    Tpr  -0.0123 0.6382 -0.0649 0.0404  False
---------------------------------------------------
