In [None]:
# unpaired two-sample t-test for VIM (Fig 1d)
from scipy.stats import ttest_ind

# Data for each group
null_group = [0.805535366, 0.998628396, 1.195836238]
tgfb_group = [8.988122799, 9.305088265, 11.3767757]

# Perform unpaired two-sample t-test (independent t-test)
t_stat, p_value = ttest_ind(null_group, tgfb_group, equal_var=False)  # Use Welch's t-test (unequal variances)

print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4e}")


T-statistic: -11.7371
P-value: 6.0980e-03


In [None]:
# unpaired two-sample t-test for CDH1 (Fig 1d)
from scipy.stats import ttest_ind

# Data for each group
null_group = [0.9229823, 1.024112983, 1.052904718]
tgfb_group = [0.31960788, 0.313030445, 0.296144847]

# Perform unpaired two-sample t-test (independent t-test)
t_stat, p_value = ttest_ind(null_group, tgfb_group, equal_var=False)  # Use Welch's t-test (unequal variances)

print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_value:.4e}")


T-statistic: 17.2556
P-value: 2.5404e-03


In [None]:
# Two-way anova for (Fig 2b) CD51/61
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        1.809278351, 0.541237113, 0.649484536,   # miRNC -
        6.572164948,5.597938144, 10.15979381,   # miRNC +
        1.098544233, 0.849944009, 1.051511758,  # miR22 -
        1.945128779, 2.008958567, 2.116461366   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                             sum_sq   df          F    PR(>F)
C(genotype)               22.030534  1.0  14.007977  0.005683
C(treatment)              41.814997  1.0  26.587804  0.000868
C(genotype):C(treatment)  22.030534  1.0  14.007977  0.005683
Residual                  12.581708  8.0        NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
miR22+ miR22-  -1.0235 0.7541 -4.3026  2.2555  False
miR22+ miRNC+   5.4198 0.0033  2.1407  8.6988   True
miR22+ miRNC-  -1.0235 0.7541 -4.3026  2.2555  False
miR22- miRNC+   6.4433 0.0011  3.1642  9.7224   True
miR22- miRNC-      0.0    1.0 -3.2791  3.2791  False
miRNC+ miRNC-  -6.4433 0.0011 -9.7224 -3.1642   True
----------------------------------------------------


In [None]:
# Two-way anova for (Fig S6a) EPCAM
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        1.372863248, 1.388888889, 0.238247863,   # miRNC -
        0.729166667, 1.298076923, 0.172275641,   # miRNC +
        0.923076923, 1.065088757, 1.01183432,  # miR22 -
        0.829881657, 0.827662722, 0.75443787   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                            sum_sq   df         F    PR(>F)
C(genotype)               0.003762  1.0  0.019821  0.891518
C(treatment)              0.160661  1.0  0.846559  0.384432
C(genotype):C(treatment)  0.003762  1.0  0.019821  0.891518
Residual                  1.518246  8.0       NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
miR22+ miR22-    0.196 0.9437 -0.9431 1.3351  False
miR22+ miRNC+  -0.0708  0.997 -1.2099 1.0682  False
miR22+ miRNC-    0.196 0.9437 -0.9431 1.3351  False
miR22- miRNC+  -0.2668 0.8742 -1.4059 0.8722  False
miR22- miRNC-      0.0    1.0 -1.1391 1.1391  False
miRNC+ miRNC-   0.2668 0.8742 -0.8722 1.4059  False
---------------------------------------------------


In [3]:
# Two-way anova for (Fig 2c, VIM)
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        11.642, 10.949, 13.254,   # miRNC -
        15.669, 15.435, 14.945,   # miRNC +
        12.583, 11.997, 12.107,  # miR22 -
        14.752, 14.497, 14.437   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                             sum_sq   df          F    PR(>F)
C(genotype)                0.192787  1.0   0.464510  0.514765
C(treatment)              24.661934  1.0  59.421741  0.000057
C(genotype):C(treatment)   0.856002  1.0   2.062496  0.188889
Residual                   3.320257  8.0        NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
miR22+ miR22-   -2.333 0.0094 -4.0175 -0.6485   True
miR22+ miRNC+   0.7877 0.4814 -0.8968  2.4721  False
miR22+ miRNC-  -2.6137 0.0048 -4.2981 -0.9292   True
miR22- miRNC+   3.1207 0.0016  1.4362  4.8051   True
miR22- miRNC-  -0.2807 0.9484 -1.9651  1.4038  False
miRNC+ miRNC-  -3.4013 0.0009 -5.0858 -1.7169   True
----------------------------------------------------


In [5]:
# Two-way anova for (Fig 2c, CDH1)
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        15.123, 15.863, 14.933,   # miRNC -
        8.044, 8.332, 9.607,   # miRNC +
        18.071, 16.532, 15.997,  # miR22 -
        13.333, 11.142, 14.258   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                             sum_sq   df          F    PR(>F)
C(genotype)               25.319980  1.0  21.763528  0.001613
C(treatment)              84.285901  1.0  72.447076  0.000028
C(genotype):C(treatment)   5.425730  1.0   4.663630  0.062834
Residual                   9.307307  8.0        NaN       NaN

Tukey HSD post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
miR22+ miR22-   3.9557 0.0087   1.1354  6.7759   True
miR22+ miRNC+    -4.25 0.0057  -7.0703 -1.4297   True
miR22+ miRNC-   2.3953 0.0988  -0.4249  5.2156  False
miR22- miRNC+  -8.2057 0.0001 -11.0259 -5.3854   True
miR22- miRNC-  -1.5603 0.3516  -4.3806  1.2599  False
miRNC+ miRNC-   6.6453 0.0003   3.8251  9.4656   True
-----------------------------------------------------


In [None]:
# Two-way anova for (Fig 2d) VIM
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        0.805535366, 0.998628396, 1.195836238,   # miRNC -
        8.988122799, 9.305088265, 11.3767757,   # miRNC +
        2.039223, 2.74731, 2.053407,  # miR22 -
        6.671540076, 8.270759602, 7.300624282   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                              sum_sq   df           F        PR(>F)
C(genotype)                 1.072288  1.0    1.692637  2.294729e-01
C(treatment)              147.511239  1.0  232.850722  3.373490e-07
C(genotype):C(treatment)   10.578779  1.0   16.698907  3.502844e-03
Residual                    5.068011  8.0         NaN           NaN

Tukey HSD post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
miR22+ miR22-  -5.1343 0.0002  -7.2154 -3.0532   True
miR22+ miRNC+   2.4757 0.0215   0.3946  4.5568   True
miR22+ miRNC-  -6.4143    0.0  -8.4954 -4.3332   True
miR22- miRNC+     7.61    0.0   5.5289  9.6911   True
miR22- miRNC-    -1.28 0.2746  -3.3611  0.8011  False
miRNC+ miRNC-    -8.89    0.0 -10.9711 -6.8089   True
-----------------------------------------------------


In [None]:
# Two-way anova for (Fig S6b) CDH1
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        0.9229823, 1.024112983, 1.052904718,   # miRNC -
        0.31960788, 0.313030445, 0.296144847,   # miRNC +
        0.501518521, 0.652647043, 0.604734959,  # miR22 -
        0.187423646, 0.210862563, 0.206523075   # miR22 +
    ],
    'genotype': ['miRNC']*6 + ['miR22']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                            sum_sq   df           F        PR(>F)
C(genotype)               0.204121  1.0   74.750115  2.486967e-05
C(treatment)              0.866884  1.0  317.456973  1.008229e-07
C(genotype):C(treatment)  0.070093  1.0   25.668476  9.693164e-04
Residual                  0.021846  8.0         NaN           NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
miR22+ miR22-   0.3847 0.0001  0.2481  0.5213   True
miR22+ miRNC+    0.108 0.1287 -0.0286  0.2446  False
miR22+ miRNC-   0.7984    0.0  0.6618   0.935   True
miR22- miRNC+  -0.2767 0.0009 -0.4133 -0.1401   True
miR22- miRNC-   0.4137    0.0  0.2771  0.5503   True
miRNC+ miRNC-   0.6904    0.0  0.5538   0.827   True
----------------------------------------------------


In [None]:
# 2-way ANOVA (fig 3c)
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        1,1,1,  # WT-
        2.202274319,1.640575185,4.033146824,  # WT+
        1.891695676,0.99534492,0.624911636,  # S1-
        3.900888001,2.107955346,4.473172985   # S1+
    ],
    'genotype': ['WT']*6 + ['S1']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(endog=df['value'], groups=df['group'], alpha=0.05)
print("\nTukey HSD post hoc test results:")
print(tukey.summary())

                             sum_sq   df          F    PR(>F)
C(genotype)                0.810146  1.0   0.922640  0.364911
C(treatment)              11.694096  1.0  13.317892  0.006500
C(genotype):C(treatment)   0.365427  1.0   0.416168  0.536913
Residual                   7.024593  8.0        NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   S1+    S1-  -2.3234 0.0632 -4.7735  0.1268  False
   S1+    WT+  -0.8687 0.6798 -3.3188  1.5815  False
   S1+    WT-   -2.494 0.0461 -4.9441 -0.0439   True
   S1-    WT+   1.4547 0.2995 -0.9954  3.9048  False
   S1-    WT-  -0.1707 0.9958 -2.6208  2.2795  False
   WT+    WT-  -1.6253 0.2243 -4.0755  0.8248  False
----------------------------------------------------


In [3]:
# 3-way ANOVA (Fig S13) SNAI1, WT v. MUT with miR22 overexpression
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create dataframe in long format
data = {
    'value': [
        # WT miR-NC -
        0.927463696, 1.0434509, 1.029085404,
        # WT miR-NC +
        1.966455318, 1.873320057, 2.471860635,
        # WT miR-22 -
        0.347623473, 0.311131723, 0.33811769,
        # WT miR-22 +
        1.564386949, 0.892297935, 1.10618862,
        # MUT miR-22 -
        3.064384663, 3.353336834, 2.559030612,
        # MUT miR-22 +
        3.619015798, 2.506366524, 3.695059013
    ],
    'genotype': (
        ['WT']*12 + ['MUT']*6
    ),
    'treatment': (
        ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
    ),
    'miRNA': (
        ['miR-NC']*6 + ['miR-22']*12
    )
}

df = pd.DataFrame(data)

# Three-way ANOVA: Genotype Ã— miRNA x Treatment
model = ols(
    'value ~ C(genotype) * C(miRNA) * C(treatment)',
    data=df
).fit()

anova_3way = sm.stats.anova_lm(model, typ=2)

print("Three-way ANOVA results:")
print(anova_3way)


# Create interaction groups for post-hoc testing
df['group'] = (
    df['genotype'] + '_' +
    df['miRNA'] + '_' +
    df['treatment']
)

tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())


Three-way ANOVA results:
                                         sum_sq    df             F  \
C(genotype)                        1.563280e+01   1.0  1.129526e+02   
C(miRNA)                          -3.056111e-16   1.0 -2.208149e-15   
C(treatment)                       2.509790e+00   1.0  1.813412e+01   
C(genotype):C(miRNA)               1.881705e+00   1.0  1.359598e+01   
C(genotype):C(treatment)           4.877295e-01   1.0  3.524019e+00   
C(miRNA):C(treatment)             -4.412612e-15   1.0 -3.188269e-14   
C(genotype):C(miRNA):C(treatment)  4.633101e-02   1.0  3.347580e-01   
Residual                           1.660818e+00  12.0           NaN   

                                         PR(>F)  
C(genotype)                        1.847605e-07  
C(miRNA)                           1.000000e+00  
C(treatment)                       1.110622e-03  
C(genotype):C(miRNA)               3.107223e-03  
C(genotype):C(treatment)           8.500159e-02  
C(miRNA):C(treatment)              

In [None]:
# Two-way anova for (Fig S6b) SNAI1
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        10.362, 9.244, 8.599,   # WT -
        8.568, 9.873, 8.214,   # WT +
        8.688, 9.343, 8.348,  # Sm22mut -
        6.996, 7.626, 6.574   # Sm22mut +
    ],
    'genotype': ['WT']*6 + ['Sm22mut']*6,
    'treatment': ['-']*3 + ['+']*3 + ['-']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("Two-way ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(
    endog=df['value'],
    groups=df['group'],
    alpha=0.05
)

print("\nTukey HSD post hoc test results:")
print(tukey.summary())

Two-way ANOVA results:
                            sum_sq   df         F    PR(>F)
C(genotype)               4.422602  1.0  8.443385  0.019717
C(treatment)              3.777774  1.0  7.212315  0.027686
C(genotype):C(treatment)  1.099891  1.0  2.099850  0.185352
Residual                  4.190359  8.0       NaN       NaN

Tukey HSD post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
 group1   group2  meandiff p-adj   lower  upper  reject
-------------------------------------------------------
Sm22mut+ Sm22mut-   1.7277 0.0741 -0.1647   3.62  False
Sm22mut+      WT+   1.8197 0.0595 -0.0727  3.712  False
Sm22mut+      WT-   2.3363 0.0177   0.444 4.2287   True
Sm22mut-      WT+    0.092 0.9985 -1.8004 1.9844  False
Sm22mut-      WT-   0.6087 0.7377 -1.2837  2.501  False
     WT+      WT-   0.5167  0.818 -1.3757  2.409  False
-------------------------------------------------------


In [1]:
# 2-way ANOVA VIM (fig 4C)

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        1,1,1,  # WT--
        3.977132488,3.237186575,6.647343578,  # WT+
        1.367380928,1.038353359,0.654808568,  # S1--
        7.950029301,7.82171548,3.137132066   # S1+
    ],
    'genotype': ['WT']*6 + ['S1']*6,
    'treatment': ['--']*3 + ['+']*3 + ['--']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(endog=df['value'], groups=df['group'], alpha=0.05)
print("\nTukey HSD post hoc test results:")
print(tukey.summary())

                             sum_sq   df          F    PR(>F)
C(genotype)                2.174099  1.0   0.800336  0.397109
C(treatment)              59.451993  1.0  21.885643  0.001586
C(genotype):C(treatment)   2.072241  1.0   0.762839  0.407893
Residual                  21.731870  8.0        NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   S1+   S1--  -5.2828 0.0184 -9.5923 -0.9733   True
   S1+    WT+  -1.6824 0.6155 -5.9919  2.6271  False
   S1+   WT--   -5.303  0.018 -9.6125 -0.9935   True
  S1--    WT+   3.6004 0.1052 -0.7091  7.9099  False
  S1--   WT--  -0.0202    1.0 -4.3297  4.2893  False
   WT+   WT--  -3.6206  0.103 -7.9301  0.6889  False
----------------------------------------------------


In [4]:
# 2-way ANOVA for CDH1 (fig 4C)

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create the dataframe
data = {
    'value': [
        1,1,1,  # WT--
        0.766249333,0.474012198,0.71871152,  # WT+
        0.872662606,0.619504644,1.390942265,  # S1--
        0.072494753,0.03468477,0.1637397   # S1+
    ],
    'genotype': ['WT']*6 + ['S1']*6,
    'treatment': ['--']*3 + ['+']*3 + ['--']*3 + ['+']*3
}

df = pd.DataFrame(data)

# Two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)
# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(endog=df['value'], groups=df['group'], alpha=0.05)
print("\nTukey HSD post hoc test results:")
print(tukey.summary())

                            sum_sq   df          F    PR(>F)
C(genotype)               0.271485  1.0   5.913804  0.041085
C(treatment)              1.112166  1.0  24.226480  0.001161
C(genotype):C(treatment)  0.205713  1.0   4.481073  0.067153
Residual                  0.367256  8.0        NaN       NaN

Tukey HSD post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
   S1+   S1--   0.8707 0.0048  0.3105  1.431   True
   S1+    WT+   0.5627  0.049  0.0025 1.1229   True
   S1+   WT--   0.9097 0.0036  0.3495 1.4699   True
  S1--    WT+   -0.308 0.3562 -0.8683 0.2522  False
  S1--   WT--    0.039 0.9958 -0.5213 0.5992  False
   WT+   WT--    0.347 0.2697 -0.2132 0.9072  False
---------------------------------------------------


In [None]:
# 2-way ANOVA (fig 4e)

import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Create data in long format
data = {
    'value': [
        18.66, 24.62, 22.18,  # WT--
        48.65, 45.54, 50.05,  # WT+
        21.36, 10.08, 15.95,   # S1--
        60.92, 71.59, 79.67   # S1+
    ],
    'genotype': ['WT']*6 + ['S1']*6,
    'treatment': ['-','-','-', '+','+','+']*2
}

df = pd.DataFrame(data)

# Run two-way ANOVA
model = ols('value ~ C(genotype) * C(treatment)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("ANOVA results:")
print(anova_table)

# Create interaction group for post-hoc
df['group'] = df['genotype'] + df['treatment']

# Run Tukey HSD post hoc test
tukey = pairwise_tukeyhsd(endog=df['value'], groups=df['group'], alpha=0.05)
print("\nTukey HSD post hoc test results:")
print(tukey.summary())


ANOVA results:
                               sum_sq   df           F    PR(>F)
C(genotype)                207.251408  1.0    6.159802  0.037999
C(treatment)              4943.862075  1.0  146.938498  0.000002
C(genotype):C(treatment)   616.476675  1.0   18.322549  0.002686
Residual                   269.166333  8.0         NaN       NaN

Tukey HSD post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj   lower    upper   reject
------------------------------------------------------
   S1+    S1-   -54.93    0.0 -70.0966 -39.7634   True
   S1+    WT+ -22.6467 0.0061 -37.8133    -7.48   True
   S1+    WT- -48.9067    0.0 -64.0733   -33.74   True
   S1-    WT+  32.2833 0.0006  17.1167    47.45   True
   S1-    WT-   6.0233 0.6034  -9.1433    21.19  False
   WT+    WT-   -26.26 0.0024 -41.4266 -11.0934   True
------------------------------------------------------


In [1]:
import sys
print(sys.version)

3.14.0 (tags/v3.14.0:ebf955d, Oct  7 2025, 10:15:03) [MSC v.1944 64 bit (AMD64)]
