In [21]:
import re
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm


def clean_column_name(name):
    """Return *name* with every character that is not an ASCII letter removed.

    Spaces, digits, underscores, punctuation and non-ASCII symbols are all
    dropped, e.g. ``"Plant Height (cm)"`` becomes ``"PlantHeightcm"``.
    """
    non_letters = re.compile(r'[^a-zA-Z]')
    return non_letters.sub('', name)

def compute_two_way_anova(df, numeric_vars, group1, group2, alpha=0.05):
    """Run a two-way ANOVA with interaction for each variable and tabulate it.

    For every entry of ``numeric_vars`` an OLS model
    ``var ~ C(group1) * C(group2)`` is fitted and its Type II ANOVA table
    (``anova_lm(..., typ=2)``) is flattened into one output row per source
    of variation (both factors, their interaction, and the residual).

    Column names are passed through ``clean_column_name`` before being put
    into the formula, so the "Source" labels use the cleaned names while
    the "Variable" column keeps the original name.

    Args:
        df: DataFrame holding the response and grouping columns.
        numeric_vars: Iterable of response column names to analyse.
        group1: Name of the first grouping (factor) column.
        group2: Name of the second grouping (factor) column.
        alpha: Significance threshold compared against each p-value.

    Returns:
        A DataFrame with the columns "Variable", "Source", "Sum Sq", "df",
        "F-Value", "p-Value" and "Significant (α<alpha)". Missing values
        (e.g. F and p on the residual row) are replaced by a single space,
        so those columns may not be purely numeric. The columns are present
        even when no variable could be analysed.

    A variable whose model cannot be built or fitted is reported with
    ``print`` and skipped; the remaining variables are still processed.
    """
    sig_col = f"Significant (α<{alpha})"
    columns = ["Variable", "Source", "Sum Sq", "df", "F-Value", "p-Value", sig_col]
    results = []

    for var in numeric_vars:
        # Deliberate best-effort: any failure for one variable is reported
        # and the loop moves on to the next one.
        try:
            # Only the columns this model needs are selected and renamed, so
            # unrelated columns that collapse to the same cleaned name (or
            # that have non-string labels) cannot break the fit.
            # dict.fromkeys de-duplicates while keeping order, in case the
            # response is also one of the grouping columns.
            needed = list(dict.fromkeys([var, group1, group2]))
            cleaned = [clean_column_name(col) for col in needed]
            if "" in cleaned or len(set(cleaned)) != len(cleaned):
                raise ValueError(
                    f"column names {needed} are empty or not unique "
                    f"after cleaning: {cleaned}"
                )
            name_map = dict(zip(needed, cleaned))
            data = df[needed].rename(columns=name_map)

            formula = (
                f"{name_map[var]} ~ "
                f"C({name_map[group1]}) * C({name_map[group2]})"
            )
            model = smf.ols(formula, data=data).fit()
            anova_table = anova_lm(model, typ=2)
        except Exception as e:
            print(f"ANOVA failed for variable '{var}': {e}")
            continue

        for source, row in anova_table.iterrows():
            p_val = row["PR(>F)"]
            # The residual row has no test; a NaN p-value elsewhere means
            # the test could not be computed, which is not the same as
            # "Not Significant".
            if source == "Residual" or pd.isna(p_val):
                interpretation = "-"
            elif p_val < alpha:
                interpretation = "Significant"
            else:
                interpretation = "Not Significant"

            results.append({
                "Variable": var,
                "Source": source,
                "Sum Sq": row["sum_sq"],
                "df": row["df"],
                "F-Value": row["F"],
                "p-Value": p_val,
                sig_col: interpretation,
            })

    # Passing the column list keeps the schema stable even with no rows.
    return pd.DataFrame(results, columns=columns).fillna(' ')

if __name__ == "__main__":
    # Load the experiment data from the path relative to the working directory.
    df = pd.read_csv("../Datasets/Fertilizer Experiment.csv")

    # Every numeric column is analysed as a response variable.
    numeric_vars = list(df.select_dtypes(include="number").columns)

    anova_results = compute_two_way_anova(
        df,
        numeric_vars,
        group1='Fertilizer',
        group2='Light Exposure',
    )

    # `display` is provided by the IPython/Jupyter session this cell runs in.
    display(anova_results)


Unnamed: 0,Variable,Source,Sum Sq,df,F-Value,p-Value,Significant (α<0.05)
0,Plant Height (cm),C(Fertilizer),3092.8288,2.0,48.5021,0.0,Significant
1,Plant Height (cm),C(LightExposure),18697.9245,2.0,293.2229,0.0,Significant
2,Plant Height (cm),C(Fertilizer):C(LightExposure),1747.4805,4.0,13.7021,0.0,Significant
3,Plant Height (cm),Residual,3539.0645,111.0,,,-
4,Leaf Area (cm²),C(Fertilizer),19516.6979,2.0,35.5914,0.0,Significant
5,Leaf Area (cm²),C(LightExposure),177708.3371,2.0,324.0757,0.0,Significant
6,Leaf Area (cm²),C(Fertilizer):C(LightExposure),28551.5387,4.0,26.0338,0.0,Significant
7,Leaf Area (cm²),Residual,30433.6649,111.0,,,-
8,Chlorophyll Content (SPAD units),C(Fertilizer),772.9067,2.0,18.8553,0.0,Significant
9,Chlorophyll Content (SPAD units),C(LightExposure),7926.0725,2.0,193.3587,0.0,Significant
