In [22]:
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols
import statsmodels.formula.api as smf
import statsmodels.api as sm
import pandas as pd
import numpy as np
import re

def rename(text): return re.sub(r'[^a-zA-Z]', "", text)

def compute_two_way_anova(df, numerical_columns, group1, group2):
    results = []
    df = df.rename(columns={col: rename(col) for col in df.columns})
    
    for col in numerical_columns:
        var = rename(col)  
        group1_cleaned = rename(group1)
        group2_cleaned = rename(group2)
        formula = f"{var} ~  C({group1_cleaned}) * C({group2_cleaned})" 
        model = smf.ols(formula, data=df).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        
        for source, row in anova_table.iterrows():
            p_value = row["PR(>F)"]
            interpretation = "Significant" if p_value < 0.05 else "No significant"
            if source == "Residual": interpretation = "-"
            
            results.append({
                #"Group": original_group,
                "Variable": col,
                "Source": source,
                "Sum Sq": row["sum_sq"],
                "df": row["df"],
                "F-Value": row["F"],
                "p-Value": p_value,
                "Significant (α<0.05)": interpretation
            })
    results = pd.DataFrame(results)
    return results

#if __name__ == "__main__":
df = pd.read_csv("../Datasets/Eggplant Fusarium Fertilizer Data.csv")
group1 = 'Fertilizer'
group2 = 'Variety'
variables = df.select_dtypes(include=[np.number]).columns  
anova_results = compute_two_way_anova(df, variables, group1, group2)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
display(anova_results)

  F /= J
  F /= J
  F /= J
  F /= J
  F /= J
  F /= J
  F /= J


Unnamed: 0,Variable,Source,Sum Sq,df,F-Value,p-Value,Significant (α<0.05)
0,Replication ID,C(Fertilizer),,2.0,,,No significant
1,Replication ID,C(Variety),,7.0,,,No significant
2,Replication ID,C(Fertilizer):C(Variety),37018.3653,14.0,3.1247,0.0775,No significant
3,Replication ID,Residual,670201.5198,792.0,,,-
4,Infection Severity (%),C(Fertilizer),,2.0,,,No significant
5,Infection Severity (%),C(Variety),,7.0,,,No significant
6,Infection Severity (%),C(Fertilizer):C(Variety),386.0725,14.0,0.4001,0.5272,No significant
7,Infection Severity (%),Residual,54593.4803,792.0,,,-
8,Wilt index,C(Fertilizer),,2.0,,,No significant
9,Wilt index,C(Variety),,7.0,,,No significant


In [7]:
df = pd.read_csv("../Datasets/Eggplant Fusarium Fertilizer Data.csv")
df

Unnamed: 0,Variety,Fertilizer,Replication ID,Infection Severity (%),Wilt index,Plant height (cm),Days to wilt symptoms,Survival rate (%),Disease incidence (%)
0,EP-R1,Inorganic,1,22.5000,0.7000,88.9000,21,88.8000,23.4000
1,EP-R1,Inorganic,2,27.9000,1.2000,82.2000,19,87.7000,21.7000
2,EP-R1,Inorganic,3,21.2000,0.0000,74.7000,17,84.9000,27.2000
3,EP-R1,Inorganic,4,15.5000,0.1000,93.8000,18,90.3000,15.0000
4,EP-R1,Inorganic,5,17.3000,0.9000,78.1000,19,87.0000,23.0000
...,...,...,...,...,...,...,...,...,...
795,EP-S3,Control,96,75.2000,3.6000,68.2000,7,6.4000,85.5000
796,EP-S3,Control,97,74.8000,4.9000,59.5000,4,27.2000,82.0000
797,EP-S3,Control,98,58.1000,3.6000,78.8000,7,30.8000,75.4000
798,EP-S3,Control,99,54.1000,4.1000,63.7000,7,24.1000,81.8000
