In [12]:
import re
import numpy as np
import pandas as pd
from typing import List
import statsmodels.api as sm
from IPython.display import display
import statsmodels.formula.api as smf

def _make_safe_name(s: str) -> str:
    s = str(s)
    s = re.sub(r'\s+', '_', s)
    s = re.sub(r'[^0-9a-zA-Z_]', '_', s)
    s = re.sub(r'_+', '_', s).strip('_')
    if re.match(r'^[0-9]', s): s = f'v_{s}'
    if not s: s = 'var'
    return s

def _calculate_one_way_anova(data: pd.DataFrame, dep_var: str, factor: str) -> pd.DataFrame:
    formula = f"{dep_var} ~ C({factor})"
    model = smf.ols(formula, data=data).fit()
    aov_table = sm.stats.anova_lm(model, typ=2)

    ss_total = aov_table['sum_sq'].sum()
    ss_effect = aov_table.loc[f'C({factor})', 'sum_sq']
    df_effect = aov_table.loc[f'C({factor})', 'df']
    ms_error = aov_table.loc['Residual', 'sum_sq']
    
    eta_sq = ss_effect / ss_total
    aov_table.loc[f'C({factor})', 'Eta-squared (η²)'] = eta_sq
    
    omega_sq = (ss_effect - df_effect * ms_error) / (ss_total + ms_error)
    aov_table.loc[f'C({factor})', 'Omega-squared (ω²)'] = omega_sq
    
    return aov_table

def run_multiple_one_way_anovas(df: pd.DataFrame, dep_vars: List[str], factors: List[str], alpha: float = 0.05) -> pd.DataFrame:
    original_to_safe_map = {col: _make_safe_name(col) for col in df.columns}
    safe_to_original_map = {v: k for k, v in original_to_safe_map.items()}
    df = df.rename(columns=original_to_safe_map)
    
    safe_dep_vars = [original_to_safe_map[var] for var in dep_vars]
    safe_factors = [original_to_safe_map[factor] for factor in factors]

    all_results = []
    for var in safe_dep_vars:
        for factor in safe_factors:
            aov_table = _calculate_one_way_anova(df, var, factor)
            aov_table.insert(0, 'Dependent Variable', safe_to_original_map[var])
            all_results.append(aov_table.reset_index())

    final_df = pd.concat(all_results, ignore_index=True)
    final_df = final_df.rename(columns={'index': 'Source', 'sum_sq': 'Sum Sq', 'mean_sq': 'Mean Sq', 'PR(>F)': 'p-Value'})
    final_df['Source'] = final_df['Source'].apply(lambda x: x.replace('C(`', '').replace('`)', ''))
    final_df['Significant (α<0.05)'] = final_df['p-Value'].apply(
        lambda p: "Significant" if pd.notna(p) and p < alpha else ("Not significant" if pd.notna(p) else "-"))


    for safe_name, original_name in safe_to_original_map.items():
        final_df['Source'] = final_df['Source'].str.replace(f'C({safe_name})', original_name, regex=False)
        final_df['Source'] = final_df['Source'].str.replace(f':{safe_name}', f':{original_name}', regex=False)
    
    return final_df.fillna('')

if __name__ == "__main__":
    try:
        df = pd.read_csv("../data/Fertilizer Experiment.csv")
    except FileNotFoundError:
        print("File not found. Using a sample DataFrame for demonstration.")
        np.random.seed(42)
        sample_data = {
            'Fertilizer Type': np.repeat(['A', 'B', 'C'], 30),
            'Light Exposure': np.tile(np.repeat(['Low', 'High'], 15), 3),
            'Soil pH': np.tile(np.repeat([6.0, 7.0, 6.5], 10), 3),
            'Plant Height (cm)': np.random.normal(20, 5, 90),
            'Leaf Count': np.random.normal(50, 10, 90)
        }
        df = pd.DataFrame(sample_data)
        
        df.loc[df['Fertilizer Type'] == 'B', 'Plant Height (cm)'] += 7
        df.loc[df['Light Exposure'] == 'High', 'Plant Height (cm)'] += 4
        df.loc[df['Soil pH'] == 6.0, 'Leaf Count'] -= 8

    variables_to_test = df.select_dtypes(include=np.number).columns.tolist()
    factors_to_test = ['Fertilizer', 'Light Exposure']
    factors_to_test = [f for f in factors_to_test if f in df.columns]
    
    if not factors_to_test:
        print("Warning: None of the specified factors were found in the DataFrame. Please check column names.")
    else:
        anova_results = run_multiple_one_way_anovas(df, variables_to_test, factors_to_test)
        display(anova_results)

Unnamed: 0,Source,Dependent Variable,Sum Sq,df,F,p-Value,Eta-squared (η²),Omega-squared (ω²),Significant (α<0.05)
0,Fertilizer,Plant Height (cm),2540.401132,2.0,6.196237,0.002768,0.095774,-0.899409,Significant
1,Residual,Plant Height (cm),23984.469551,117.0,,,,,-
2,Light Exposure,Plant Height (cm),18145.496827,2.0,126.68149,0.0,0.684094,0.03973,Significant
3,Residual,Plant Height (cm),8379.373856,117.0,,,,,-
4,Fertilizer,Leaf Area (cm²),14394.904665,2.0,3.557773,0.031626,0.05733,-0.940978,Significant
5,Residual,Leaf Area (cm²),236693.540764,117.0,,,,,-
6,Light Exposure,Leaf Area (cm²),172586.54388,2.0,128.612334,0.0,0.687354,0.047279,Significant
7,Residual,Leaf Area (cm²),78501.901548,117.0,,,,,-
8,Fertilizer,Chlorophyll Content (SPAD units),529.814969,2.0,2.80544,0.064554,0.045762,-0.953167,Not significant
9,Residual,Chlorophyll Content (SPAD units),11047.885633,117.0,,,,,-


<h1 style='font-size: 20px; color: red; font-family: French Script MT; font-weight: 700; text-align: center'>Data to Drive Meaningful Insights and Solutions</h1>