In [None]:
from statsmodels.formula.api import ols
import statsmodels.api as sm
import pandas as pd


# Load the dataset from the CSV file that sits next to the notebook.
df = pd.read_csv("metabolic_syndrome.csv")

# Names of every numeric column except 'seqn', which is excluded from the
# analysis (presumably a participant/sequence identifier -- TODO confirm).
numeric_vars = (
    df.select_dtypes(include='number')
    .columns
    .drop('seqn')
)

# Split the rows by the value of the 'sex' column so that each group can be
# analysed on its own.
df_male = df.loc[df['sex'].eq('Male')]
df_female = df.loc[df['sex'].eq('Female')]


def perform_detailed_anova(df, var, group_col='metabolicsyndrome', alpha=0.05):
    """Run a one-way ANOVA of ``var`` across the levels of ``group_col``.

    Fits the model ``var ~ C(group_col)`` with ``ols`` and summarises the
    first row of the type-II ANOVA table, i.e. the row for the grouping
    factor (the only term in the formula).

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both the ``var`` and ``group_col`` columns.
    var : str
        Name of the response column. It is interpolated into the formula
        unquoted, so it has to be usable as a bare name in a formula.
    group_col : str, default 'metabolicsyndrome'
        Name of the column that is wrapped in ``C(...)`` and treated as a
        categorical factor.
    alpha : float, default 0.05
        Significance threshold: 'Significant' is "Yes" when the p-value is
        strictly below ``alpha`` and "No" otherwise (including a NaN p-value).

    Returns
    -------
    dict
        Keys 'Variable', 'Sum of Squares', 'Degrees of Freedom', 'F-Value',
        'p-value' and 'Significant'.
    """
    model = ols(f'{var} ~ C({group_col})', data=df).fit()
    anova_results = sm.stats.anova_lm(model, typ=2)

    # The ANOVA table's rows are labelled by model term, not by integers, so
    # the factor row is picked by position with .iloc. Writing `series[0]`
    # relied on pandas' deprecated "integer key means position" fallback,
    # which warns on recent pandas versions and is slated for removal.
    factor_row = anova_results.iloc[0]
    p_value = factor_row['PR(>F)']
    significant = "Yes" if p_value < alpha else "No"
    return {
        'Variable': var,
        'Sum of Squares': factor_row['sum_sq'],
        'Degrees of Freedom': factor_row['df'],
        'F-Value': factor_row['F'],
        'p-value': p_value,
        'Significant': significant
    }


def gather_anova_results(df, numeric_vars, sex):
    """Build one ANOVA summary table covering every variable in ``numeric_vars``.

    Calls ``perform_detailed_anova(df, var)`` for each variable in order,
    stacks the returned dictionaries into a DataFrame (one row per variable)
    and appends a constant 'Sex' column holding ``sex``.
    """
    rows = []
    for column_name in numeric_vars:
        rows.append(perform_detailed_anova(df, column_name))

    # Label every row with the subgroup the statistics were computed on.
    return pd.DataFrame(rows).assign(Sex=sex)


# Run the per-variable ANOVA separately on the male and the female subset.
detailed_anova_results_male = gather_anova_results(
    df=df_male, numeric_vars=numeric_vars, sex='Male'
)
detailed_anova_results_female = gather_anova_results(
    df=df_female, numeric_vars=numeric_vars, sex='Female'
)


# ANOVA Results Across All Measured Variables, Comparing Males with and without Metabolic Syndrome

In [15]:
# Show the ANOVA summary table built from the male subset (df_male).
detailed_anova_results_male

Unnamed: 0,Variable,Sum of Squares,Degrees of Freedom,F-Value,p-value,Significant,Sex
0,age,14851.24,1.0,49.258139,3.763327e-12,Yes,Male
1,income,70326.74,1.0,0.008001,0.9287415,No,Male
2,waistcirc,77954.14,1.0,431.162504,1.3258810000000001e-81,Yes,Male
3,bmi,9108.602,1.0,336.457741,2.738395e-66,Yes,Male
4,albuminuria,4.785832,1.0,25.056218,6.40867e-07,Yes,Male
5,uralbcr,151928.4,1.0,2.090829,0.1484494,No,Male
6,uricacid,67.42455,1.0,42.704817,9.423426e-11,Yes,Male
7,bloodglucose,149310.9,1.0,122.12909,4.2914490000000005e-27,Yes,Male
8,hdl,24277.24,1.0,151.293215,8.165205e-33,Yes,Male
9,triglycerides,2496313.0,1.0,273.327182,2.031765e-55,Yes,Male


# ANOVA Results Across All Measured Variables, Comparing Females with and without Metabolic Syndrome

In [14]:
# Show the ANOVA summary table built from the female subset (df_female).
detailed_anova_results_female

Unnamed: 0,Variable,Sum of Squares,Degrees of Freedom,F-Value,p-value,Significant,Sex
0,age,33040.92,1.0,117.480059,3.447467e-26,Yes,Female
1,income,292970600.0,1.0,34.802073,4.786996e-09,Yes,Female
2,waistcirc,71960.35,1.0,341.692046,4.555686e-67,Yes,Female
3,bmi,10587.1,1.0,236.200839,8.887585e-49,Yes,Female
4,albuminuria,5.979447,1.0,37.819221,1.052883e-09,Yes,Female
5,uralbcr,1281852.0,1.0,21.431396,4.063012e-06,Yes,Female
6,uricacid,242.093,1.0,148.992621,2.127429e-32,Yes,Female
7,bloodglucose,233698.5,1.0,268.834572,1.039917e-54,Yes,Female
8,hdl,52147.13,1.0,273.542142,1.5100680000000001e-55,Yes,Female
9,triglycerides,1498386.0,1.0,274.317857,1.099438e-55,Yes,Female
