In [None]:
import pandas as pd
import config

In [None]:
# Let the notebook render up to 500 rows before pandas truncates the output.
pd.options.display.max_rows = 500

In [None]:
# Load the 2023 survey export, keep only the rows whose 'Empty Response' flag
# is not set, and turn the "-" placeholder into a proper missing value.
# NOTE(review): the `~` negation assumes 'Empty Response' parses as a boolean
# column -- confirm against the CSV.
df = (
    pd.read_csv('processed/2023.csv')
    .loc[lambda frame: ~frame['Empty Response']]
    .replace("-", pd.NA)
)

In [None]:
# Preview the first few rows of df as the cell output.
df.head()

In [None]:
# When True, the roll-up sums "N Parents Represented" instead of counting rows.
weight_by_parents = False

# Source columns for the subgroup masks, converted/looked up once.
years_at_gvca = pd.to_numeric(df["Years at GVCA"])
minority_answer = df["Minority"]
support_answer = df["IEP, 504, ALP, or Read"]

# Subgroup name -> boolean mask over the rows of df.
# NOTE(review): the `!= "Yes"` masks may also select rows where the answer is
# missing, not only explicit non-"Yes" answers -- confirm that is intended.
filters = {
    "Year 1 Families": years_at_gvca == 1,
    "Not Year 1 Families": years_at_gvca > 1,
    "Year 3 or Less Families": years_at_gvca <= 3,
    "Year 4 or More Families": years_at_gvca > 3,
    "Minority": minority_answer == "Yes",
    "Not Minority": minority_answer != "Yes",
    "Support": support_answer == "Yes",
    "Not Support": support_answer != "Yes",
}

def calculate_question_totals(df):
    """Roll survey answers up into one row per (question, response) pair.

    Questions come from ``config.questions_for_each_school_level`` and the
    responses tallied for each question from ``config.question_responses``
    (a question with no entry there contributes no rows).

    Every row carries ``Question`` and ``Response`` plus an ``N_<label>`` /
    ``%_<label>`` column pair for each segment, in this order:

    * ``total`` -- all of ``config.levels`` combined,
    * one segment per entry of ``config.levels``,
    * one segment per entry of the module-level ``filters`` mapping, applied
      as a boolean mask to ``df`` and tallied across all levels.

    The module-level ``weight_by_parents`` flag is forwarded to
    ``_calculate_totals`` for every segment.

    Args:
        df: Survey responses with one "(<level>) <question>" column per
            level/question combination.

    Returns:
        A ``pandas.DataFrame`` built from the list of row dicts.
    """

    def segments():
        # Yielded lazily so each filtered frame is only built at the moment
        # it is tallied.
        yield "total", df, config.levels
        for level in config.levels:
            yield level, df, [level]
        for filter_name, mask in filters.items():
            yield filter_name, df[mask], config.levels

    rows = []

    for question in config.questions_for_each_school_level:
        for response in config.question_responses.get(question, []):
            row = {"Question": question, "Response": response}

            for label, frame, levels in segments():
                counts, total = _calculate_totals(frame, question, response, levels, weight_by_parents)
                row.update(_format_counts_and_percentages(label, counts, total, response))

            rows.append(row)

    return pd.DataFrame(rows)

def _calculate_totals(df, question, response, levels, weight_by_parents):
    """Helper to calculate counts and totals for given levels."""
    totals = {}
    overall_total = 0

    for level in levels:
        column_name = f"({level}) {question}"
        if column_name in df.columns:
            filtered_df = df[df[column_name] == response]

            if weight_by_parents:
                response_sum = filtered_df["N Parents Represented"].astype(float).sum()
                level_total = df[~df[column_name].isna()]["N Parents Represented"].astype(float).sum()
            else:
                response_sum = len(filtered_df)
                level_total = len(df[column_name].dropna())

            totals[response] = totals.get(response, 0) + response_sum
            overall_total += level_total

    return totals, overall_total

def _format_counts_and_percentages(label, counts, total, response):
    """Helper to format counts and percentages for a given response."""
    count = counts.get(response, 0)
    percentage = (count / total) * 100 if total > 0 else 0
    return {f"N_{label}": count, f"%_{label}": percentage}

# Build the per-question, per-response roll-up table from df.
rolled_up_data = calculate_question_totals(df)
# Uncomment the next line to export the table to CSV.
# rolled_up_data.to_csv("rolled_up_data.csv", index=False)
# Bare expression so the notebook renders the table as the cell output.
rolled_up_data