In [5]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd

# Variable names neutralized for the "CTCs" scenario (passed to
# calculate_impacts as `neutralized_variables`). Judging by the names these
# are state child-tax-credit variables - confirm against policyengine_us.
CTCS = [
    "ca_yctc", "co_ctc", "co_family_affordability_credit", "dc_ctc",
    "id_ctc", "il_ctc", "md_ctc", "mn_child_and_working_families_credits",
    "nc_ctc", "nj_ctc", "nm_ctc", "ny_ctc",
    "or_ctc", "ok_child_care_child_tax_credit", "vt_ctc", "ut_ctc",
]

# Variable names neutralized for the "EITCs" scenario. A few entries do not
# follow the `<state>_eitc` pattern (ks, mo, wa, wi); presumably those are the
# states' EITC-equivalent variables - confirm against policyengine_us.
EITCS = [
    "ca_eitc", "co_eitc", "ct_eitc", "dc_eitc", "de_eitc", "hi_eitc",
    "ia_eitc", "il_eitc", "in_eitc", "ks_total_eitc", "la_eitc", "ma_eitc",
    "md_eitc", "me_eitc", "mi_eitc", "mo_wftc", "mt_eitc", "ne_eitc",
    "nj_eitc", "nm_eitc", "ny_eitc", "oh_eitc", "ok_eitc", "or_eitc",
    "ri_eitc", "sc_eitc", "ut_eitc", "va_eitc", "vt_eitc",
    "wa_working_families_tax_credit", "wi_earned_income_credit",
]

# Variable names neutralized for the "State income tax" scenario.
ALL_STATE_INCOME_TAXES = [
    # Pre-refundable-credit tax variables.
    "household_state_tax_before_refundable_credits",
    "state_income_tax_before_refundable_credits",
    # Refundable credit variables.
    "state_refundable_credits",
    "household_refundable_state_tax_credits",
]

In [6]:
# Stand-alone microsimulation on the pooled 3-year CPS file with no reform
# applied.
# NOTE(review): `baseline` is not referenced by the other cells visible here
# (calculate_impacts builds its own simulations) - confirm it is still needed
# before keeping this load.
baseline_dataset_uri = (
    "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"
)
baseline = Microsimulation(dataset=baseline_dataset_uri)

# Original author was unsure whether this flag is required; it presumably
# stops the simulation from reading cached macro results - TODO confirm.
baseline.macro_cache_read = False

In [7]:
def _build_simulation(neutralized_variables, dataset):
    """Construct the Microsimulation for one scenario.

    When ``neutralized_variables`` is None the simulation is built on
    ``dataset`` with no reform. Otherwise a Reform subclass is applied whose
    ``apply`` calls ``neutralize_variable`` on every listed variable name.
    """
    if neutralized_variables is None:
        sim = Microsimulation(dataset=dataset)
    else:
        # Snapshot the names so the reform does not depend on the caller's
        # (possibly mutable or single-pass) iterable.
        variables = tuple(neutralized_variables)

        class reform(Reform):
            def apply(self):
                for var in variables:
                    self.neutralize_variable(var)

        sim = Microsimulation(reform=reform, dataset=dataset)

    sim.macro_cache_read = False
    return sim


def calculate_impacts(
    neutralized_variables=None,
    period=2025,
    dataset="hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5",
):
    """Compute per-state summary metrics for one scenario.

    Args:
        neutralized_variables: Iterable of variable names to neutralize via a
            Reform, or None to run the dataset without any reform.
        period: Period passed to every calculation (default 2025). This now
            also applies to the Gini inputs, which previously used the
            simulation's default period and could therefore describe a
            different year than the other columns.
        dataset: Dataset URI handed to ``Microsimulation``.

    Returns:
        pandas.DataFrame keyed by the ``state_code`` group labels, with
        columns ``net_income`` (sum), ``poverty`` (mean of ``in_poverty``),
        ``child_poverty`` (same mean restricted to ``is_child`` rows),
        ``poverty_gap`` (sum) and ``gini_index``.
    """
    sim = _build_simulation(neutralized_variables, dataset)

    # Household-level quantities and their grouping key.
    net_income = sim.calc("household_net_income", period=period, map_to="household")
    poverty_gap = sim.calc("poverty_gap", period=period, map_to="household")
    state_code_household = sim.calc("state_code", period=period, map_to="household")

    # Person-level quantities and their grouping key.
    poverty = sim.calc("in_poverty", period=period, map_to="person")
    child = sim.calc("is_child", period=period, map_to="person")
    state_code_person = sim.calc("state_code", period=period, map_to="person")

    # Gini inputs, evaluated for the same period as everything else.
    personal_hh_equiv_income = sim.calculate(
        "equiv_household_net_income", period=period
    )
    household_count_people = sim.calculate("household_count_people", period=period)
    # Scale each household's weight by its member count so that (presumably)
    # the Gini is taken over people rather than over households.
    personal_hh_equiv_income.weights *= household_count_people

    return pd.DataFrame(
        {
            "net_income": net_income.groupby(state_code_household).sum(),
            "poverty": poverty.groupby(state_code_person).mean(),
            "child_poverty": poverty[child].groupby(state_code_person).mean(),
            "poverty_gap": poverty_gap.groupby(state_code_household).sum(),
            "gini_index": personal_hh_equiv_income.groupby(state_code_household).gini(),
        }
    )

In [8]:
# Run calculate_impacts once per scenario and stack the resulting per-state
# tables. The scenario label becomes index level 0 and the state label stays
# as index level 1. Scenarios, in order:
# - baseline (nothing neutralized)
# - CTCs neutralized
# - EITCs neutralized
# - CTCs and EITCs neutralized together
# - state income tax variables neutralized
reform_types = ["baseline", "CTCs", "EITCs", "CTCs and EITCs", "State income tax"]
neutralized_by_scenario = [
    None,
    CTCS,
    EITCS,
    CTCS + EITCS,
    ALL_STATE_INCOME_TAXES,
]

stacked = pd.concat(
    [
        calculate_impacts(neutralized_variables=variables)
        for variables in neutralized_by_scenario
    ],
    keys=reform_types,
)

# Distinct state labels found in index level 1 of the stacked table.
states = stacked.index.get_level_values(1).unique()

In [9]:
# Accumulates one record per (state, non-baseline scenario).
rows = []


def pct_diff(a, b):
    """Return the difference of ``a`` from ``b`` as a fraction of ``b``.

    No guard is applied for ``b == 0``: Python numbers raise
    ZeroDivisionError, while NumPy scalars yield inf/nan.
    """
    return (a - b) / b


def pct_diff_reform(var, reform_type, data=None):
    """Relative difference of the baseline value of ``var`` from the reform value.

    Args:
        var: Column name to compare.
        reform_type: Row label of the scenario compared against "baseline".
        data: Table with scenario labels as its index (must contain
            "baseline" and ``reform_type``) and ``var`` among its columns.
            When omitted, the module-level ``state_data`` is used so existing
            two-argument calls keep working; passing it explicitly avoids
            depending on whichever state a loop assigned last.

    Returns:
        ``(baseline - reform) / reform`` for the selected column.
    """
    if data is None:
        # Legacy fallback: relies on a global assigned elsewhere in the notebook.
        data = state_data
    return pct_diff(data.loc["baseline", var], data.loc[reform_type, var])


# Output column -> metric column whose percentage cut it reports.
cut_columns = {
    "poverty_pct_cut": "poverty",
    "child_poverty_pct_cut": "child_poverty",
    "poverty_gap_pct_cut": "poverty_gap",
    "gini_index_pct_cut": "gini_index",
}

for state in states:
    # Scenario-indexed slice of the stacked table for this state.
    state_data = stacked.xs(state, level=1)

    for reform_type in reform_types:
        if reform_type != "baseline":
            baseline_net_income = state_data.loc["baseline", "net_income"]
            reform_net_income = state_data.loc[reform_type, "net_income"]

            # Cost is the net income lost when the variables are neutralized.
            record = {
                "state": state,
                "reform_type": reform_type,
                "cost": baseline_net_income - reform_net_income,
            }
            # A "cut" is the negated relative difference of baseline from the
            # neutralized scenario, i.e. (reform - baseline) / reform.
            for out_column, metric in cut_columns.items():
                record[out_column] = -pct_diff(
                    state_data.loc["baseline", metric],
                    state_data.loc[reform_type, metric],
                )
            rows.append(record)

# One row per (state, scenario); written without the positional index.
result_df = pd.DataFrame(rows)
result_df.to_csv("results.csv", index=False)