In [1]:
from policyengine_us import Microsimulation

# Load the stacked congressional-district test dataset.
# NOTE(review): the following cell constructs `sim` again from the same dataset
# and repeats this `state_fips` override, so this cell looks redundant — confirm
# before removing it.
sim = Microsimulation(
    dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5"
)

# Integer division by 100 drops the last two digits of each district GEOID
# (presumably leaving the state FIPS prefix — verify against the GEOID format).
cd_geoids = sim.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100

# Write the derived codes back as the 2023 `state_fips` input.
sim.set_input("state_fips", 2023, correct_state_fips)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd
import numpy as np
from policyengine_us import Microsimulation
from policyengine_us.variables.input.geography import StateName

# Period used for the input override and the array deletion below.
# NOTE(review): the calculation cells that follow request period=2026, not YEAR —
# confirm that the 2023 override is what those 2026 calculations actually read.
YEAR = 2023

# State FIPS code -> StateName member (50 states plus DC).
STATE_FIPS_TO_NAME = {
    1: StateName.AL,
    2: StateName.AK,
    4: StateName.AZ,
    5: StateName.AR,
    6: StateName.CA,
    8: StateName.CO,
    9: StateName.CT,
    10: StateName.DE,
    11: StateName.DC,
    12: StateName.FL,
    13: StateName.GA,
    15: StateName.HI,
    16: StateName.ID,
    17: StateName.IL,
    18: StateName.IN,
    19: StateName.IA,
    20: StateName.KS,
    21: StateName.KY,
    22: StateName.LA,
    23: StateName.ME,
    24: StateName.MD,
    25: StateName.MA,
    26: StateName.MI,
    27: StateName.MN,
    28: StateName.MS,
    29: StateName.MO,
    30: StateName.MT,
    31: StateName.NE,
    32: StateName.NV,
    33: StateName.NH,
    34: StateName.NJ,
    35: StateName.NM,
    36: StateName.NY,
    37: StateName.NC,
    38: StateName.ND,
    39: StateName.OH,
    40: StateName.OK,
    41: StateName.OR,
    42: StateName.PA,
    44: StateName.RI,
    45: StateName.SC,
    46: StateName.SD,
    47: StateName.TN,
    48: StateName.TX,
    49: StateName.UT,
    50: StateName.VT,
    51: StateName.VA,
    53: StateName.WA,
    54: StateName.WV,
    55: StateName.WI,
    56: StateName.WY,
}

sim = Microsimulation(
    dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5"
)

# Integer division by 100 drops the last two digits of each district GEOID
# (presumably leaving the state FIPS prefix — verify against the GEOID format).
cd_geoids = sim.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100

# Any FIPS code missing from STATE_FIPS_TO_NAME maps to NaN here.
# NOTE(review): `correct_state_names` is not used by any cell visible in this
# notebook — confirm whether it was meant to be passed to `sim.set_input`.
fips_series = pd.Series(correct_state_fips)
correct_state_names = fips_series.map(STATE_FIPS_TO_NAME).values

sim.set_input("state_fips", YEAR, correct_state_fips)

# Delete any stored arrays for these variables at YEAR so they are recalculated.
for variable_name in ("state_name", "state_code"):
    if variable_name in sim.tax_benefit_system.variables:
        sim.delete_arrays(variable_name, YEAR)



In [3]:
# `state_fips` mapped to the household entity for 2026.
# NOTE(review): the setup cell overrides `state_fips` for 2023 only — confirm
# that the override is what this 2026 request returns.
state_fips = sim.calculate(
    "state_fips",
    map_to="household",
    period=2026,
)


In [4]:
# District GEOID mapped to the household entity for 2026; used later to group
# New Jersey households by district.
congressional_district_geoid = sim.calculate(
    "congressional_district_geoid",
    map_to="household",
    period=2026,
)


In [5]:
# `income_tax` mapped to the household entity for 2026.
income_tax = sim.calculate(
    "income_tax",
    map_to="household",
    period=2026,
)


In [6]:
# `state_name` mapped to the household entity for 2026.
# NOTE(review): the setup cell deletes `state_name` arrays for 2023, not 2026 —
# confirm this value reflects the overridden `state_fips`.
state_name = sim.calculate(
    "state_name",
    map_to="household",
    period=2026,
)


In [7]:
# `state_code` mapped to the household entity for 2026; compared against "NJ"
# in the next cell.
state_code = sim.calculate(
    "state_code",
    map_to="household",
    period=2026,
)


In [8]:
# Element-wise mask: True where the household's state code equals "NJ".
in_nj = (state_code == "NJ")

In [9]:
# Mean of household-level `income_tax` over the rows selected by `in_nj`.
# NOTE(review): whether `.mean()` is survey-weighted depends on the type that
# `sim.calculate` returns — confirm before quoting this figure.
mean_fed_tax_in_nj = (
    income_tax[in_nj].mean()
)

In [10]:
# Display the New Jersey mean of `income_tax` computed in the previous cell.
mean_fed_tax_in_nj

26613.23385910318

In [11]:
# Restrict both household-level series to the rows selected by `in_nj`.
fed_tax_in_nj = income_tax[in_nj]
districtics_in_nj = congressional_district_geoid[in_nj]

# np.unique returns the distinct GEOIDs in ascending order.
unique_districts = np.unique(districtics_in_nj)

# Despite its name, `district_list` is a dict: district GEOID -> mean
# `income_tax` of the households in that district.
district_list = {
    district: fed_tax_in_nj[districtics_in_nj == district].mean()
    for district in unique_districts
}

district_list

{3401: 21626.254254479445,
 3402: 19496.141684997117,
 3403: 26277.74194296395,
 3404: 32628.926321682633,
 3405: 28071.03803417276,
 3406: 24837.961113839345,
 3407: 35728.95922826653,
 3408: 19402.57601023985,
 3409: 23163.47901356361,
 3410: 21838.69476117316,
 3411: 31695.259674954348,
 3412: 29165.225455496624}