In [30]:
from policyengine_us import Microsimulation

# Load the stacked congressional-district dataset into a fresh simulation.
sim = Microsimulation(
    dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5",
)

# Integer-divide each district GEOID by 100 to get a state FIPS code
# (e.g. 3406 -> 34), then overwrite the simulation's 2023 `state_fips`
# input with the derived codes.
# NOTE(review): presumably the last two GEOID digits are the district
# number within the state - confirm against the dataset definition.
cd_geoids = sim.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100
sim.set_input("state_fips", 2023, correct_state_fips)


In [31]:
import pandas as pd
import numpy as np
from policyengine_us import Microsimulation
from policyengine_us.variables.input.geography import StateName

# Period used for the input override and the stored-array resets in this cell.
YEAR = 2023

# Build the simulation from the stacked congressional-district dataset.
sim = Microsimulation(
    dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5",
)

# Lookup from state FIPS code to the matching StateName member.
# There are 51 entries: the 50 state abbreviations plus DC. The FIPS codes
# are not contiguous (3, 7, 14, 43 and 52 do not appear), so the pairs are
# listed explicitly rather than generated from a range.
STATE_FIPS_TO_NAME = {
    fips: getattr(StateName, abbreviation)
    for fips, abbreviation in (
        (1, "AL"), (2, "AK"), (4, "AZ"), (5, "AR"), (6, "CA"),
        (8, "CO"), (9, "CT"), (10, "DE"), (11, "DC"),
        (12, "FL"), (13, "GA"), (15, "HI"), (16, "ID"), (17, "IL"),
        (18, "IN"), (19, "IA"), (20, "KS"), (21, "KY"), (22, "LA"),
        (23, "ME"), (24, "MD"), (25, "MA"), (26, "MI"),
        (27, "MN"), (28, "MS"), (29, "MO"), (30, "MT"),
        (31, "NE"), (32, "NV"), (33, "NH"), (34, "NJ"),
        (35, "NM"), (36, "NY"), (37, "NC"), (38, "ND"),
        (39, "OH"), (40, "OK"), (41, "OR"), (42, "PA"),
        (44, "RI"), (45, "SC"), (46, "SD"), (47, "TN"),
        (48, "TX"), (49, "UT"), (50, "VT"), (51, "VA"), (53, "WA"),
        (54, "WV"), (55, "WI"), (56, "WY"),
    )
}


# Derive a state FIPS code for every record by integer-dividing its
# congressional-district GEOID by 100.
cd_geoids = sim.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100

# Translate the derived codes into StateName members.
# NOTE(review): any code missing from STATE_FIPS_TO_NAME becomes NaN here,
# and this array is not read again in the cells shown - confirm whether it
# is still needed.
correct_state_names = (
    pd.Series(correct_state_fips)
    .map(STATE_FIPS_TO_NAME)
    .values
)

# Overwrite the stored `state_fips` input for YEAR with the derived codes.
sim.set_input("state_fips", YEAR, correct_state_fips)

# Drop any arrays already stored for the state variables (when the tax-benefit
# system defines them) so they are recalculated for YEAR after the override.
for _state_variable in ("state_name", "state_code"):
    if _state_variable in sim.tax_benefit_system.variables:
        sim.delete_arrays(_state_variable, YEAR)



In [40]:
# Collect the household id, geography, tax, income and weight variables into
# one frame, then display the state FIPS column to check the override.
df = sim.calculate_dataframe(
    [
        "household_id",
        "state_fips",
        "congressional_district_geoid",
        "income_tax",
        "state_name",
        "state_code",
        "household_net_income",
        "household_weight",
    ]
)
df.state_fips

       value      weight
0         18   13.742280
1         39   61.547729
2          1   16.596466
3          1   34.286915
4          1   15.586526
...      ...         ...
88978      6   18.035107
88979      6  144.022263
88980     24   22.460018
88981     29   27.677790
88982     42   37.072266

[88983 rows x 2 columns]

In [41]:
# Keep only the rows whose state_fips equals 34, the code that
# STATE_FIPS_TO_NAME maps to StateName.NJ, and display the subset.
state_df = df.loc[df.state_fips == 34]
state_df

Unnamed: 0,household_id,state_fips,congressional_district_geoid,income_tax,state_name,state_code,household_net_income,household_weight
54,203,34,3406,3.611006e+05,NJ,NJ,254793.281250,21.920219
100,324,34,3410,8.984263e+05,NJ,NJ,520829.937500,38.141525
117,373,34,3402,3.622267e+04,NJ,NJ,116305.656250,179.311432
243,655,34,3401,1.157711e+04,NJ,NJ,181396.546875,42.934647
244,657,34,3402,1.157711e+04,NJ,NJ,181396.546875,2995.783203
...,...,...,...,...,...,...,...,...
88774,271829,34,3410,1.740626e+05,NJ,NJ,743414.687500,58.284195
88808,271914,34,3409,1.529304e+06,NJ,NJ,74466.750000,37.558510
88832,272046,34,3408,8.131955e+04,NJ,NJ,427765.562500,178.973404
88883,272263,34,3404,5.986858e+04,NJ,NJ,317212.906250,66.759209


In [42]:
def _weighted_mean_net_income(district_rows):
    """Return sum(household_net_income * household_weight) / sum(household_weight) for one group's rows."""
    weights = district_rows["household_weight"]
    weighted_total = (district_rows["household_net_income"] * weights).sum()
    return weighted_total / weights.sum()


# One row per congressional district, with the ratio above in `avg_net_income`.
# NOTE(review): `df.state_fips` displays a `weight` column next to `value`,
# which suggests these frames may be weight-aware. If `.sum()` on them already
# applies weights, multiplying by household_weight would weight twice - confirm.
avg_net_income_by_cd = (
    state_df.groupby("congressional_district_geoid")
    .apply(_weighted_mean_net_income)
    .reset_index(name="avg_net_income")
)

In [43]:
# Print the per-district averages computed above as plain text.
print(avg_net_income_by_cd)

    congressional_district_geoid  avg_net_income
0                           3401    92987.679688
1                           3402    92570.304688
2                           3403    95180.476562
3                           3404   111259.976562
4                           3405   116278.437500
5                           3406   105015.101562
6                           3407   158194.937500
7                           3408    73090.562500
8                           3409    93551.437500
9                           3410    89640.585938
10                          3411    91173.257812
11                          3412   104348.593750
