In [6]:
from policyengine_us import Microsimulation
# Simulation built without a reform, reading the sparse_cd_stacked_2023.h5 dataset.
baseline = Microsimulation(
    dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5",
)

In [7]:

# Only the last expression in a cell is rendered: the district GEOID series on
# the next line is evaluated but its result is discarded, and the table shown
# below the cell is the household_id series.
baseline.calculate("congressional_district_geoid")
baseline.calculate("household_id")


        value      weight
0           0   13.742280
1           1   61.547729
2           2   16.596466
3           6   34.286915
4          10   15.586526
...       ...         ...
88978  272555   18.035107
88979  272557  144.022263
88980  272559   22.460018
88981  272563   27.677790
88982  272567   37.072266

[88983 rows x 2 columns]

In [8]:
# Derive each record's state FIPS code from its congressional-district GEOID and
# write it back into the baseline simulation as the 2023 `state_fips` input.
# The GEOIDs seen in this notebook look like 101 ... 5601, and the code treats the
# leading digits (GEOID // 100) as the state FIPS code.
# NOTE(review): presumably this must run before any state-dependent variable is
# calculated on `baseline`, since already-computed results may not be refreshed
# by set_input — confirm against the simulation's caching behaviour.
cd_geoids = baseline.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100
baseline.set_input("state_fips", 2023, correct_state_fips)


In [9]:
# Spot-check: show household ids next to the overridden state_fips and income_tax.
baseline.calculate_dataframe(
    ["household_id", "state_fips", "income_tax"]
)

Unnamed: 0,household_id,state_fips,income_tax
0,0,18,4587.200195
1,1,39,4587.200195
2,2,1,70842.179688
3,6,1,70842.179688
4,10,1,70842.179688
...,...,...,...
88978,272555,6,71878.273438
88979,272557,6,71878.273438
88980,272559,24,496323.446533
88981,272563,29,496323.446533


In [10]:
import pandas as pd
import numpy as np

year = 2023

# Baseline results, each mapped to the household level for `year`.
state = baseline.calculate("state_code", map_to="household", period=year)

# The variable requested is "household_market_income", so the result is named
# after it (this also mirrors `household_market_income_r` used for the reform).
household_market_income = baseline.calculate("household_market_income", map_to="household", period=year)
# Backward-compatible alias: this series was previously exposed only under the
# misleading name `employment_income`; keep it so existing references still work.
employment_income = household_market_income

household_id = baseline.calculate("household_id", map_to="household", period=year)
congressional_district_geoid = baseline.calculate("congressional_district_geoid", map_to="household", period=year)
household_net_income = baseline.calculate("household_net_income", map_to="household", period=year)





In [13]:
import pandas as pd
import numpy as np

year = 2023

# Household-level district identifier and household weight from the baseline run.
congressional_district_geoid = baseline.calculate(
    "congressional_district_geoid", map_to="household", period=year
)
household_weight = baseline.calculate(
    "household_weight", map_to="household", period=year
)

df_household = pd.DataFrame(
    {
        "congressional_district_geoid": congressional_district_geoid,
        "weight": household_weight,
    }
)

# Per district: number of records (unweighted) and sum of weights (weighted).
df_counts = (
    df_household.groupby("congressional_district_geoid")
    .agg(
        unweighted_households=("weight", "count"),
        weighted_households=("weight", "sum"),
    )
    .reset_index()
)

print(df_counts)

     congressional_district_geoid  unweighted_households  weighted_households
0                             101                    193        244799.125000
1                             102                    263        281133.625000
2                             103                    198        253962.171875
3                             104                    168        241107.125000
4                             105                    252        255262.781250
..                            ...                    ...                  ...
431                          5505                    335        302881.281250
432                          5506                    282        289898.156250
433                          5507                    223        273920.312500
434                          5508                    264        313816.000000
435                          5601                    215        242721.968750

[436 rows x 3 columns]


In [14]:
from policyengine_core.reforms import Reform

# Reform definition: the salt_and_real_estate cap parameter is set to 0 for every
# filing status from 2023-01-01 through 2100-12-31, and the phase-out floor
# "applies" switch is set to False from 2023-01-01 through 2029-12-31.
reform = Reform.from_dict(
    {
        # One identical entry per filing status, in the original key order.
        **{
            f"gov.irs.deductions.itemized.salt_and_real_estate.cap.{filing_status}": {
                "2023-01-01.2100-12-31": 0
            }
            for filing_status in (
                "JOINT",
                "SINGLE",
                "SEPARATE",
                "SURVIVING_SPOUSE",
                "HEAD_OF_HOUSEHOLD",
            )
        },
        "gov.irs.deductions.itemized.salt_and_real_estate.phase_out.floor.applies": {
            "2023-01-01.2029-12-31": False
        },
    },
    country_id="us",
)

In [15]:
# Reform simulation over the same dataset as the baseline.
reformed = Microsimulation(reform=reform,
  dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5")

# The baseline simulation has its 2023 `state_fips` overwritten with
# congressional_district_geoid // 100. Apply the identical override here so that
# baseline and reform place every household in the same state; otherwise
# differences between the two runs could reflect a state mismatch rather than
# the reform itself.
reformed.set_input(
    "state_fips",
    2023,
    reformed.calculate("congressional_district_geoid").values // 100,
)

In [16]:
import numpy as np

year = 2023

# Household-level series from the reform simulation, computed in this order.
(
    congressional_district_geoid_r,
    household_market_income_r,
    household_weight_r,
    household_net_income_r,
) = (
    reformed.calculate(variable_name, map_to="household", period=year)
    for variable_name in (
        "congressional_district_geoid",
        "household_market_income",
        "household_weight",
        "household_net_income",
    )
)

# Baseline counterpart of the net-income series, for side-by-side comparison.
household_net_income = baseline.calculate(
    "household_net_income", map_to="household", period=year
)


In [17]:
# One row per household: district id, household weight, and net income under
# the baseline and under the reform (the `_r` column).
df_household = pd.DataFrame(
    dict(
        congressional_district_geoid=congressional_district_geoid,
        # Raw array taken from the MicroSeries via .values
        household_net_income_r=household_net_income_r.values,
        weight=household_weight,
        household_net_income=household_net_income,
    )
)

# Calculate weighted median by congressional district
def weighted_median(values, weights):
    """Return the weighted (lower) median of ``values``.

    The result is the smallest value whose cumulative weight, taken in
    ascending order of value, reaches half of the total weight.

    Parameters
    ----------
    values : array-like
        Numeric observations. Lists, NumPy arrays and pandas Series (with any
        index) are accepted; inputs are converted to NumPy arrays so that all
        indexing is positional.
    weights : array-like
        Numeric weights, one per observation.

    Returns
    -------
    scalar
        The weighted median, or ``np.nan`` when no observation remains after
        dropping pairs with a NaN value or NaN weight, or when the remaining
        weights do not sum to a positive number.
    """
    # Coerce to arrays: with a pandas Series carrying a non-default index,
    # argsort positions would otherwise be interpreted as index labels.
    values = np.asarray(values)
    weights = np.asarray(weights)

    # Drop pairs where either the value or its weight is NaN; a single NaN
    # weight would otherwise turn the whole cumulative sum into NaN.
    keep = ~(np.isnan(values) | np.isnan(weights))
    values = values[keep]
    weights = weights[keep]

    if values.size == 0:
        return np.nan

    order = np.argsort(values)
    cumulative = np.cumsum(weights[order])
    total = cumulative[-1]

    # With no positive weight mass the median is undefined.
    if not total > 0:
        return np.nan

    return values[order[np.searchsorted(cumulative, 0.5 * total)]]

# Weighted median household net income per congressional district, under the
# baseline and under the reform (`_r`).
# The columns needed by the lambda are selected explicitly before .apply so the
# grouping key is not part of each group frame; applying over the grouping
# column is deprecated in recent pandas releases and emits a warning.
df_outputs = df_household.groupby('congressional_district_geoid')[
    ['household_net_income', 'household_net_income_r', 'weight']
].apply(
    lambda x: pd.Series({
        'median_household_net_income': weighted_median(x['household_net_income'].values, x['weight'].values),
        'median_household_net_income_r': weighted_median(x['household_net_income_r'].values, x['weight'].values)
    })
).reset_index()

print(df_outputs.head(10))

   congressional_district_geoid  median_household_net_income  \
0                           101                 57237.257812   
1                           102                 57037.121094   
2                           103                 61871.488281   
3                           104                 66146.343750   
4                           105                 64440.023438   
5                           106                 66146.343750   
6                           107                 57237.257812   
7                           201                 64558.355469   
8                           401                 96304.296875   
9                           402                 66146.343750   

   median_household_net_income_r  
0                   57237.257812  
1                   57037.121094  
2                   61871.488281  
3                   66146.343750  
4                   64440.023438  
5                   66146.343750  
6                   57237.257812  
7              