In [None]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform

# Build the reform's parameter overrides programmatically: every filing status
# gets the same SALT-and-real-estate cap value over the same date range.
# NOTE(review): a cap of 0 presumably limits the deduction to zero (i.e. removes
# the deduction) rather than removing the cap itself — confirm this matches the
# "Abolishing SALT Cap" wording used when the results are printed.
_salt_prefix = "gov.irs.deductions.itemized.salt_and_real_estate"
_filing_statuses = (
    "JOINT",
    "SINGLE",
    "SEPARATE",
    "SURVIVING_SPOUSE",
    "HEAD_OF_HOUSEHOLD",
)

_reform_parameters = {
    f"{_salt_prefix}.cap.{status}": {"2025-01-01.2100-12-31": 0}
    for status in _filing_statuses
}
# Switch off the phase-out floor for 2025 through 2029.
_reform_parameters[f"{_salt_prefix}.phase_out.floor.applies"] = {
    "2025-01-01.2029-12-31": False
}

reform = Reform.from_dict(_reform_parameters, country_id="us")

In [None]:
# Congressional District Analysis - Setup
# Imports used by the district-level cells below, plus a progress message.
from policyengine_us import Microsimulation
import pandas as pd

print("Setting up congressional district microsimulations...")

In [None]:
# Create baseline microsimulation with congressional district data
# (no reform is passed, so this simulation is the comparison point).
cd_baseline = Microsimulation(dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5")

# Get congressional district geoids and correct state_fips
# Assumes the geoid is encoded as state FIPS * 100 + district number, so integer
# division by 100 recovers the state — TODO confirm against the dataset docs.
# `correct_state_fips` is reused when the reform simulation is set up.
cd_geoids = cd_baseline.calculate("congressional_district_geoid").values
correct_state_fips = cd_geoids // 100
cd_baseline.set_input("state_fips", 2023, correct_state_fips)

print("Baseline microsimulation ready")

In [None]:
# Create reform microsimulation
# Same dataset as the baseline, with `reform` applied. The state_fips override
# computed from the baseline geoids (`correct_state_fips`) is applied here too
# so both simulations use identical state assignments.
cd_reformed = Microsimulation(dataset="hf://policyengine/test/sparse_cd_stacked_2023.h5", reform=reform)
cd_reformed.set_input("state_fips", 2023, correct_state_fips)

print("Reform microsimulation ready")

In [None]:
# Calculate baseline data - individual variables to save memory
print("Calculating baseline variables individually...")
household_ids = cd_baseline.calculate("household_id")
state_fips = cd_baseline.calculate("state_fips") 
cd_geoids = cd_baseline.calculate("congressional_district_geoid")
baseline_income = cd_baseline.calculate("household_net_income", period=2025)
baseline_tax = cd_baseline.calculate("income_tax", period=2025)
weights = cd_baseline.calculate("household_weight", period=2025)

print(f"Baseline variables calculated for {len(household_ids)} households")

In [None]:
# Calculate reform data - individual variables
print("Calculating reform variables...")
reform_income = cd_reformed.calculate("household_net_income", period=2025)
reform_tax = cd_reformed.calculate("income_tax", period=2025)

print(f"Reform variables calculated")

In [None]:
# Assemble the separately calculated series into one table, one row per record
print("Creating impact dataframe...")
import pandas as pd

cd_data = pd.DataFrame(
    {
        'household_id': household_ids,
        'state_fips': state_fips,
        'congressional_district_geoid': cd_geoids,
        'baseline_income': baseline_income,
        'reform_income': reform_income,
        'baseline_tax': baseline_tax,
        'reform_tax': reform_tax,
        'household_weight': weights,
    }
)

# Derived columns: reform-minus-baseline differences, and the district number
# taken as the last two digits of the geoid.
cd_data['reform_impact'] = cd_data['reform_income'].sub(cd_data['baseline_income'])
cd_data['tax_change'] = cd_data['reform_tax'].sub(cd_data['baseline_tax'])
cd_data['district_number'] = cd_data['congressional_district_geoid'].mod(100)

print(f"Impact dataframe created with {len(cd_data)} households")

# Drop the standalone series now that the table holds the data.
# Note: this makes the cell non-re-runnable without re-running the
# calculation cells first.
del household_ids, state_fips, cd_geoids
del baseline_income, reform_income, baseline_tax, reform_tax
del weights

In [None]:
# Group by congressional district and calculate weighted averages
print("Aggregating by congressional district...")

# Grouping keys; all three are carried through so they appear as columns in the
# summary (state and district number are both derived from the geoid).
_district_keys = [
    cd_data['state_fips'],
    cd_data['congressional_district_geoid'],
    cd_data['district_number'],
]
_household_weight = cd_data['household_weight']

# Vectorized grouped sums replace groupby(...).apply(lambda ...): no Python
# callback per district, and no pandas deprecation warning about applying over
# the grouping columns. Only temporary Series are created, not a frame copy.
_weight_by_district = _household_weight.groupby(_district_keys).sum()
_income_impact_by_district = (
    (cd_data['reform_impact'] * _household_weight).groupby(_district_keys).sum()
)
_tax_change_by_district = (
    (cd_data['tax_change'] * _household_weight).groupby(_district_keys).sum()
)

# Weighted mean = sum(value * weight) / sum(weight). A district whose weights
# sum to zero yields NaN, which sort_values places last.
cd_summary = pd.DataFrame({
    'avg_income_impact': _income_impact_by_district / _weight_by_district,
    'avg_tax_change': _tax_change_by_district / _weight_by_district,
    'total_households': _weight_by_district,
}).reset_index()

# Largest average income gain first.
cd_summary = cd_summary.sort_values('avg_income_impact', ascending=False)

print(f"Summary created for {len(cd_summary)} congressional districts")

In [None]:
# Show the ten districts at the head of the summary (it is sorted by average
# income impact, descending), restricted to the reporting columns.
_top_columns = [
    'state_fips',
    'district_number',
    'avg_income_impact',
    'avg_tax_change',
    'total_households',
]
print("Top 10 Congressional Districts by Average Household Income Impact from Abolishing SALT Cap:")
print(cd_summary[_top_columns].head(10))

In [None]:
# Show the ten districts at the tail of the summary (smallest average income
# impact, given the descending sort), restricted to the reporting columns.
_bottom_columns = [
    'state_fips',
    'district_number',
    'avg_income_impact',
    'avg_tax_change',
    'total_households',
]
print("Bottom 10 Congressional Districts (least benefit or most harm):")
print(cd_summary[_bottom_columns].tail(10))

In [None]:
# Headline figures across districts. The mean and median below treat every
# district equally (they are not weighted by district household counts).
_district_impacts = cd_summary['avg_income_impact']
_n_districts = len(cd_summary)
_n_positive = int((_district_impacts > 0).sum())
_n_negative = int((_district_impacts < 0).sum())
_mean_impact = _district_impacts.mean()
_median_impact = _district_impacts.median()

print("Overall Statistics:")
print(f"Total Congressional Districts: {_n_districts}")
print(f"Districts with positive income impact: {_n_positive}")
print(f"Districts with negative income impact: {_n_negative}")
print(f"Average impact across all districts: ${_mean_impact:.2f}")
print(f"Median impact across all districts: ${_median_impact:.2f}")

In [None]:
# Display the full summary dataset
# A bare last-line expression, so the notebook renders the frame with its rich
# display; rows are in the descending avg_income_impact order set when the
# summary was built.
cd_summary