In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Baseline microsimulation built from the enhanced CPS 2024 dataset.
# The dataset is referenced by its hf:// URI, exactly as the constructor receives it.
ENHANCED_CPS_2024_URI = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
baseline = Microsimulation(dataset=ENHANCED_CPS_2024_URI)


In [3]:
# Seed NumPy's global RNG so the same households are selected on every run.
np.random.seed(42)

# Share of each program's current recipients whose benefit is set to zero.
LOSS_SHARE = 0.05


def zero_out_random_recipients(values, share=LOSS_SHARE):
    """Return a copy of ``values`` with a random share of its positive entries set to 0.

    Draws from NumPy's global RNG (``np.random``), so the result depends on
    the current seed and on how many draws were made before this call.

    Args:
        values: pandas Series (or subclass) of benefit amounts.
        share: Fraction of the strictly positive entries to zero out. The
            number of entries is truncated toward zero with ``int``.

    Returns:
        Tuple ``(modified, zeroed_positions)``: the modified copy and the
        integer positions (not index labels) that were set to 0. The input
        series is left untouched.
    """
    # Positions (0..n-1) of entries that are strictly positive.
    recipient_positions = np.flatnonzero(np.asarray(values > 0))
    n_to_zero = int(share * len(recipient_positions))
    zeroed_positions = np.random.choice(
        recipient_positions, size=n_to_zero, replace=False
    )
    modified = values.copy()
    # .iloc because these are positions; plain [] would treat them as labels
    # and only coincides with positions on a default RangeIndex.
    modified.iloc[zeroed_positions] = 0
    return modified, zeroed_positions


# Household-level baseline values for each program in 2026.
baseline_snap = baseline.calculate("snap", map_to="household", period=2026)
baseline_medicaid = baseline.calculate("medicaid", map_to="household", period=2026)
baseline_aca_ptc = baseline.calculate("aca_ptc", map_to="household", period=2026)

# Draw order (SNAP, then Medicaid, then ACA PTC) is kept fixed: each call
# consumes the global RNG stream, so reordering would change the selection.
baseline_snap_modified, snap_to_zero = zero_out_random_recipients(baseline_snap)
baseline_medicaid_modified, medicaid_to_zero = zero_out_random_recipients(baseline_medicaid)
baseline_aca_ptc_modified, aca_to_zero = zero_out_random_recipients(baseline_aca_ptc)

In [4]:
# One row per household: baseline vs. reformed amount for each program.
# household_id is included so rows stay identifiable in the CSV (which is
# written without the index) and because the notebook's saved output shows
# this column. NOTE(review): assumes "household_id" is available as a
# PolicyEngine variable at household level -- confirm.
household_data = pd.DataFrame({
    'household_id': baseline.calculate("household_id", map_to="household", period=2026),
    'Baseline SNAP': baseline_snap,
    'Reformed SNAP': baseline_snap_modified,
    'Baseline Medicaid': baseline_medicaid,
    'Reformed Medicaid': baseline_medicaid_modified,
    'Baseline ACA PTC': baseline_aca_ptc,
    'Reformed ACA PTC': baseline_aca_ptc_modified
})

# Flag households that had a positive baseline amount and have exactly 0
# after the modification. The column names keep the spelling "Looses"
# because the summary cell and the exported CSV use these exact names.
for program in ('SNAP', 'Medicaid', 'ACA PTC'):
    had_benefit = household_data[f'Baseline {program}'] > 0
    has_none_now = household_data[f'Reformed {program}'] == 0
    household_data[f'Looses {program}'] = had_benefit & has_none_now


In [5]:
# Write the household table to a CSV in the working directory.
# index=False leaves the DataFrame index out of the file.
output_csv_path = 'household_benefits_reform.csv'
household_data.to_csv(output_csv_path, index=False)


In [6]:
# Report the output file, the number of rows, and the per-program loss counts.
print("Data saved to 'household_benefits_reform.csv'")
print(f"\nTotal households: {len(household_data):,}")
print("\nSummary of changes:")
# Each 'Looses <program>' column is boolean, so its sum counts the True rows.
for program in ('SNAP', 'Medicaid', 'ACA PTC'):
    households_losing = household_data[f'Looses {program}'].sum()
    print(f"{program} benefits removed: {households_losing:,} households")

# Preview the first ten rows of the table.
print("\nFirst 10 rows of the data:")
preview = household_data.head(10)
print(preview)

Data saved to 'household_benefits_reform.csv'

Total households: 41,310

Summary of changes:
SNAP benefits removed: 571 households
Medicaid benefits removed: 548 households
ACA PTC benefits removed: 242 households

First 10 rows of the data:
   household_id  Baseline SNAP  Reformed SNAP  Baseline Medicaid  \
0            12       0.000000       0.000000           0.000000   
1            21       0.000000       0.000000           0.000000   
2            22    3896.510529    3896.510529       11924.721680   
3            30     288.630402     288.630402       11924.721680   
4            36       0.000000       0.000000       11924.721680   
5            41       0.000000       0.000000           0.000000   
6            44    1524.380127    1524.380127       23849.443359   
7            68     731.480103     731.480103       11924.721680   
8            73       0.000000       0.000000           0.000000   
9            78       0.000000       0.000000           0.000000   

   Reform