In [30]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd

baseline = Microsimulation(dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5")

In [2]:
reform = Reform.from_dict({
  "gov.aca.ptc_phase_out_rate[0].amount": {
    "2026-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[1].amount": {
    "2025-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[2].amount": {
    "2026-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[3].amount": {
    "2026-01-01.2100-12-31": 0.02
  },
  "gov.aca.ptc_phase_out_rate[4].amount": {
    "2026-01-01.2100-12-31": 0.04
  },
  "gov.aca.ptc_phase_out_rate[5].amount": {
    "2026-01-01.2100-12-31": 0.06
  },
  "gov.aca.ptc_phase_out_rate[6].amount": {
    "2026-01-01.2100-12-31": 0.085
  },
  "gov.aca.ptc_income_eligibility[2].amount": {
    "2026-01-01.2100-12-31": True
  }
}, country_id="us")




In [3]:
baseline = Microsimulation(dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5")
reformed = Microsimulation(reform=reform, dataset="hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5")
weights = baseline.calculate("household_weight", period=2024)



In [4]:
baseline_aca_eligible = baseline.calculate("is_aca_ptc_eligible", map_to="tax_unit", period=2026).sum()
baseline_aca_eligible/1e6

31.522985547331057

In [5]:
baseline_aca_enrollment = baseline.calculate("takes_up_aca_if_eligible", map_to="person", period=2026).sum()
baseline_aca_enrollment/1e6

215.80671373160038

In [7]:
period = 2026

# ── Tax-unit flags, broadcast to people ──────────────────────────────────────
takes_up_r = reformed.calculate("takes_up_aca_if_eligible",
                         map_to="person", period=period)        # 0/1
aca_ptc_r  = reformed.calculate("aca_ptc",
                         map_to="person", period=period)        # $ amount

# ── PERSON weights (pick any person-level variable) ─────────────────────────
person_wt_r = reformed.calculate("age", map_to="person", period=period).weights

# ── Build mask & sum weights ────────────────────────────────────────────────
mask = (takes_up_r == 1) & (aca_ptc_r > 0)

people_with_ptc_takeup_wtd_r = (mask.astype(float) * person_wt_r).sum()

print(f"{people_with_ptc_takeup_wtd_r:,.0f} weighted people live in tax units "
      "that take up Marketplace coverage and actually receive a PTC.")


21,627,010 weighted people live in tax units that take up Marketplace coverage and actually receive a PTC.


In [8]:
period = 2026
sim    = baseline

# ── Tax-unit flags, broadcast to people ──────────────────────────────────────
takes_up = sim.calculate("takes_up_aca_if_eligible",
                         map_to="person", period=period)        # 0/1
aca_ptc  = sim.calculate("aca_ptc",
                         map_to="person", period=period)        # $ amount

# ── PERSON weights (pick any person-level variable) ─────────────────────────
person_wt = sim.calculate("age", map_to="person", period=period).weights

# ── Build mask & sum weights ────────────────────────────────────────────────
mask = (takes_up == 1) & (aca_ptc > 0)

people_with_ptc_takeup_wtd = (mask.astype(float) * person_wt).sum()

print(f"{people_with_ptc_takeup_wtd:,.0f} weighted people live in tax units "
      "that take up Marketplace coverage and actually receive a PTC.")


16,264,243 weighted people live in tax units that take up Marketplace coverage and actually receive a PTC.


In [9]:
year = 2026
state = baseline.calculate("state_code", map_to="household", period=year)
num_dependents = baseline.calculate("tax_unit_dependents", map_to="household", period=year)
married = baseline.calculate("is_married", map_to="household", period=year)
employment_income = baseline.calculate("employment_income", map_to="household", period=year)
self_employment_income = baseline.calculate("self_employment_income", map_to="household", period=year)
aca_baseline = baseline.calculate("aca_ptc", map_to="household", period=year)
rating_area = baseline.calculate("slcsp_rating_area", map_to="household", period=year)
household_id = baseline.calculate("household_id", map_to="household", period=year)
aca_reform = reformed.calculate("aca_ptc", map_to="household", period=year)

In [10]:
# Create a DataFrame with the outputs
data = {
    "household_id": household_id,
    "State": state,
    "Married": married,
    "Num_Dependents": num_dependents,
    "Employment_Income": employment_income,
    "aca_baseline": aca_baseline,
    "aca_reform": aca_reform,

   }


df_outputs = pd.DataFrame(data)
df_outputs[df_outputs['household_id'] == 4428]


Unnamed: 0,household_id,State,Married,Num_Dependents,Employment_Income,aca_baseline,aca_reform


In [11]:
# -------------------------------------------------------------
# 0️⃣  Make sure the CPS household weight is in the DataFrame
# -------------------------------------------------------------
# If you already stuffed it in earlier, skip this.
df_outputs["weight"] = aca_baseline.weights    # aligns by household_id

# -------------------------------------------------------------
# 1️⃣  Define a weight threshold for “reasonably representative”
# -------------------------------------------------------------
MIN_WT = 10_000          # ↖ try 5_000 if you want a looser cut

df_big = df_outputs[df_outputs["weight"] >= MIN_WT].copy()

# -------------------------------------------------------------
# 2️⃣  Net PTC change and (optionally) weighted national impact
# -------------------------------------------------------------
df_big["net_change"] = df_big["aca_reform"] - df_big["aca_baseline"]
df_big["wt_change"]  = df_big["net_change"] * df_big["weight"]  # national $ impact

# -------------------------------------------------------------
# 3️⃣  Biggest ↑ increases and ↓ decreases, LIMITED to big-weight HHs
# -------------------------------------------------------------
N = 10   # how many households to show in each direction

cols = ["household_id", "State", "weight", "net_change", "wt_change"]

top_increases = df_big.nlargest(N, "net_change")[cols]
top_decreases = df_big.nsmallest(N, "net_change")[cols]

print("Most positive net-income changes (PTC boosts):")
display(top_increases)

print("\nMost negative net-income changes (PTC cuts):")
display(top_decreases)


Most positive net-income changes (PTC boosts):


Unnamed: 0,household_id,State,weight,net_change,wt_change
20399,169645,CA,13258.06543,15820.259766,209746000.0
4458,25686,MO,28209.792969,14617.822266,412365700.0
9783,65525,TX,98229.8125,13276.694336,1304167000.0
8078,52185,AL,22522.595703,12071.033447,271871000.0
16922,130961,GA,47297.285156,10316.983887,487965300.0
17319,134228,FL,486767.6875,9320.427734,4536883000.0
20,225,ME,13571.901367,8256.048584,112050300.0
8721,57982,LA,22817.617188,7391.458984,168655500.0
10806,75372,UT,22297.890625,6740.130859,150290700.0
18645,149507,TX,17901.060547,6156.899414,110215000.0



Most negative net-income changes (PTC cuts):


Unnamed: 0,household_id,State,weight,net_change,wt_change
1,24,ME,28454.318359,0.0,0.0
4,39,ME,29125.925781,0.0,0.0
6,45,ME,28193.009766,0.0,0.0
9,93,ME,19098.552734,0.0,0.0
12,114,ME,15778.462891,0.0,0.0
16,154,ME,43222.703125,0.0,0.0
19,218,ME,25887.748047,0.0,0.0
23,238,ME,24463.904297,0.0,0.0
29,312,ME,10572.973633,0.0,0.0
30,316,ME,10667.022461,0.0,0.0


In [12]:
df_outputs[df_outputs['household_id'] == 4428]


Unnamed: 0,household_id,State,Married,Num_Dependents,Employment_Income,aca_baseline,aca_reform,weight


In [13]:
# 0. Make sure net_change exists
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# 1. Flag households with any change
mask = df_outputs["net_change"] != 0          # True for ↑ or ↓

# 2. Weighted mean among those households
avg_net_change = (
    (df_outputs.loc[mask, "net_change"] * df_outputs.loc[mask, "weight"]).sum()
    / df_outputs.loc[mask, "weight"].sum()
)

print(f"Average weighted PTC change among households with any change: "
      f"${avg_net_change:,.2f}")


Average weighted PTC change among households with any change: $2,255.30


In [14]:
# ------------------------------------------------------------------
# 0.  Ensure supporting columns exist
# ------------------------------------------------------------------
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# ------------------------------------------------------------------
# 1.  Keep only households with a PTC in *both* scenarios
# ------------------------------------------------------------------
mask_both_ptc = (df_outputs["aca_baseline"] > 0) & (df_outputs["aca_reform"] > 0)
df_dual_ptc   = df_outputs[mask_both_ptc]

# ------------------------------------------------------------------
# 2.  Weighted average of the net change (household perspective)
# ------------------------------------------------------------------
avg_net_change_dual_hh = (
    (df_dual_ptc["net_change"] * df_dual_ptc["weight"]).sum()
    / df_dual_ptc["weight"].sum()
)

print(f"Average weighted PTC change among households with a PTC in both "
      f"baseline and reform: ${avg_net_change_dual_hh:,.2f}")


Average weighted PTC change among households with a PTC in both baseline and reform: $1,720.00


In [15]:
# ------------------------------------------------------------------
# 0.  Ensure supporting columns exist (already done above)
# ------------------------------------------------------------------
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# ------------------------------------------------------------------
# 1.  Keep only households that *gain* a PTC (reform > 0, baseline == 0)
# ------------------------------------------------------------------
mask_reform_only = (df_outputs["aca_baseline"] == 0) & (df_outputs["aca_reform"] > 0)
df_reform_only   = df_outputs[mask_reform_only]

# ------------------------------------------------------------------
# 2.  Weighted average of the net change (household perspective)
# ------------------------------------------------------------------
avg_net_change_reform_only_hh = (
    (df_reform_only["net_change"] * df_reform_only["weight"]).sum()
    / df_reform_only["weight"].sum()
)

print(f"Average weighted PTC change among households that newly receive a PTC "
      f"under the reform: ${avg_net_change_reform_only_hh:,.2f}")


Average weighted PTC change among households that newly receive a PTC under the reform: $3,958.29


In [16]:
import numpy as np
from policyengine_us import Simulation

# -------------------------------
# 1. Pull household-level results
# -------------------------------
# ACA PTC (baseline and reform)
ptc_base   = baseline.calculate("aca_ptc", map_to="household", period=2026)
ptc_reform = reformed.calculate("aca_ptc",   map_to="household", period=2026)

# Household weights (same for both sims)
hh_wt      = baseline.calculate("household_weight", map_to="household", period=2026)

# -------------------------------
# 2. Weighted sum of the change
# -------------------------------
weighted_total_change = ptc_reform - ptc_base

# Optional: average change per household
weighted_total_change.sum()/1e9

29.94559487581596

In [17]:
# Let's analyze the households affected by the ACA reform
import pandas as pd
import numpy as np

# First, let's look at households that gain PTC under reform but had none in baseline
gained_ptc = df_outputs[(df_outputs['aca_baseline'] == 0) & (df_outputs['aca_reform'] > 0)]

print(f"Number of households gaining PTC under reform: {len(gained_ptc)}")
print(f"Weighted count: {gained_ptc['weight'].sum():,.0f}")
print(f"\nAverage reform PTC for these households: ${gained_ptc['aca_reform'].mean():,.2f}")
print(f"Weighted average reform PTC: ${(gained_ptc['aca_reform'] * gained_ptc['weight']).sum() / gained_ptc['weight'].sum():,.2f}")

Number of households gaining PTC under reform: 735
Weighted count: 3,175,484

Average reform PTC for these households: $5,537.77
Weighted average reform PTC: $3,958.29


In [18]:
# Let's look at income distribution of households gaining PTC
import matplotlib.pyplot as plt

# Add income deciles to the gained_ptc dataframe
gained_ptc_with_income = gained_ptc.copy()

# Calculate weighted income percentiles for context
income_percentiles = np.percentile(df_outputs['Employment_Income'], [25, 50, 75, 90, 95])
print("Income percentiles across all households:")
for i, pct in enumerate([25, 50, 75, 90, 95]):
    print(f"  {pct}th percentile: ${income_percentiles[i]:,.0f}")

# Show income distribution of households gaining PTC
print("\nIncome distribution of households GAINING PTC under reform:")
print(gained_ptc_with_income['Employment_Income'].describe())

# Show top 10 households by PTC gain amount
print("\nTop 10 households by PTC gain (sorted by reform PTC amount):")
top_gainers = gained_ptc_with_income.nlargest(10, 'aca_reform')[['household_id', 'State', 'Employment_Income', 'aca_reform', 'Married', 'Num_Dependents', 'weight']]
top_gainers

Income percentiles across all households:
  25th percentile: $8,652
  50th percentile: $58,135
  75th percentile: $120,658
  90th percentile: $207,460
  95th percentile: $286,710

Income distribution of households GAINING PTC under reform:
count       735.000000
mean     112912.632981
std       74678.682403
min           0.000000
25%       69266.516724
50%      106398.339844
75%      146054.972656
max      673665.558594
Name: Employment_Income, dtype: float64

Top 10 households by PTC gain (sorted by reform PTC amount):


Unnamed: 0,household_id,State,Employment_Income,aca_reform,Married,Num_Dependents,weight
20341,169286,CA,62109.420319,32558.427734,1.0,1.0,0.000828
5649,36746,WV,98720.109375,29152.945312,1.0,3.0,2988.435059
20840,173746,CA,105312.605469,28387.611328,1.0,2.0,0.000374
17529,136235,FL,53152.341797,26275.839844,1.0,1.0,0.003841
11669,80258,CA,27422.251953,25611.121094,1.0,0.0,432.676544
20327,169178,CA,148016.669434,23795.818359,1.0,0.0,0.004597
403,4526,MA,377878.640625,23117.976562,1.0,0.0,800.343323
2898,18181,IL,12065.791016,22556.402344,1.0,0.0,853.170044
3115,19282,IL,186473.511719,21926.943359,1.0,0.0,32.112518
12540,85894,CA,131626.8125,21483.894531,1.0,2.0,2954.752686


In [19]:
# Let's continue without matplotlib
# Add income deciles to the gained_ptc dataframe
gained_ptc_with_income = gained_ptc.copy()

# Calculate weighted income percentiles for context
income_percentiles = np.percentile(df_outputs['Employment_Income'], [25, 50, 75, 90, 95])
print("Income percentiles across all households:")
for i, pct in enumerate([25, 50, 75, 90, 95]):
    print(f"  {pct}th percentile: ${income_percentiles[i]:,.0f}")

print("\n" + "="*60)
print("Income distribution of households GAINING PTC under reform:")
print("="*60)
print(gained_ptc_with_income['Employment_Income'].describe())

print("\n" + "="*60)
print("Top 10 households by PTC gain (sorted by reform PTC amount):")
print("="*60)
top_gainers = gained_ptc_with_income.nlargest(10, 'aca_reform')[['household_id', 'State', 'Employment_Income', 'aca_reform', 'Married', 'Num_Dependents', 'weight']]
display(top_gainers)

Income percentiles across all households:
  25th percentile: $8,652
  50th percentile: $58,135
  75th percentile: $120,658
  90th percentile: $207,460
  95th percentile: $286,710

Income distribution of households GAINING PTC under reform:
count       735.000000
mean     112912.632981
std       74678.682403
min           0.000000
25%       69266.516724
50%      106398.339844
75%      146054.972656
max      673665.558594
Name: Employment_Income, dtype: float64

Top 10 households by PTC gain (sorted by reform PTC amount):


Unnamed: 0,household_id,State,Employment_Income,aca_reform,Married,Num_Dependents,weight
20341,169286,CA,62109.420319,32558.427734,1.0,1.0,0.000828
5649,36746,WV,98720.109375,29152.945312,1.0,3.0,2988.435059
20840,173746,CA,105312.605469,28387.611328,1.0,2.0,0.000374
17529,136235,FL,53152.341797,26275.839844,1.0,1.0,0.003841
11669,80258,CA,27422.251953,25611.121094,1.0,0.0,432.676544
20327,169178,CA,148016.669434,23795.818359,1.0,0.0,0.004597
403,4526,MA,377878.640625,23117.976562,1.0,0.0,800.343323
2898,18181,IL,12065.791016,22556.402344,1.0,0.0,853.170044
3115,19282,IL,186473.511719,21926.943359,1.0,0.0,32.112518
12540,85894,CA,131626.8125,21483.894531,1.0,2.0,2954.752686


In [20]:
# Let's continue without matplotlib
# Add income deciles to the gained_ptc dataframe
gained_ptc_with_income = gained_ptc.copy()

# Calculate weighted income percentiles for context
income_percentiles = np.percentile(df_outputs['Employment_Income'], [25, 50, 75, 90, 95])
print("Income percentiles across all households:")
for i, pct in enumerate([25, 50, 75, 90, 95]):
    print(f"  {pct}th percentile: ${income_percentiles[i]:,.0f}")

print("\n" + "="*60)
print("Income distribution of households GAINING PTC under reform:")
print("="*60)
print(gained_ptc_with_income['Employment_Income'].describe())

print("\n" + "="*60)
print("Top 10 households by PTC gain (sorted by reform PTC amount):")
print("="*60)
top_gainers = gained_ptc_with_income.nlargest(10, 'aca_reform')[['household_id', 'State', 'Employment_Income', 'aca_reform', 'Married', 'Num_Dependents', 'weight']]
top_gainers

Income percentiles across all households:
  25th percentile: $8,652
  50th percentile: $58,135
  75th percentile: $120,658
  90th percentile: $207,460
  95th percentile: $286,710

Income distribution of households GAINING PTC under reform:
count       735.000000
mean     112912.632981
std       74678.682403
min           0.000000
25%       69266.516724
50%      106398.339844
75%      146054.972656
max      673665.558594
Name: Employment_Income, dtype: float64

Top 10 households by PTC gain (sorted by reform PTC amount):


Unnamed: 0,household_id,State,Employment_Income,aca_reform,Married,Num_Dependents,weight
20341,169286,CA,62109.420319,32558.427734,1.0,1.0,0.000828
5649,36746,WV,98720.109375,29152.945312,1.0,3.0,2988.435059
20840,173746,CA,105312.605469,28387.611328,1.0,2.0,0.000374
17529,136235,FL,53152.341797,26275.839844,1.0,1.0,0.003841
11669,80258,CA,27422.251953,25611.121094,1.0,0.0,432.676544
20327,169178,CA,148016.669434,23795.818359,1.0,0.0,0.004597
403,4526,MA,377878.640625,23117.976562,1.0,0.0,800.343323
2898,18181,IL,12065.791016,22556.402344,1.0,0.0,853.170044
3115,19282,IL,186473.511719,21926.943359,1.0,0.0,32.112518
12540,85894,CA,131626.8125,21483.894531,1.0,2.0,2954.752686


In [21]:
# Now let's look at households losing PTC or seeing reduced PTC
lost_or_reduced = df_outputs[(df_outputs['aca_baseline'] > 0) & (df_outputs['net_change'] < 0)]

print("Households LOSING or SEEING REDUCED PTC:")
print(f"Number of households: {len(lost_or_reduced)}")
print(f"Weighted count: {lost_or_reduced['weight'].sum():,.0f}")
print(f"\nAverage baseline PTC: ${lost_or_reduced['aca_baseline'].mean():,.2f}")
print(f"Average reform PTC: ${lost_or_reduced['aca_reform'].mean():,.2f}")
print(f"Average loss: ${lost_or_reduced['net_change'].mean():,.2f}")

# Income distribution
print("\nIncome distribution of households losing PTC benefits:")
print(lost_or_reduced['Employment_Income'].describe())

# Top losers
print("\nTop 10 households by PTC loss:")
top_losers = lost_or_reduced.nsmallest(10, 'net_change')[['household_id', 'State', 'Employment_Income', 'aca_baseline', 'aca_reform', 'net_change', 'weight']]
top_losers

Households LOSING or SEEING REDUCED PTC:
Number of households: 0
Weighted count: 0

Average baseline PTC: $nan
Average reform PTC: $nan
Average loss: $nan

Income distribution of households losing PTC benefits:
count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: Employment_Income, dtype: float64

Top 10 households by PTC loss:


Unnamed: 0,household_id,State,Employment_Income,aca_baseline,aca_reform,net_change,weight


In [22]:
# Interesting - no households lose PTC! Let's look at those who keep their PTC but see changes
kept_ptc = df_outputs[(df_outputs['aca_baseline'] > 0) & (df_outputs['aca_reform'] > 0)]

print("Households with PTC in BOTH baseline and reform:")
print(f"Number of households: {len(kept_ptc)}")
print(f"Weighted count: {kept_ptc['weight'].sum():,.0f}")
print(f"\nAverage baseline PTC: ${kept_ptc['aca_baseline'].mean():,.2f}")
print(f"Average reform PTC: ${kept_ptc['aca_reform'].mean():,.2f}")
print(f"Average change: ${kept_ptc['net_change'].mean():,.2f}")

# Show distribution of changes
print("\nDistribution of PTC changes for households with PTC in both scenarios:")
print(kept_ptc['net_change'].describe())

# Households with biggest increases among those who already had PTC
print("\nTop 10 PTC increases among households who already had PTC:")
top_increases = kept_ptc.nlargest(10, 'net_change')[['household_id', 'State', 'Employment_Income', 'aca_baseline', 'aca_reform', 'net_change', 'weight']]
top_increases

Households with PTC in BOTH baseline and reform:
Number of households: 2364
Weighted count: 10,102,365

Average baseline PTC: $7,835.31
Average reform PTC: $9,869.01
Average change: $2,033.70

Distribution of PTC changes for households with PTC in both scenarios:
count     2364.000000
mean      2033.699509
std       1636.354126
min        432.478516
25%       1331.057434
50%       1614.754150
75%       2295.041748
max      24952.986328
Name: net_change, dtype: float64

Top 10 PTC increases among households who already had PTC:


Unnamed: 0,household_id,State,Employment_Income,aca_baseline,aca_reform,net_change,weight
9799,65975,MT,137111.265625,4337.34668,29290.333008,24952.986328,710.749634
20290,168857,CA,161008.552246,2932.049805,25600.066895,22668.01709,0.00123
362,4364,MA,186471.316406,3176.394043,23121.227539,19944.833496,2532.372803
19676,162981,AZ,165290.254883,10109.242188,28835.799805,18726.557617,0.004784
8810,58746,OK,32906.703125,4504.324219,22193.167969,17688.84375,875.337952
16643,128525,NC,156843.703125,2679.212402,19748.850586,17069.638184,0.000406
12583,86144,CA,68007.185547,3882.245605,20394.421387,16512.175781,1215.460083
20689,172472,CA,294561.900269,3027.248291,19236.569824,16209.321533,0.023022
20581,171446,CA,212218.455322,6542.669434,21250.246094,14707.57666,8.7e-05
15666,114436,MO,65755.822266,6209.318848,19989.151855,13779.833008,9.5e-05


In [23]:
# Let's calculate approximate FPL levels for households to understand where they fall
# 2026 FPL estimates (rough approximations based on current trends)
fpl_2026 = {
    1: 15570,   # Single person
    2: 21130,   # Couple
    3: 26650,   # Family of 3
    4: 32200,   # Family of 4
    5: 37750,   # Family of 5
    6: 43300,   # Family of 6
    7: 48850,   # Family of 7
    8: 54400,   # Family of 8
}

# Calculate household size and FPL ratio
df_outputs['household_size'] = 2 + df_outputs['Num_Dependents']  # Assuming married couples or singles with deps
df_outputs['household_size'] = df_outputs.apply(
    lambda row: (1 + row['Married'] + row['Num_Dependents']) if not pd.isna(row['Married']) else 1,
    axis=1
)

# Map FPL based on household size
df_outputs['fpl_threshold'] = df_outputs['household_size'].map(lambda x: fpl_2026.get(min(int(x), 8), 54400))
df_outputs['fpl_ratio'] = (df_outputs['Employment_Income'] / df_outputs['fpl_threshold']) * 100

# Now let's analyze the cliff effect around 400% FPL
print("="*70)
print("ANALYSIS OF THE 400% FPL CLIFF EFFECT")
print("="*70)

# Households just below and above 400% FPL
near_cliff = df_outputs[(df_outputs['fpl_ratio'] >= 350) & (df_outputs['fpl_ratio'] <= 450)]
print(f"\nHouseholds between 350-450% FPL: {len(near_cliff)}")
print(f"Weighted count: {near_cliff['weight'].sum():,.0f}")

# Split by those above and below 400% FPL
below_400 = near_cliff[near_cliff['fpl_ratio'] <= 400]
above_400 = near_cliff[near_cliff['fpl_ratio'] > 400]

print(f"\nBelow 400% FPL (350-400%): {len(below_400)} households")
print(f"  Average baseline PTC: ${below_400['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${below_400['aca_reform'].mean():,.2f}")
print(f"  Average change: ${below_400['net_change'].mean():,.2f}")

print(f"\nAbove 400% FPL (400-450%): {len(above_400)} households")
print(f"  Average baseline PTC: ${above_400['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${above_400['aca_reform'].mean():,.2f}")
print(f"  Average change: ${above_400['net_change'].mean():,.2f}")

# Show some examples
print("\n" + "="*70)
print("EXAMPLE HOUSEHOLDS AT THE CLIFF (395-405% FPL):")
print("="*70)
cliff_examples = df_outputs[(df_outputs['fpl_ratio'] >= 395) & (df_outputs['fpl_ratio'] <= 405)]
cliff_examples_display = cliff_examples[['household_id', 'State', 'Employment_Income', 'fpl_ratio', 
                                         'aca_baseline', 'aca_reform', 'net_change', 'weight']].head(10)
cliff_examples_display

ANALYSIS OF THE 400% FPL CLIFF EFFECT

Households between 350-450% FPL: 1970
Weighted count: 12,810,206

Below 400% FPL (350-400%): 998 households
  Average baseline PTC: $1,222.29
  Average reform PTC: $1,694.37
  Average change: $472.07

Above 400% FPL (400-450%): 972 households
  Average baseline PTC: $697.03
  Average reform PTC: $1,504.48
  Average change: $807.45

EXAMPLE HOUSEHOLDS AT THE CLIFF (395-405% FPL):


Unnamed: 0,household_id,State,Employment_Income,fpl_ratio,aca_baseline,aca_reform,net_change,weight
408,4553,MA,85557.425781,404.909729,8567.232422,9705.416992,1138.18457,1038.525269
423,4625,MA,85557.429688,404.909748,0.0,0.0,0.0,3414.69873
483,4947,MA,151370.828125,400.982326,0.0,0.0,0.0,259.656281
764,6696,CT,84460.539062,399.718595,0.0,0.0,0.0,579.064209
1198,9218,NY,106398.335938,399.243287,0.0,5805.855469,5805.855469,1150.545044
1291,9721,NY,62522.734375,401.558988,0.0,1747.408203,1747.408203,1066.17627
1314,9858,NY,127239.246094,395.152938,0.0,0.0,0.0,409.347443
1506,10965,NJ,174405.53125,402.784137,0.0,0.0,0.0,83.79158
1679,11953,NJ,128994.277344,400.603346,0.0,0.0,0.0,161909.40625
1849,12884,PA,105301.453125,395.127404,0.0,0.0,0.0,372.428925


In [24]:
# Let's look more specifically at the income deciles to see where the cliff effect shows up
# Calculate income deciles
df_outputs['income_decile'] = pd.qcut(df_outputs['Employment_Income'], 10, labels=range(1, 11), duplicates='drop')

# Group by decile and show the effect
decile_analysis = df_outputs.groupby('income_decile').agg({
    'Employment_Income': ['min', 'max', 'mean'],
    'fpl_ratio': 'mean',
    'aca_baseline': 'mean',
    'aca_reform': 'mean',
    'net_change': 'mean',
    'weight': 'sum'
}).round(2)

print("="*70)
print("PTC EFFECTS BY INCOME DECILE")
print("="*70)
print("\nIncome ranges and average PTC changes by decile:")
decile_analysis

ValueError: Bin labels must be one fewer than the number of bin edges

In [25]:
# Fix the decile calculation
# Calculate income deciles without explicit labels to avoid the error
df_outputs['income_decile'] = pd.qcut(df_outputs['Employment_Income'], 10, duplicates='drop')

# Get unique deciles and sort them
deciles = sorted(df_outputs['income_decile'].unique())

# Create a mapping to simpler labels
decile_map = {d: i+1 for i, d in enumerate(deciles)}
df_outputs['decile_num'] = df_outputs['income_decile'].map(decile_map)

# Group by decile and show the effect
decile_analysis = df_outputs.groupby('decile_num').agg({
    'Employment_Income': ['min', 'max', 'mean'],
    'fpl_ratio': 'mean',
    'aca_baseline': 'mean',
    'aca_reform': 'mean',
    'net_change': 'mean',
    'weight': 'sum'
}).round(2)

print("="*70)
print("PTC EFFECTS BY INCOME DECILE")
print("="*70)
print("\nIncome ranges and average PTC changes by decile:")
decile_analysis

PTC EFFECTS BY INCOME DECILE

Income ranges and average PTC changes by decile:


Unnamed: 0_level_0,Employment_Income,Employment_Income,Employment_Income,fpl_ratio,aca_baseline,aca_reform,net_change,weight
Unnamed: 0_level_1,min,max,mean,mean,mean,mean,mean,sum
decile_num,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,0.0,21614.79,2986.25,16.03,544.78,692.51,147.73,56188157.74
2,21627.38,38932.65,30500.68,160.76,1461.81,1780.01,318.2,14524743.72
3,38932.66,58135.17,48704.81,249.33,1497.31,1976.75,479.43,13755435.36
4,58135.17,78976.09,68482.6,335.51,1372.72,2015.76,643.04,14101366.32
5,78976.09,105301.45,91639.26,426.03,1232.75,1864.86,632.11,11118736.15
6,105301.45,142595.71,121891.08,546.14,780.47,1552.48,772.01,10824528.05
7,142595.71,207312.23,168497.18,725.7,552.89,1191.69,638.81,13472984.82
8,207805.83,3337282.44,389332.98,1570.64,242.71,522.1,279.39,11362332.47


In [None]:
# Let's look at where in the data the 9th decile falls (the one from the chart)
# Since we only have 8 groups due to duplicates being dropped, let's recalculate properly

# First, let's understand the actual income distribution better
print("="*70)
print("UNDERSTANDING THE 9TH DECILE CONCENTRATION")
print("="*70)

# Get percentiles to understand income distribution
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
income_pcts = np.percentile(df_outputs['Employment_Income'], percentiles)

print("\nIncome distribution percentiles:")
for p, val in zip(percentiles, income_pcts):
    print(f"  {p}th percentile: ${val:,.0f}")

# The 9th decile should be roughly between 80th and 90th percentile
ninth_decile = df_outputs[(df_outputs['Employment_Income'] >= income_pcts[7]) & 
                          (df_outputs['Employment_Income'] < income_pcts[8])]

print(f"\n9th Decile (80-90th percentile):")
print(f"  Income range: ${income_pcts[7]:,.0f} - ${income_pcts[8]:,.0f}")
print(f"  Number of households: {len(ninth_decile)}")
print(f"  Weighted count: {ninth_decile['weight'].sum():,.0f}")
print(f"  Average FPL ratio: {ninth_decile['fpl_ratio'].mean():.1f}%")
print(f"  Average baseline PTC: ${ninth_decile['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${ninth_decile['aca_reform'].mean():,.2f}")
print(f"  Average change: ${ninth_decile['net_change'].mean():,.2f}")

# Now let's see WHO specifically gains in the 9th decile
ninth_decile_gainers = ninth_decile[ninth_decile['net_change'] > 100]  # Gains more than $100

print(f"\nHouseholds in 9th decile with gains > $100:")
print(f"  Count: {len(ninth_decile_gainers)}")
print(f"  Average income: ${ninth_decile_gainers['Employment_Income'].mean():,.0f}")
print(f"  Average FPL ratio: {ninth_decile_gainers['fpl_ratio'].mean():.1f}%")
print(f"  Average gain: ${ninth_decile_gainers['net_change'].mean():,.2f}")

# Look at specific examples
print("\nExample households in 9th decile with large gains:")
examples = ninth_decile_gainers.nlargest(5, 'net_change')[
    ['household_id', 'State', 'Employment_Income', 'fpl_ratio', 
     'aca_baseline', 'aca_reform', 'net_change', 'Married', 'Num_Dependents']
]
examples

In [27]:
# Check if the dataframe exists and has the needed columns
print("Checking available variables:")
print(f"df_outputs exists: {'df_outputs' in locals()}")
if 'df_outputs' in locals():
    print(f"df_outputs shape: {df_outputs.shape}")
    print(f"Columns: {list(df_outputs.columns)}")

Checking available variables:
df_outputs exists: True
df_outputs shape: (21108, 14)
Columns: ['household_id', 'State', 'Married', 'Num_Dependents', 'Employment_Income', 'aca_baseline', 'aca_reform', 'weight', 'net_change', 'household_size', 'fpl_threshold', 'fpl_ratio', 'income_decile', 'decile_num']


In [28]:
# Understanding the 9th decile concentration
import numpy as np

# Get percentiles to understand income distribution
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
income_pcts = np.percentile(df_outputs['Employment_Income'], percentiles)

print("="*70)
print("UNDERSTANDING THE 9TH DECILE CONCENTRATION")
print("="*70)
print("\nIncome distribution percentiles:")
for p, val in zip(percentiles, income_pcts):
    print(f"  {p}th percentile: ${val:,.0f}")

UNDERSTANDING THE 9TH DECILE CONCENTRATION

Income distribution percentiles:
  10th percentile: $0
  20th percentile: $0
  30th percentile: $21,615
  40th percentile: $38,933
  50th percentile: $58,135
  60th percentile: $78,976
  70th percentile: $105,301
  80th percentile: $142,596
  90th percentile: $207,460
  95th percentile: $286,710
  99th percentile: $577,545
