In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd

baseline = Microsimulation(dataset="/Users/daphnehansell/Documents/GitHub/analysis-notebooks/us/medicaid/enhanced_cps_2024.h5")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
reform = Reform.from_dict({
  "gov.aca.ptc_phase_out_rate[0].amount": {
    "2026-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[1].amount": {
    "2025-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[2].amount": {
    "2026-01-01.2100-12-31": 0
  },
  "gov.aca.ptc_phase_out_rate[3].amount": {
    "2026-01-01.2100-12-31": 0.02
  },
  "gov.aca.ptc_phase_out_rate[4].amount": {
    "2026-01-01.2100-12-31": 0.04
  },
  "gov.aca.ptc_phase_out_rate[5].amount": {
    "2026-01-01.2100-12-31": 0.06
  },
  "gov.aca.ptc_phase_out_rate[6].amount": {
    "2026-01-01.2100-12-31": 0.085
  },
  "gov.aca.ptc_income_eligibility[2].amount": {
    "2026-01-01.2100-12-31": True
  }
}, country_id="us")




In [3]:
baseline = Microsimulation(dataset="/Users/daphnehansell/Documents/GitHub/analysis-notebooks/us/medicaid/enhanced_cps_2024.h5")
reformed = Microsimulation(reform=reform, dataset="/Users/daphnehansell/Documents/GitHub/analysis-notebooks/us/medicaid/enhanced_cps_2024.h5")
weights = baseline.calculate("household_weight", period=2024)



In [4]:
baseline_aca_eligible = baseline.calculate("is_aca_ptc_eligible", map_to="tax_unit", period=2026).sum()
baseline_aca_eligible/1e6

37.008340397541666

In [5]:
baseline_aca_enrollment = baseline.calculate("takes_up_aca_if_eligible", map_to="person", period=2026).sum()
baseline_aca_enrollment/1e6

227.46342831824853

In [6]:
period = 2025
sim    = baseline

# ── Tax-unit flags, broadcast to people ──────────────────────────────────────
takes_up = sim.calculate("takes_up_aca_if_eligible",
                         map_to="person", period=period)        # 0/1
aca_ptc  = sim.calculate("aca_ptc",
                         map_to="person", period=period)        # $ amount

# ── PERSON weights (pick any person-level variable) ─────────────────────────
person_wt = sim.calculate("age", map_to="person", period=period).weights

# ── Build mask & sum weights ────────────────────────────────────────────────
mask = (takes_up == 1) & (aca_ptc > 0)

people_with_ptc_takeup_wtd = (mask.astype(float) * person_wt).sum()

print(f"{people_with_ptc_takeup_wtd:,.0f} weighted people live in tax units "
      "that take up Marketplace coverage and actually receive a PTC.")


197,799,923 weighted people live in tax units that take up Marketplace coverage and actually receive a PTC.


In [7]:
period = 2026

# ── Tax-unit flags, broadcast to people ──────────────────────────────────────
takes_up_r = reformed.calculate("takes_up_aca_if_eligible",
                         map_to="person", period=period)        # 0/1
aca_ptc_r  = reformed.calculate("aca_ptc",
                         map_to="person", period=period)        # $ amount

# ── PERSON weights (pick any person-level variable) ─────────────────────────
person_wt_r = reformed.calculate("age", map_to="person", period=period).weights

# ── Build mask & sum weights ────────────────────────────────────────────────
mask = (takes_up_r == 1) & (aca_ptc_r > 0)

people_with_ptc_takeup_wtd_r = (mask.astype(float) * person_wt_r).sum()

print(f"{people_with_ptc_takeup_wtd_r:,.0f} weighted people live in tax units "
      "that take up Marketplace coverage and actually receive a PTC.")


28,427,905 weighted people live in tax units that take up Marketplace coverage and actually receive a PTC.


In [8]:
period = 2026
sim    = baseline

# ── Tax-unit flags, broadcast to people ──────────────────────────────────────
takes_up = sim.calculate("takes_up_aca_if_eligible",
                         map_to="person", period=period)        # 0/1
aca_ptc  = sim.calculate("aca_ptc",
                         map_to="person", period=period)        # $ amount

# ── PERSON weights (pick any person-level variable) ─────────────────────────
person_wt = sim.calculate("age", map_to="person", period=period).weights

# ── Build mask & sum weights ────────────────────────────────────────────────
mask = (takes_up == 1) & (aca_ptc > 0)

people_with_ptc_takeup_wtd = (mask.astype(float) * person_wt).sum()

print(f"{people_with_ptc_takeup_wtd:,.0f} weighted people live in tax units "
      "that take up Marketplace coverage and actually receive a PTC.")


20,115,724 weighted people live in tax units that take up Marketplace coverage and actually receive a PTC.


In [9]:
year = 2026
state = baseline.calculate("state_code", map_to="household", period=year)
num_dependents = baseline.calculate("tax_unit_dependents", map_to="household", period=year)
married = baseline.calculate("is_married", map_to="household", period=year)
employment_income = baseline.calculate("employment_income", map_to="household", period=year)
self_employment_income = baseline.calculate("self_employment_income", map_to="household", period=year)
aca_baseline = baseline.calculate("aca_ptc", map_to="household", period=year)
rating_area = baseline.calculate("slcsp_rating_area", map_to="household", period=year)
household_id = baseline.calculate("household_id", map_to="household", period=year)
aca_reform = reformed.calculate("aca_ptc", map_to="household", period=year)

In [10]:
# Create a DataFrame with the outputs
data = {
    "household_id": household_id,
    "State": state,
    "Married": married,
    "Num_Dependents": num_dependents,
    "Employment_Income": employment_income,
    "aca_baseline": aca_baseline,
    "aca_reform": aca_reform,

   }


df_outputs = pd.DataFrame(data)
df_outputs[df_outputs['household_id'] == 4428]


Unnamed: 0,household_id,State,Married,Num_Dependents,Employment_Income,aca_baseline,aca_reform
600,4428,MA,1.0,4.0,52859.65625,0.0,0.0


In [11]:
# -------------------------------------------------------------
# 0️⃣  Make sure the CPS household weight is in the DataFrame
# -------------------------------------------------------------
# If you already stuffed it in earlier, skip this.
df_outputs["weight"] = aca_baseline.weights    # aligns by household_id

# -------------------------------------------------------------
# 1️⃣  Define a weight threshold for “reasonably representative”
# -------------------------------------------------------------
MIN_WT = 10_000          # ↖ try 5_000 if you want a looser cut

df_big = df_outputs[df_outputs["weight"] >= MIN_WT].copy()

# -------------------------------------------------------------
# 2️⃣  Net PTC change and (optionally) weighted national impact
# -------------------------------------------------------------
df_big["net_change"] = df_big["aca_reform"] - df_big["aca_baseline"]
df_big["wt_change"]  = df_big["net_change"] * df_big["weight"]  # national $ impact

# -------------------------------------------------------------
# 3️⃣  Biggest ↑ increases and ↓ decreases, LIMITED to big-weight HHs
# -------------------------------------------------------------
N = 10   # how many households to show in each direction

cols = ["household_id", "State", "weight", "net_change", "wt_change"]

top_increases = df_big.nlargest(N, "net_change")[cols]
top_decreases = df_big.nsmallest(N, "net_change")[cols]

print("Most positive net-income changes (PTC boosts):")
display(top_increases)

print("\nMost negative net-income changes (PTC cuts):")
display(top_decreases)


Most positive net-income changes (PTC boosts):


Unnamed: 0,household_id,State,weight,net_change,wt_change
6960,25327,MO,10561.736328,21569.876953,227815400.0
15170,63406,TX,36817.671875,20464.279297,753447100.0
11774,47863,FL,63711.28125,20171.796875,1285171000.0
11630,47452,FL,21600.607422,16787.021484,362609900.0
14377,60850,TX,24447.160156,14556.893555,355874700.0
10747,44495,FL,108407.132812,14096.426758,1528153000.0
31780,135336,FL,59593.371094,13948.032227,831210300.0
8987,38686,NC,13462.585938,12920.947266,173949400.0
18999,82008,CA,10598.22168,12066.46582,127883100.0
19620,83988,CA,15526.390625,11747.892578,182402400.0



Most negative net-income changes (PTC cuts):


Unnamed: 0,household_id,State,weight,net_change,wt_change
11,85,ME,24959.193359,0.0,0.0
15,99,ME,11115.519531,0.0,0.0
27,206,ME,19730.021484,0.0,0.0
30,261,ME,27242.337891,0.0,0.0
31,275,ME,22297.763672,0.0,0.0
32,284,ME,12969.742188,0.0,0.0
35,315,ME,15139.511719,0.0,0.0
39,330,ME,15473.058594,0.0,0.0
41,339,ME,20974.195312,0.0,0.0
44,356,ME,55298.234375,0.0,0.0


In [12]:
df_outputs[df_outputs['household_id'] == 4428]


Unnamed: 0,household_id,State,Married,Num_Dependents,Employment_Income,aca_baseline,aca_reform,weight
600,4428,MA,1.0,4.0,52859.65625,0.0,0.0,36551.855469


In [13]:
# 0. Make sure net_change exists
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# 1. Flag households with any change
mask = df_outputs["net_change"] != 0          # True for ↑ or ↓

# 2. Weighted mean among those households
avg_net_change = (
    (df_outputs.loc[mask, "net_change"] * df_outputs.loc[mask, "weight"]).sum()
    / df_outputs.loc[mask, "weight"].sum()
)

print(f"Average weighted PTC change among households with any change: "
      f"${avg_net_change:,.2f}")


Average weighted PTC change among households with any change: $2,666.88


In [14]:
# ------------------------------------------------------------------
# 0.  Ensure supporting columns exist
# ------------------------------------------------------------------
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# ------------------------------------------------------------------
# 1.  Keep only households with a PTC in *both* scenarios
# ------------------------------------------------------------------
mask_both_ptc = (df_outputs["aca_baseline"] > 0) & (df_outputs["aca_reform"] > 0)
df_dual_ptc   = df_outputs[mask_both_ptc]

# ------------------------------------------------------------------
# 2.  Weighted average of the net change (household perspective)
# ------------------------------------------------------------------
avg_net_change_dual_hh = (
    (df_dual_ptc["net_change"] * df_dual_ptc["weight"]).sum()
    / df_dual_ptc["weight"].sum()
)

print(f"Average weighted PTC change among households with a PTC in both "
      f"baseline and reform: ${avg_net_change_dual_hh:,.2f}")


Average weighted PTC change among households with a PTC in both baseline and reform: $1,687.69


In [15]:
# ------------------------------------------------------------------
# 0.  Ensure supporting columns exist (already done above)
# ------------------------------------------------------------------
df_outputs["net_change"] = df_outputs["aca_reform"] - df_outputs["aca_baseline"]

# ------------------------------------------------------------------
# 1.  Keep only households that *gain* a PTC (reform > 0, baseline == 0)
# ------------------------------------------------------------------
mask_reform_only = (df_outputs["aca_baseline"] == 0) & (df_outputs["aca_reform"] > 0)
df_reform_only   = df_outputs[mask_reform_only]

# ------------------------------------------------------------------
# 2.  Weighted average of the net change (household perspective)
# ------------------------------------------------------------------
avg_net_change_reform_only_hh = (
    (df_reform_only["net_change"] * df_reform_only["weight"]).sum()
    / df_reform_only["weight"].sum()
)

print(f"Average weighted PTC change among households that newly receive a PTC "
      f"under the reform: ${avg_net_change_reform_only_hh:,.2f}")


Average weighted PTC change among households that newly receive a PTC under the reform: $5,972.71


In [16]:
import numpy as np
from policyengine_us import Simulation

# -------------------------------
# 1. Pull household-level results
# -------------------------------
# ACA PTC (baseline and reform)
ptc_base   = baseline.calculate("aca_ptc", map_to="household", period=2026)
ptc_reform = reformed.calculate("aca_ptc",   map_to="household", period=2026)

# Household weights (same for both sims)
hh_wt      = baseline.calculate("household_weight", map_to="household", period=2026)

# -------------------------------
# 2. Weighted sum of the change
# -------------------------------
weighted_total_change = ptc_reform - ptc_base

# Optional: average change per household
weighted_total_change.sum()/1e9

40.889928786781944

In [17]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# ------------------------------------------------------------------
# Brand hex codes (one-to-one with style.colors)
# ------------------------------------------------------------------
COLOR_BLUE               = "#2C6496"   # style.colors.BLUE  / BLUE_PRIMARY
COLOR_BLUE_LIGHT         = "#D8E6F3"   # style.colors.BLUE_LIGHT / BLUE_95
COLOR_LIGHT_GRAY         = "#F2F2F2"   # style.colors.LIGHT_GRAY
COLOR_MEDIUM_LIGHT_GRAY  = "#BDBDBD"   # style.colors.MEDIUM_LIGHT_GRAY
COLOR_DARK_GRAY          = "#616161"   # style.colors.DARK_GRAY

# ––– choose colours for positive vs. negative average bars –––
POS_COLOR = COLOR_BLUE
NEG_COLOR = COLOR_DARK_GRAY

# ------------------------------------------------------------------
# 1.  Pull baseline / reform net income + weights
# ------------------------------------------------------------------
net_base   = baseline.calculate(
    "household_net_income_including_health_benefits", map_to="household", period=2026
)
net_reform = reformed.calculate(
    "household_net_income_including_health_benefits", map_to="household", period=2026
)
weights    = baseline.calculate(
    "household_weight", map_to="household", period=2026
)

df = pd.DataFrame({
    "net_base": net_base,
    "delta":    net_reform - net_base,
    "weight":   weights,
})

# ------------------------------------------------------------------
# 2.  Weighted decile edges (baseline ranking)
# ------------------------------------------------------------------
def wquantile(values, qs, w):
    srt = np.argsort(values)
    values, w = values[srt], w[srt]
    cum_w = np.cumsum(w) / np.sum(w)
    return np.interp(qs, cum_w, values)

edges = wquantile(df["net_base"].values,
                  np.linspace(0, 1, 11), df["weight"].values)

df["decile"] = pd.cut(df["net_base"],
                      bins=edges,
                      labels=np.arange(1, 11),
                      include_lowest=True)

# ------------------------------------------------------------------
# 3.  Weighted average Δnet-income by decile
# ------------------------------------------------------------------
decile_avg = (
    df.groupby("decile")
      .apply(lambda g: np.average(g["delta"], weights=g["weight"]))
      .reset_index(name="avg_change")
)

# ------------------------------------------------------------------
# 4.  Use brand colours: blue if gain, dark-gray if loss
# ------------------------------------------------------------------
bar_colors = [
    POS_COLOR if v >= 0 else NEG_COLOR
    for v in decile_avg["avg_change"]
]

# ------------------------------------------------------------------
# 5.  Plot
# ------------------------------------------------------------------
fig = go.Figure(
    data=[
        go.Bar(
            x=decile_avg["decile"].astype(int),
            y=decile_avg["avg_change"],
            marker_color=bar_colors,
            text=decile_avg["avg_change"].apply(lambda v: f"${v:,.0f}"),
            textposition="inside",
        )
    ],
    layout=dict(
        title="Impact of Extending IRA PTC Expansion by Income Decile – 2026",
        xaxis_title="Income Decile",
        yaxis_title="Average change in household net income ($)",
        showlegend=False,
    )
)
fig.add_hline(y=0, line_width=1, line_color="black")
fig.show()
fig.update_xaxes(dtick=1)        # show 1-10 instead of only the evens


In [18]:
# Let's analyze the households affected by the ACA reform
import pandas as pd
import numpy as np

# First, let's look at households that gain PTC under reform but had none in baseline
gained_ptc = df_outputs[(df_outputs['aca_baseline'] == 0) & (df_outputs['aca_reform'] > 0)]

print(f"Number of households gaining PTC under reform: {len(gained_ptc)}")
print(f"Weighted count: {gained_ptc['weight'].sum():,.0f}")
print(f"\nAverage reform PTC for these households: ${gained_ptc['aca_reform'].mean():,.2f}")
print(f"Weighted average reform PTC: ${(gained_ptc['aca_reform'] * gained_ptc['weight']).sum() / gained_ptc['weight'].sum():,.2f}")

Number of households gaining PTC under reform: 1100
Weighted count: 3,503,693

Average reform PTC for these households: $5,707.33
Weighted average reform PTC: $5,972.71


In [20]:
# Let's continue without matplotlib
# Add income deciles to the gained_ptc dataframe
gained_ptc_with_income = gained_ptc.copy()

# Calculate weighted income percentiles for context
income_percentiles = np.percentile(df_outputs['Employment_Income'], [25, 50, 75, 90, 95])
print("Income percentiles across all households:")
for i, pct in enumerate([25, 50, 75, 90, 95]):
    print(f"  {pct}th percentile: ${income_percentiles[i]:,.0f}")

print("\n" + "="*60)
print("Income distribution of households GAINING PTC under reform:")
print("="*60)
print(gained_ptc_with_income['Employment_Income'].describe())

print("\n" + "="*60)
print("Top 10 households by PTC gain (sorted by reform PTC amount):")
print("="*60)
top_gainers = gained_ptc_with_income.nlargest(10, 'aca_reform')[['household_id', 'State', 'Employment_Income', 'aca_reform', 'Married', 'Num_Dependents', 'weight']]
display(top_gainers)

Income percentiles across all households:
  25th percentile: $6,159
  50th percentile: $68,947
  75th percentile: $172,785
  90th percentile: $504,951
  95th percentile: $2,181,010

Income distribution of households GAINING PTC under reform:
count    1.100000e+03
mean     5.555715e+05
std      4.699500e+06
min      0.000000e+00
25%      7.411113e+04
50%      1.099418e+05
75%      1.631940e+05
max      1.033826e+08
Name: Employment_Income, dtype: float64

Top 10 households by PTC gain (sorted by reform PTC amount):


Unnamed: 0,household_id,State,Employment_Income,aca_reform,Married,Num_Dependents,weight
21655,95958,CT,586290.419922,29020.246094,1.0,0.0,0.000351
20926,92239,VT,169552.753906,28546.548828,1.0,2.0,2193.554443
16135,69304,WY,172368.441406,27582.34375,1.0,3.0,1964.266357
7046,25635,MO,37921.058594,27411.421875,1.0,0.0,1342.842773
27701,115102,MO,41373.524902,27333.1875,1.0,0.0,0.002778
14065,59697,OK,68947.382812,26790.708984,1.0,0.0,3511.932373
7058,25669,MO,0.0,26257.669922,1.0,0.0,2463.5
34720,149164,OK,76835.164062,26118.769531,1.0,0.0,0.004654
19738,84350,CA,88482.460938,25993.980469,1.0,0.0,334.342865
40316,173580,CA,102149.837891,25538.492188,1.0,0.0,0.002264


In [21]:
# Let's continue without matplotlib
# Add income deciles to the gained_ptc dataframe
gained_ptc_with_income = gained_ptc.copy()

# Calculate weighted income percentiles for context
income_percentiles = np.percentile(df_outputs['Employment_Income'], [25, 50, 75, 90, 95])
print("Income percentiles across all households:")
for i, pct in enumerate([25, 50, 75, 90, 95]):
    print(f"  {pct}th percentile: ${income_percentiles[i]:,.0f}")

print("\n" + "="*60)
print("Income distribution of households GAINING PTC under reform:")
print("="*60)
print(gained_ptc_with_income['Employment_Income'].describe())

print("\n" + "="*60)
print("Top 10 households by PTC gain (sorted by reform PTC amount):")
print("="*60)
top_gainers = gained_ptc_with_income.nlargest(10, 'aca_reform')[['household_id', 'State', 'Employment_Income', 'aca_reform', 'Married', 'Num_Dependents', 'weight']]
top_gainers

Income percentiles across all households:
  25th percentile: $6,159
  50th percentile: $68,947
  75th percentile: $172,785
  90th percentile: $504,951
  95th percentile: $2,181,010

Income distribution of households GAINING PTC under reform:
count    1.100000e+03
mean     5.555715e+05
std      4.699500e+06
min      0.000000e+00
25%      7.411113e+04
50%      1.099418e+05
75%      1.631940e+05
max      1.033826e+08
Name: Employment_Income, dtype: float64

Top 10 households by PTC gain (sorted by reform PTC amount):


Unnamed: 0,household_id,State,Employment_Income,aca_reform,Married,Num_Dependents,weight
21655,95958,CT,586290.419922,29020.246094,1.0,0.0,0.000351
20926,92239,VT,169552.753906,28546.548828,1.0,2.0,2193.554443
16135,69304,WY,172368.441406,27582.34375,1.0,3.0,1964.266357
7046,25635,MO,37921.058594,27411.421875,1.0,0.0,1342.842773
27701,115102,MO,41373.524902,27333.1875,1.0,0.0,0.002778
14065,59697,OK,68947.382812,26790.708984,1.0,0.0,3511.932373
7058,25669,MO,0.0,26257.669922,1.0,0.0,2463.5
34720,149164,OK,76835.164062,26118.769531,1.0,0.0,0.004654
19738,84350,CA,88482.460938,25993.980469,1.0,0.0,334.342865
40316,173580,CA,102149.837891,25538.492188,1.0,0.0,0.002264


In [22]:
# Now let's look at households losing PTC or seeing reduced PTC
lost_or_reduced = df_outputs[(df_outputs['aca_baseline'] > 0) & (df_outputs['net_change'] < 0)]

print("Households LOSING or SEEING REDUCED PTC:")
print(f"Number of households: {len(lost_or_reduced)}")
print(f"Weighted count: {lost_or_reduced['weight'].sum():,.0f}")
print(f"\nAverage baseline PTC: ${lost_or_reduced['aca_baseline'].mean():,.2f}")
print(f"Average reform PTC: ${lost_or_reduced['aca_reform'].mean():,.2f}")
print(f"Average loss: ${lost_or_reduced['net_change'].mean():,.2f}")

# Income distribution
print("\nIncome distribution of households losing PTC benefits:")
print(lost_or_reduced['Employment_Income'].describe())

# Top losers
print("\nTop 10 households by PTC loss:")
top_losers = lost_or_reduced.nsmallest(10, 'net_change')[['household_id', 'State', 'Employment_Income', 'aca_baseline', 'aca_reform', 'net_change', 'weight']]
top_losers

Households LOSING or SEEING REDUCED PTC:
Number of households: 0
Weighted count: 0

Average baseline PTC: $nan
Average reform PTC: $nan
Average loss: $nan

Income distribution of households losing PTC benefits:
count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: Employment_Income, dtype: float64

Top 10 households by PTC loss:


Unnamed: 0,household_id,State,Employment_Income,aca_baseline,aca_reform,net_change,weight


In [23]:
# Interesting - no households lose PTC! Let's look at those who keep their PTC but see changes
kept_ptc = df_outputs[(df_outputs['aca_baseline'] > 0) & (df_outputs['aca_reform'] > 0)]

print("Households with PTC in BOTH baseline and reform:")
print(f"Number of households: {len(kept_ptc)}")
print(f"Weighted count: {kept_ptc['weight'].sum():,.0f}")
print(f"\nAverage baseline PTC: ${kept_ptc['aca_baseline'].mean():,.2f}")
print(f"Average reform PTC: ${kept_ptc['aca_reform'].mean():,.2f}")
print(f"Average change: ${kept_ptc['net_change'].mean():,.2f}")

# Show distribution of changes
print("\nDistribution of PTC changes for households with PTC in both scenarios:")
print(kept_ptc['net_change'].describe())

# Households with biggest increases among those who already had PTC
print("\nTop 10 PTC increases among households who already had PTC:")
top_increases = kept_ptc.nlargest(10, 'net_change')[['household_id', 'State', 'Employment_Income', 'aca_baseline', 'aca_reform', 'net_change', 'weight']]
top_increases

Households with PTC in BOTH baseline and reform:
Number of households: 3406
Weighted count: 11,828,817

Average baseline PTC: $7,582.03
Average reform PTC: $9,510.45
Average change: $1,928.42

Distribution of PTC changes for households with PTC in both scenarios:
count     3406.000000
mean      1928.417004
std       1464.484689
min        433.568359
25%       1265.441895
50%       1612.739014
75%       2209.899292
max      24195.677979
Name: net_change, dtype: float64

Top 10 PTC increases among households who already had PTC:


Unnamed: 0,household_id,State,Employment_Income,aca_baseline,aca_reform,net_change,weight
25895,109280,IL,135262.814827,2166.071045,26361.749023,24195.677979,0.005656
21501,95388,CT,229315.603271,2461.53418,25936.906494,23475.372314,0.000415
20625,88926,HI,119508.789062,3365.45874,25774.730469,22409.271729,729.324707
6106,22572,WI,91929.835449,5390.63623,26178.041016,20787.404785,6035.339844
15170,63406,TX,56307.025391,3461.937988,23926.217285,20464.279297,36817.671875
32728,138331,KY,143819.542969,2214.557861,20885.525879,18670.968018,0.04663
16147,69708,WY,40219.304688,7641.085938,23462.459961,15821.374023,1604.936035
36802,159175,WY,322659.273438,6483.185059,22133.864258,15650.679199,1445.953613
20237,85992,CA,264716.572266,2762.978271,17937.049805,15174.071533,5193.151367
39405,170489,CA,200459.942383,3582.74585,17706.862793,14124.116943,0.002236


In [24]:
# Let's calculate approximate FPL levels for households to understand where they fall
# 2026 FPL estimates (rough approximations based on current trends)
fpl_2026 = {
    1: 15570,   # Single person
    2: 21130,   # Couple
    3: 26650,   # Family of 3
    4: 32200,   # Family of 4
    5: 37750,   # Family of 5
    6: 43300,   # Family of 6
    7: 48850,   # Family of 7
    8: 54400,   # Family of 8
}

# Calculate household size and FPL ratio
df_outputs['household_size'] = 2 + df_outputs['Num_Dependents']  # Assuming married couples or singles with deps
df_outputs['household_size'] = df_outputs.apply(
    lambda row: (1 + row['Married'] + row['Num_Dependents']) if not pd.isna(row['Married']) else 1,
    axis=1
)

# Map FPL based on household size
df_outputs['fpl_threshold'] = df_outputs['household_size'].map(lambda x: fpl_2026.get(min(int(x), 8), 54400))
df_outputs['fpl_ratio'] = (df_outputs['Employment_Income'] / df_outputs['fpl_threshold']) * 100

# Now let's analyze the cliff effect around 400% FPL
print("="*70)
print("ANALYSIS OF THE 400% FPL CLIFF EFFECT")
print("="*70)

# Households just below and above 400% FPL
near_cliff = df_outputs[(df_outputs['fpl_ratio'] >= 350) & (df_outputs['fpl_ratio'] <= 450)]
print(f"\nHouseholds between 350-450% FPL: {len(near_cliff)}")
print(f"Weighted count: {near_cliff['weight'].sum():,.0f}")

# Split by those above and below 400% FPL
below_400 = near_cliff[near_cliff['fpl_ratio'] <= 400]
above_400 = near_cliff[near_cliff['fpl_ratio'] > 400]

print(f"\nBelow 400% FPL (350-400%): {len(below_400)} households")
print(f"  Average baseline PTC: ${below_400['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${below_400['aca_reform'].mean():,.2f}")
print(f"  Average change: ${below_400['net_change'].mean():,.2f}")

print(f"\nAbove 400% FPL (400-450%): {len(above_400)} households")
print(f"  Average baseline PTC: ${above_400['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${above_400['aca_reform'].mean():,.2f}")
print(f"  Average change: ${above_400['net_change'].mean():,.2f}")

# Show some examples
print("\n" + "="*70)
print("EXAMPLE HOUSEHOLDS AT THE CLIFF (395-405% FPL):")
print("="*70)
cliff_examples = df_outputs[(df_outputs['fpl_ratio'] >= 395) & (df_outputs['fpl_ratio'] <= 405)]
cliff_examples_display = cliff_examples[['household_id', 'State', 'Employment_Income', 'fpl_ratio', 
                                         'aca_baseline', 'aca_reform', 'net_change', 'weight']].head(10)
cliff_examples_display

ANALYSIS OF THE 400% FPL CLIFF EFFECT

Households between 350-450% FPL: 3110
Weighted count: 12,367,979

Below 400% FPL (350-400%): 1565 households
  Average baseline PTC: $1,110.95
  Average reform PTC: $1,612.20
  Average change: $501.25

Above 400% FPL (400-450%): 1545 households
  Average baseline PTC: $686.16
  Average reform PTC: $1,385.85
  Average change: $699.69

EXAMPLE HOUSEHOLDS AT THE CLIFF (395-405% FPL):


Unnamed: 0,household_id,State,Employment_Income,fpl_ratio,aca_baseline,aca_reform,net_change,weight
12,91,ME,85344.212891,403.900676,0.0,0.0,0.0,4.466252
61,495,ME,105351.593262,395.315547,0.0,0.0,0.0,17.447075
281,2971,VT,85035.097656,402.437755,0.0,0.0,0.0,332.319
476,3945,MA,106868.429688,401.007241,0.0,0.0,0.0,57237.523438
661,4662,MA,62052.640625,398.53976,0.0,0.0,0.0,7.987278
806,5462,RI,106868.433594,401.007256,0.0,0.0,0.0,9386.629883
907,6168,CT,62052.640625,398.53976,0.0,0.0,0.0,24166.259766
1081,6785,NY,84344.46875,399.169279,0.0,0.0,0.0,4.944809
1104,6872,NY,85035.101562,402.437774,0.0,0.0,0.0,10.947828
1106,6874,NY,62052.640625,398.53976,0.0,0.0,0.0,4.106772


In [25]:
# Let's look more specifically at the income deciles to see where the cliff effect shows up
# Calculate income deciles
df_outputs['income_decile'] = pd.qcut(df_outputs['Employment_Income'], 10, labels=range(1, 11), duplicates='drop')

# Group by decile and show the effect
decile_analysis = df_outputs.groupby('income_decile').agg({
    'Employment_Income': ['min', 'max', 'mean'],
    'fpl_ratio': 'mean',
    'aca_baseline': 'mean',
    'aca_reform': 'mean',
    'net_change': 'mean',
    'weight': 'sum'
}).round(2)

print("="*70)
print("PTC EFFECTS BY INCOME DECILE")
print("="*70)
print("\nIncome ranges and average PTC changes by decile:")
decile_analysis

ValueError: Bin labels must be one fewer than the number of bin edges

In [25]:
# Fix the decile calculation
# Calculate income deciles without explicit labels to avoid the error
df_outputs['income_decile'] = pd.qcut(df_outputs['Employment_Income'], 10, duplicates='drop')

# Get unique deciles and sort them
deciles = sorted(df_outputs['income_decile'].unique())

# Create a mapping to simpler labels
decile_map = {d: i+1 for i, d in enumerate(deciles)}
df_outputs['decile_num'] = df_outputs['income_decile'].map(decile_map)

# Group by decile and show the effect
decile_analysis = df_outputs.groupby('decile_num').agg({
    'Employment_Income': ['min', 'max', 'mean'],
    'fpl_ratio': 'mean',
    'aca_baseline': 'mean',
    'aca_reform': 'mean',
    'net_change': 'mean',
    'weight': 'sum'
}).round(2)

print("="*70)
print("PTC EFFECTS BY INCOME DECILE")
print("="*70)
print("\nIncome ranges and average PTC changes by decile:")
decile_analysis

PTC EFFECTS BY INCOME DECILE

Income ranges and average PTC changes by decile:


Unnamed: 0_level_0,Employment_Income,Employment_Income,Employment_Income,fpl_ratio,aca_baseline,aca_reform,net_change,weight
Unnamed: 0_level_1,min,max,mean,mean,mean,mean,mean,sum
decile_num,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,0.0,22082.7,3081.23,16.5,481.01,618.01,136.99,56060974.39
2,22096.56,40219.3,31811.3,167.52,1638.77,1989.05,350.27,18267145.22
3,40220.45,59168.34,50102.24,254.53,1575.92,2062.44,486.52,13547056.92
4,59179.83,80438.61,69904.57,343.95,1437.25,2029.91,592.66,12499143.93
5,80438.61,106574.53,93253.97,434.8,1303.7,2060.3,756.6,11057517.34
6,106597.34,142491.25,122908.07,554.42,816.63,1518.36,701.72,13853349.38
7,142491.25,205693.01,169652.75,727.25,489.02,1177.16,688.14,10689611.79
8,205693.02,3305428.97,382158.51,1548.38,210.36,482.04,271.67,11524346.53


In [None]:
# Let's look at where in the data the 9th decile falls (the one from the chart)
# Since we only have 8 groups due to duplicates being dropped, let's recalculate properly

# First, let's understand the actual income distribution better
print("="*70)
print("UNDERSTANDING THE 9TH DECILE CONCENTRATION")
print("="*70)

# Get percentiles to understand income distribution
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
income_pcts = np.percentile(df_outputs['Employment_Income'], percentiles)

print("\nIncome distribution percentiles:")
for p, val in zip(percentiles, income_pcts):
    print(f"  {p}th percentile: ${val:,.0f}")

# The 9th decile should be roughly between 80th and 90th percentile
ninth_decile = df_outputs[(df_outputs['Employment_Income'] >= income_pcts[7]) & 
                          (df_outputs['Employment_Income'] < income_pcts[8])]

print(f"\n9th Decile (80-90th percentile):")
print(f"  Income range: ${income_pcts[7]:,.0f} - ${income_pcts[8]:,.0f}")
print(f"  Number of households: {len(ninth_decile)}")
print(f"  Weighted count: {ninth_decile['weight'].sum():,.0f}")
print(f"  Average FPL ratio: {ninth_decile['fpl_ratio'].mean():.1f}%")
print(f"  Average baseline PTC: ${ninth_decile['aca_baseline'].mean():,.2f}")
print(f"  Average reform PTC: ${ninth_decile['aca_reform'].mean():,.2f}")
print(f"  Average change: ${ninth_decile['net_change'].mean():,.2f}")

# Now let's see WHO specifically gains in the 9th decile
ninth_decile_gainers = ninth_decile[ninth_decile['net_change'] > 100]  # Gains more than $100

print(f"\nHouseholds in 9th decile with gains > $100:")
print(f"  Count: {len(ninth_decile_gainers)}")
print(f"  Average income: ${ninth_decile_gainers['Employment_Income'].mean():,.0f}")
print(f"  Average FPL ratio: {ninth_decile_gainers['fpl_ratio'].mean():.1f}%")
print(f"  Average gain: ${ninth_decile_gainers['net_change'].mean():,.2f}")

# Look at specific examples
print("\nExample households in 9th decile with large gains:")
examples = ninth_decile_gainers.nlargest(5, 'net_change')[
    ['household_id', 'State', 'Employment_Income', 'fpl_ratio', 
     'aca_baseline', 'aca_reform', 'net_change', 'Married', 'Num_Dependents']
]
examples

In [26]:
# Check if the dataframe exists and has the needed columns
print("Checking available variables:")
print(f"df_outputs exists: {'df_outputs' in locals()}")
if 'df_outputs' in locals():
    print(f"df_outputs shape: {df_outputs.shape}")
    print(f"Columns: {list(df_outputs.columns)}")

Checking available variables:
df_outputs exists: True
df_outputs shape: (21607, 14)
Columns: ['household_id', 'State', 'Married', 'Num_Dependents', 'Employment_Income', 'aca_baseline', 'aca_reform', 'weight', 'net_change', 'household_size', 'fpl_threshold', 'fpl_ratio', 'income_decile', 'decile_num']


In [None]:
# Understanding the 9th decile concentration
import numpy as np

# Get percentiles to understand income distribution
percentiles = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
income_pcts = np.percentile(df_outputs['Employment_Income'], percentiles)

print("="*70)
print("UNDERSTANDING THE 9TH DECILE CONCENTRATION")
print("="*70)
print("\nIncome distribution percentiles:")
for p, val in zip(percentiles, income_pcts):
    print(f"  {p}th percentile: ${val:,.0f}")