In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
baseline = Microsimulation(dataset="hf://policyengine/policyengine-us-data/cps_2023.h5")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
baseline_aca_enrollment = baseline.calculate("has_marketplace_health_coverage", map_to="person", period=2025).sum()

baseline_aca_enrollment/1e6

12.428581703125

In [3]:
baseline_has_coverage = baseline.calculate("has_marketplace_health_coverage", map_to="person", period=2025)
baseline_is_eligible = baseline.calculate("is_aca_ptc_eligible", map_to="person", period=2025)

baseline_aca = ((baseline_has_coverage & baseline_is_eligible)*baseline_has_coverage.weights).sum()

baseline_aca/1e6

8.30790088671875

In [4]:
import pandas as pd

period = 2025           # keep this in one place so it’s easy to change
sim    = baseline       # just so the code is a bit shorter

# 1. Pull the two flags (and the person-level sampling weight) into one DataFrame
has_cov   = sim.calculate("has_marketplace_health_coverage",
                           map_to="person", period=period)
is_elg    = sim.calculate("is_aca_ptc_eligible",
                           map_to="person", period=period)

df = pd.DataFrame({
    "has_cov" : has_cov,
    "is_elg"  : is_elg,
    "weight"  : has_cov.weights          # the Series carries its own CPS weight
})

# 2. Keep only people who HAVE Marketplace coverage but FAIL the eligibility flag
problem = df[(df.has_cov) & (~df.is_elg)]
print(f"{problem.shape[0]:,} observations "
      f"(weighted: {problem.weight.sum():,.0f}) do not overlap.")

# 3. (Optional) Bring in a few explanatory variables so you can see *why*
extra_vars = [
    
    "has_esi",     # or whatever ESI flag you rely on
    "age",
    "household_id",
    "aca_ptc",
    "adjusted_gross_income"
  
]

for v in extra_vars:
    problem[v] = sim.calculate(v, map_to="person", period=period).loc[problem.index]

# 4. Quickly eyeball the 10 highest-weight cases
cols_to_show = ["household_id", "weight", "adjusted_gross_income", "age",
                "has_esi",  
              
              
                ]
print(problem.sort_values("weight", ascending=False)[cols_to_show].head(10))


627 observations (weighted: 4,120,681) do not overlap.
       household_id        weight  adjusted_gross_income   age  has_esi
37586         65629  28516.822266           26300.667969  40.0    False
37587         65629  28516.822266           26300.667969  58.0    False
4998          10045  23764.017578           20978.705078  27.0    False
37976         65209  19011.214844               0.000000  30.0    False
4497           9340  19011.214844               0.000000  22.0    False
24938         43163  19011.214844           31465.369141   9.0    False
43445         77870  19011.214844            7706.610352  26.0    False
43677         78173  19011.214844           19480.806641  35.0     True
43678         78173  19011.214844           19480.806641   6.0    False
43679         78173  19011.214844           19480.806641   1.0    False


In [5]:
baseline_has_esi = baseline.calculate("has_esi", map_to="person", period=2025)
baseline_has_esi.sum() / 1e6

171.7710788408203

In [6]:
double_coverage = ((baseline_has_coverage & baseline_has_esi)*baseline_has_coverage.weights).sum()
double_coverage/1e6

0.465774763671875

In [7]:
import pandas as pd

period = 2025
sim    = baseline           # shorthand

# ------------------------------------------------------------
# 1. Line up the three core flags plus the CPS weight
# ------------------------------------------------------------
has_cov = sim.calculate("has_marketplace_health_coverage",
                        map_to="person", period=period)
is_elg  = sim.calculate("is_aca_ptc_eligible",
                        map_to="person", period=period)
has_esi = sim.calculate("has_esi",
                        map_to="person", period=period)

df = pd.DataFrame({
    "has_cov" : has_cov,
    "is_elg"  : is_elg,
    "has_esi" : has_esi,
    "weight"  : has_cov.weights,
})

# ------------------------------------------------------------
# 2. Keep Marketplace ✔️  AND  ACA-eligible ❌  AND  ESI ❌
# ------------------------------------------------------------
problem_no_esi = df[(df.has_cov) & (~df.is_elg) & (~df.has_esi)]

print(
    f"{problem_no_esi.shape[0]:,} observations remain "
    f"(weighted pop: {problem_no_esi.weight.sum()/1e6:,.1f} million)"
)

# ------------------------------------------------------------
# 3. Pull in only *your* existing explanatory vars
# ------------------------------------------------------------
extra_vars = [
    "tax_unit_earned_income",

    "age",
    "household_id",
    "tax_unit_dependents",
]

for v in extra_vars:
    problem_no_esi[v] = (
        sim.calculate(v, map_to="person", period=period)
           .loc[problem_no_esi.index]
    )

# ------------------------------------------------------------
# 4. Quick look at the heaviest-weighted cases
# ------------------------------------------------------------
cols_to_show = [
    "household_id",
    "weight",
    "tax_unit_earned_income",
    
    "age",
    "tax_unit_dependents",
]
top50 = (problem_no_esi
         .sort_values("weight", ascending=False)
         [cols_to_show]
         .head(50))

print(top50.to_string(index=False))


558 observations remain (weighted pop: 3.7 million)
 household_id       weight  tax_unit_earned_income  age  tax_unit_dependents
        65629 28516.822266            28300.000000 40.0                    6
        65629 28516.822266            28300.000000 58.0                    6
        10045 23764.017578            20976.912109 27.0                    0
        43163 19011.214844            31465.369141  9.0                    3
        77870 19011.214844             2726.998779 26.0                    0
        78173 19011.214844            20976.912109  6.0                    2
        78173 19011.214844            20976.912109  1.0                    2
         9340 19011.214844                0.000000 22.0                    0
        43163 19011.214844            31465.369141  4.0                    3
        43163 19011.214844            31465.369141  2.0                    3
        65209 19011.214844                0.000000 30.0                    0
        22644 14258.4111

In [8]:
aca_premiums = baseline.calculate("aca_ptc", map_to="household", period=2025)

aca_premiums.sum() / 1e6


132094.3153464236

In [9]:
medicaid = baseline.calculate("medicaid", map_to="person", period=2025).sum()
medicaid/1e9


medicaid_per_capita = baseline.calculate("medicaid_per_capita_cost", period=2025).sum()

medicaid_per_capita/1e9

ValueError: Variable medicaid does not exist.

In [45]:
has_medicaid = baseline.calculate("medicaid", map_to="person", period=2025) >0
has_medicaid_per_capita = baseline.calculate("medicaid_per_capita_cost", period=2025) >0

(~has_medicaid & has_medicaid_per_capita).sum() 

target = has_medicaid & ~has_medicaid_per_capita

In [51]:
medicaid_cat = baseline.calculate("medicaid_group", map_to="person", period=2025)
medicaid_cat[target]


df = baseline.calculate_dataframe(["medicaid", "medicaid_per_capita_cost", "medicaid_group", "state_code"], map_to="person", period=2025)
df["target"] = (df.medicaid > 0 )& (df.medicaid_per_capita_cost == 0)
df[df.target].state_code.unique()

array(['WI', 'NC'], dtype=object)

In [11]:
# Check how many households have aca_ptc > 0
aca_ptc = baseline.calculate("aca_ptc", map_to="household",
period=2025)

# Count households with aca_ptc > 0
households_with_ptc = (aca_ptc > 0).sum()


# Weighted count
weighted_households_with_ptc = ((aca_ptc > 0) *
aca_ptc.weights).sum()

weighted_households_with_ptc/1e6

122636.457876885