In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
# Dataset URI for the enhanced CPS 2024 file, named so it can be changed in one place.
ENHANCED_CPS_DATASET = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"

# Simulation with no reform applied; the later cells query it for
# person-level variables.
baseline = Microsimulation(dataset=ENHANCED_CPS_DATASET)


  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Person-level Marketplace coverage flag for 2025.
marketplace_coverage_2025 = baseline.calculate(
    "has_marketplace_health_coverage",
    map_to="person",
    period=2025,
)

# Collapse the flag to a single total, then show it in millions.
baseline_aca_enrollment = marketplace_coverage_2025.sum()

baseline_aca_enrollment / 1e6

20.285684746812855

In [10]:
# Person-level 2025 flags: Marketplace coverage and ACA PTC eligibility.
baseline_has_coverage = baseline.calculate(
    "has_marketplace_health_coverage",
    map_to="person",
    period=2025,
)
baseline_is_eligible = baseline.calculate(
    "is_aca_ptc_eligible",
    map_to="person",
    period=2025,
)

# Flag people for whom both conditions hold, scale by the weights attached
# to the coverage series, and total.
covered_and_eligible = baseline_has_coverage & baseline_is_eligible
baseline_aca = (covered_and_eligible * baseline_has_coverage.weights).sum()

# Shown in millions.
baseline_aca / 1e6

11.734775520414182

In [37]:
import pandas as pd

period = 2025           # keep the simulation year in one place so it is easy to change
sim    = baseline       # short alias for the simulation queried below

# 1. Pull the two flags (and the person-level weight) into one DataFrame
has_cov = sim.calculate("has_marketplace_health_coverage",
                        map_to="person", period=period)
is_elg  = sim.calculate("is_aca_ptc_eligible",
                        map_to="person", period=period)

df = pd.DataFrame({
    "has_cov": has_cov,
    "is_elg": is_elg,
    "weight": has_cov.weights,   # weights attached to the calculated series
})

# 2. Keep only people who HAVE Marketplace coverage but FAIL the eligibility flag.
#    .copy() makes `problem` an independent frame: the loop below adds columns
#    to it, and assigning into a filtered slice of `df` would trigger pandas'
#    SettingWithCopyWarning.
problem = df[(df.has_cov) & (~df.is_elg)].copy()
print(f"{problem.shape[0]:,} observations "
      f"(weighted: {problem.weight.sum():,.0f}) do not overlap.")

# 3. (Optional) Bring in a few explanatory variables so you can see *why*
extra_vars = [
    "employment_income",
    "self_employment_income",
    "has_esi",     # or whatever ESI flag you rely on
    "age",
    "household_id",
    "has_marketplace_health_coverage",
    "tax_unit_dependents",
]

# Each variable is calculated for everyone, then restricted to the rows kept
# in `problem` by label.
for v in extra_vars:
    problem[v] = sim.calculate(v, map_to="person", period=period).loc[problem.index]

# 4. Quickly eyeball the 10 highest-weight cases
cols_to_show = [
    "household_id",
    "weight",
    "employment_income",
    "age",
    "has_esi",
    "has_marketplace_health_coverage",
]
print(problem.sort_values("weight", ascending=False)[cols_to_show].head(10))


1,384 observations (weighted: 8,550,909) do not overlap.
       household_id         weight  employment_income   age  has_esi  \
45915         81253  343056.125000           0.000000   8.0     True   
45914         81253  343056.125000       14772.141602  34.0     True   
45913         81253  343056.125000      120866.976562  34.0     True   
43445         77870  320219.375000        2856.855713  26.0    False   
27730         47054  244629.296875           0.000000  30.0    False   
10466         17784  198591.671875           0.000000  43.0    False   
953            3649  183900.546875           0.000000  33.0    False   
46743         82375  143590.609375           0.000000  31.0    False   
46746         82375  143590.609375           0.000000   8.0    False   
46742         82375  143590.609375           0.000000  35.0    False   

       has_marketplace_health_coverage  
45915                             True  
45914                             True  
45913                      

In [30]:
# Person-level `has_esi` flag for 2025.
baseline_has_esi = baseline.calculate(
    "has_esi",
    map_to="person",
    period=2025,
)

# Total of the flag, shown in millions.
esi_total = baseline_has_esi.sum()
esi_total / 1e6

137.17248543564725

In [33]:
# People flagged with BOTH Marketplace coverage and ESI.
# Uses baseline_has_coverage and baseline_has_esi, which are assigned in
# other cells of this notebook.
covered_with_esi = baseline_has_coverage & baseline_has_esi

# Scale by the weights attached to the coverage series, total, and show in millions.
double_coverage = (covered_with_esi * baseline_has_coverage.weights).sum()
double_coverage / 1e6

1.689951476693666

In [44]:
import pandas as pd

period = 2025
sim    = baseline           # shorthand

# ------------------------------------------------------------
# 1. Line up the three core flags plus the weight
# ------------------------------------------------------------
has_cov = sim.calculate("has_marketplace_health_coverage",
                        map_to="person", period=period)
is_elg  = sim.calculate("is_aca_ptc_eligible",
                        map_to="person", period=period)
has_esi = sim.calculate("has_esi",
                        map_to="person", period=period)

df = pd.DataFrame({
    "has_cov": has_cov,
    "is_elg": is_elg,
    "has_esi": has_esi,
    "weight": has_cov.weights,   # weights attached to the calculated series
})

# ------------------------------------------------------------
# 2. Keep rows with Marketplace coverage, NOT PTC-eligible, and NO ESI
# ------------------------------------------------------------
# .copy() makes `problem_no_esi` an independent frame: step 3 adds columns to
# it, and assigning into a filtered slice of `df` would trigger pandas'
# SettingWithCopyWarning.
problem_no_esi = df[(df.has_cov) & (~df.is_elg) & (~df.has_esi)].copy()

print(
    f"{problem_no_esi.shape[0]:,} observations remain "
    f"(weighted pop: {problem_no_esi.weight.sum()/1e6:,.1f} million)"
)

# ------------------------------------------------------------
# 3. Pull in only *your* existing explanatory vars
# ------------------------------------------------------------
extra_vars = [
    "employment_income",
    "self_employment_income",
    "age",
    "household_id",
    "tax_unit_dependents",
]

# Each variable is calculated for everyone, then restricted to the rows kept
# in `problem_no_esi` by label.
for v in extra_vars:
    problem_no_esi[v] = (
        sim.calculate(v, map_to="person", period=period)
           .loc[problem_no_esi.index]
    )

# ------------------------------------------------------------
# 4. Quick look at the heaviest-weighted cases
# ------------------------------------------------------------
cols_to_show = [
    "household_id",
    "weight",
    "employment_income",
    "self_employment_income",
    "age",
    "tax_unit_dependents",
]
top50 = (problem_no_esi
         .sort_values("weight", ascending=False)
         [cols_to_show]
         .head(50))

# The cell's saved output includes this header line, so print it to keep the
# code and its recorded output in agreement.
print("\nTop-weighted mismatches (Marketplace ✔️ / PTC-eligible ❌ / no ESI):")
print(top50.to_string(index=False))


1,112 observations remain (weighted pop: 6.9 million)

Top-weighted mismatches (Marketplace ✔️ / PTC-eligible ❌ / no ESI):
 household_id        weight  employment_income  self_employment_income  age  tax_unit_dependents
        77870 320219.375000        2856.855713                0.000000 26.0                    0
        47054 244629.296875           0.000000                0.000000 30.0                    0
        17784 198591.671875           0.000000                0.000000 43.0                    0
         3649 183900.546875           0.000000                0.000000 33.0                    0
        82375 143590.609375           0.000000             6681.662598 31.0                    4
        82375 143590.609375           0.000000                0.000000 35.0                    4
        82375 143590.609375           0.000000                0.000000 12.0                    4
        82375 143590.609375           0.000000                0.000000  8.0                    4
    