# Minnesota CTC Reform Analysis (2025)

This notebook analyzes the impact of modifying Minnesota's Child Tax Credit (CWFC).

## Baseline (Current Law)
- CTC amount: $1,750 per qualifying child
- Main phase-out rate: 12%
- Phase-out rate for CTC-ineligible with older children: 9%

## Reform
- CTC amount: $2,000 per qualifying child
- Main phase-out rate: 20%
- Phase-out rate for CTC-ineligible with older children: 9% (unchanged)

## Metrics
We calculate:
- Budgetary impact (net cost)
- Winners (percentage of population affected)
- Overall poverty impact
- Child poverty impact

In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd
import numpy as np

MN_DATASET = "hf://policyengine/test/MN.h5"

## Helper Functions

In [2]:
def calculate_poverty(sim, period=2025, child_only=False):
    age = np.array(sim.calculate("age", period=period))
    is_in_poverty = np.array(sim.calculate("person_in_poverty", period=period))
    person_weight = np.array(sim.calculate("person_weight", period=period))
    
    if child_only:
        mask = age < 18
    else:
        mask = np.ones_like(age, dtype=bool)
    
    weighted_in_poverty = (is_in_poverty[mask] * person_weight[mask]).sum()
    weighted_total = person_weight[mask].sum()
    poverty_rate = weighted_in_poverty / weighted_total if weighted_total > 0 else 0
    
    return {
        "poverty_rate": poverty_rate,
        "people_in_poverty": weighted_in_poverty,
        "total_people": weighted_total
    }

def calculate_winners(baseline_sim, reform_sim, period=2025):
    baseline_income = np.array(baseline_sim.calculate("household_net_income", period=period, map_to="household"))
    reform_income = np.array(reform_sim.calculate("household_net_income", period=period, map_to="household"))
    household_weight = np.array(baseline_sim.calculate("household_weight", period=period))
    income_change = reform_income - baseline_income
    
    household_id_person = np.array(baseline_sim.calculate("household_id", period=period, map_to="person"))
    household_id_household = np.array(baseline_sim.calculate("household_id", period=period, map_to="household"))
    person_weight = np.array(baseline_sim.calculate("person_weight", period=period))
    
    income_change_dict = dict(zip(household_id_household, income_change))
    person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])
    
    winners_mask = person_income_change > 1
    people_winning = person_weight[winners_mask].sum()
    total_people = person_weight.sum()
    
    losers_mask = person_income_change < -1
    people_losing = person_weight[losers_mask].sum()
    
    pct_winners = (people_winning / total_people * 100) if total_people > 0 else 0
    pct_losers = (people_losing / total_people * 100) if total_people > 0 else 0
    
    winning_hh_mask = income_change > 1
    avg_gain = np.average(income_change[winning_hh_mask], weights=household_weight[winning_hh_mask]) if winning_hh_mask.sum() > 0 else 0
    
    losing_hh_mask = income_change < -1
    avg_loss = np.average(income_change[losing_hh_mask], weights=household_weight[losing_hh_mask]) if losing_hh_mask.sum() > 0 else 0
    
    return {
        "people_winning": people_winning,
        "people_losing": people_losing,
        "total_people": total_people,
        "pct_winners": pct_winners,
        "pct_losers": pct_losers,
        "avg_gain": avg_gain,
        "avg_loss": avg_loss
    }

def format_currency(value):
    return f"${value/1e6:.2f}M"

def format_percent(value):
    return f"{value*100:.2f}%"

## Define Baseline and Reform

In [3]:
def create_mn_ctc_reform():
    reform = Reform.from_dict(
        {
            "gov.states.mn.tax.income.credits.cwfc.ctc.amount": {
                "2025-01-01.2100-12-31": 2000
            },
            "gov.states.mn.tax.income.credits.cwfc.phase_out.rate.main": {
                "2025-01-01.2100-12-31": 0.20
            },
        },
        country_id="us",
    )
    return reform

print("Reform function defined!")
print("\nReform details:")
print("  - CTC amount: $1,750 -> $2,000 (+$250 per child)")
print("  - Main phase-out rate: 12% -> 20% (+8 percentage points)")

Reform function defined!

Reform details:
  - CTC amount: $1,750 -> $2,000 (+$250 per child)
  - Main phase-out rate: 12% -> 20% (+8 percentage points)


## Load Simulations

In [4]:
print("Loading baseline (current law)...")
baseline = Microsimulation(dataset=MN_DATASET)
print("Baseline loaded")

print("\nLoading reform (CTC $2,000 + 20% phase-out rate)...")
reform = create_mn_ctc_reform()
reform_sim = Microsimulation(dataset=MN_DATASET, reform=reform)
print("Reform loaded")

print("\n" + "="*60)
print("All simulations ready!")
print("="*60)

Loading baseline (current law)...


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


MN.h5:   0%|          | 0.00/50.5M [00:00<?, ?B/s]

Baseline loaded

Loading reform (CTC $2,000 + 20% phase-out rate)...
Reform loaded

All simulations ready!


## Calculate Impacts

In [5]:
baseline_overall_pov = calculate_poverty(baseline, child_only=False)
baseline_child_pov = calculate_poverty(baseline, child_only=True)

reform_overall_pov = calculate_poverty(reform_sim, child_only=False)
reform_child_pov = calculate_poverty(reform_sim, child_only=True)

baseline_hh_income = baseline.calculate("household_net_income", period=2025, map_to="household").sum()
reform_hh_income = reform_sim.calculate("household_net_income", period=2025, map_to="household").sum()
ctc_cost = reform_hh_income - baseline_hh_income

winners = calculate_winners(baseline, reform_sim)

print("All impacts calculated")

All impacts calculated


## Results Summary

In [6]:
print("\n" + "="*80)
print("MINNESOTA CTC REFORM IMPACTS (2025)")
print("Baseline: CTC $1,750 + 12% phase-out | Reform: CTC $2,000 + 20% phase-out")
print("="*80)

print(f"\n{'BUDGETARY IMPACT':=^80}")
print(f"MN CTC Reform net cost:        {format_currency(ctc_cost)}")

print(f"\n{'WINNERS AND LOSERS (POPULATION)':=^80}")
print(f"People gaining income:         {winners['people_winning']:,.0f} ({winners['pct_winners']:.2f}% of population)")
print(f"Average gain per household:    ${winners['avg_gain']:,.2f}")
print(f"People losing income:          {winners['people_losing']:,.0f} ({winners['pct_losers']:.2f}% of population)")
print(f"Average loss per household:    ${winners['avg_loss']:,.2f}")

print(f"\n{'POVERTY IMPACT - OVERALL':=^80}")
print(f"Baseline poverty rate:         {format_percent(baseline_overall_pov['poverty_rate'])}")
print(f"Reform poverty rate:           {format_percent(reform_overall_pov['poverty_rate'])}")
overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']
overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0
print(f"Absolute reduction:            {format_percent(overall_pov_reduction)}")
print(f"Relative reduction:            {overall_pov_pct_reduction:.2f}%")
people_lifted = baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']
print(f"People lifted from poverty:    {people_lifted:,.0f}")

print(f"\n{'POVERTY IMPACT - CHILDREN':=^80}")
print(f"Baseline child poverty rate:   {format_percent(baseline_child_pov['poverty_rate'])}")
print(f"Reform child poverty rate:     {format_percent(reform_child_pov['poverty_rate'])}")
child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']
child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0
print(f"Absolute reduction:            {format_percent(child_pov_reduction)}")
print(f"Relative reduction:            {child_pov_pct_reduction:.2f}%")
children_lifted = baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']
print(f"Children lifted from poverty:  {children_lifted:,.0f}")
print("="*80)


MINNESOTA CTC REFORM IMPACTS (2025)
Baseline: CTC $1,750 + 12% phase-out | Reform: CTC $2,000 + 20% phase-out

MN CTC Reform net cost:        $-0.21M

People gaining income:         739,692 (12.63% of population)
Average gain per household:    $439.90
People losing income:          710,895 (12.14% of population)
Average loss per household:    $-545.76

Baseline poverty rate:         13.79%
Reform poverty rate:           13.81%
Absolute reduction:            -0.02%
Relative reduction:            -0.15%
People lifted from poverty:    -1,234

Baseline child poverty rate:   9.71%
Reform child poverty rate:     9.75%
Absolute reduction:            -0.05%
Relative reduction:            -0.47%
Children lifted from poverty:  -630


In [7]:
baseline_hh_income_arr = np.array(baseline.calculate("household_net_income", period=2025, map_to="household"))
reform_hh_income_arr = np.array(reform_sim.calculate("household_net_income", period=2025, map_to="household"))
household_weight = np.array(baseline.calculate("household_weight", period=2025))

hh_income_change = reform_hh_income_arr - baseline_hh_income_arr
hh_benefitting_mask = hh_income_change > 1
hh_losing_mask = hh_income_change < -1

households_benefitting = household_weight[hh_benefitting_mask].sum()
households_losing = household_weight[hh_losing_mask].sum()
total_households = household_weight.sum()
pct_households_benefitting = (households_benefitting / total_households) * 100
pct_households_losing = (households_losing / total_households) * 100

print("="*70)
print("HOUSEHOLDS IMPACTED BY MN CTC REFORM")
print("="*70)
print(f"Households benefitting:        {households_benefitting:,.0f} ({pct_households_benefitting:.2f}%)")
print(f"Households losing:             {households_losing:,.0f} ({pct_households_losing:.2f}%)")
print(f"Total households:              {total_households:,.0f}")
print("="*70)

HOUSEHOLDS IMPACTED BY MN CTC REFORM
Households benefitting:        168,551 (9.17%)
Households losing:             136,240 (7.41%)
Total households:              1,838,548


## Export Results

In [8]:
overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']
overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0
child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']
child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0

results = [
    {
        "Scenario": "MN CTC Reform",
        "Description": "CTC $2,000 + 20% phase-out rate",
        "Net Cost": format_currency(ctc_cost),
        "% Population Winning": f"{winners['pct_winners']:.2f}%",
        "% Population Losing": f"{winners['pct_losers']:.2f}%",
        "Avg Gain (Winners)": f"${winners['avg_gain']:,.2f}",
        "Avg Loss (Losers)": f"${winners['avg_loss']:,.2f}",
        "Overall Poverty Change (%)": f"{overall_pov_pct_reduction:.2f}%",
        "Child Poverty Change (%)": f"{child_pov_pct_reduction:.2f}%",
        "People Lifted from Poverty": f"{people_lifted:,.0f}",
        "Children Lifted from Poverty": f"{children_lifted:,.0f}"
    }
]

df_results = pd.DataFrame(results)

print("\n" + "="*120)
print("MN CTC REFORM SUMMARY")
print("="*120)
print(df_results.T.to_string(header=False))
print("="*120)

df_results.to_csv("mn_ctc_reform_results.csv", index=False)
print("\nExported to: mn_ctc_reform_results.csv")


MN CTC REFORM SUMMARY
Scenario                                        MN CTC Reform
Description                   CTC $2,000 + 20% phase-out rate
Net Cost                                              $-0.21M
% Population Winning                                   12.63%
% Population Losing                                    12.14%
Avg Gain (Winners)                                    $439.90
Avg Loss (Losers)                                    $-545.76
Overall Poverty Change (%)                             -0.15%
Child Poverty Change (%)                               -0.47%
People Lifted from Poverty                             -1,234
Children Lifted from Poverty                             -630

Exported to: mn_ctc_reform_results.csv


## Average Income by Decile - Baseline vs Reform

In [9]:
baseline_income = np.array(baseline.calculate("household_net_income", period=2025, map_to="household"))
reform_income = np.array(reform_sim.calculate("household_net_income", period=2025, map_to="household"))
weights = np.array(baseline.calculate("household_weight", period=2025))
agi = np.array(baseline.calculate("adjusted_gross_income", period=2025, map_to="household"))

df_decile = pd.DataFrame({
    'agi': agi,
    'baseline_income': baseline_income,
    'reform_income': reform_income,
    'weight': weights
})

df_decile = df_decile.sort_values('agi').reset_index(drop=True)
df_decile['cumweight'] = df_decile['weight'].cumsum()
total_weight = df_decile['weight'].sum()

df_decile['decile'] = pd.cut(
    df_decile['cumweight'] / total_weight,
    bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
)

decile_summary = df_decile.groupby('decile', observed=True).apply(
    lambda x: pd.Series({
        'Baseline Avg Income': np.average(x['baseline_income'], weights=x['weight']),
        'Reform Avg Income': np.average(x['reform_income'], weights=x['weight']),
        'Avg Change': np.average(x['reform_income'] - x['baseline_income'], weights=x['weight']),
        'Households': x['weight'].sum()
    })
).reset_index()

print("\n" + "="*90)
print("AVERAGE HOUSEHOLD NET INCOME BY DECILE - BASELINE VS REFORM")
print("="*90)
print(f"{'Decile':>8} {'Baseline':>18} {'Reform':>18} {'Change':>14} {'Households':>14}")
print("-"*90)
for _, row in decile_summary.iterrows():
    print(f"{row['decile']:>8} ${row['Baseline Avg Income']:>16,.0f} ${row['Reform Avg Income']:>16,.0f} ${row['Avg Change']:>12,.2f} {row['Households']:>14,.0f}")
print("="*90)


AVERAGE HOUSEHOLD NET INCOME BY DECILE - BASELINE VS REFORM
  Decile           Baseline             Reform         Change     Households
------------------------------------------------------------------------------------------
     1st $          35,015 $          35,052 $       36.53        183,728
     2nd $          49,720 $          49,868 $      147.98        183,971
     3rd $          60,175 $          60,293 $      118.73        183,845
     4th $          77,365 $          77,248 $     -117.70        183,241
     5th $          97,346 $          97,280 $      -66.37        184,437
     6th $          93,105 $          93,049 $      -55.70        183,826
     7th $         146,248 $         146,245 $       -3.11        183,634
     8th $         147,574 $         147,506 $      -67.35        184,078
     9th $         265,363 $         265,372 $        8.53        183,739
    10th $         519,973 $         519,970 $       -2.85        184,049


## Winners, Losers, and Non-Affected by Decile

In [10]:
# Get person-level data with household mapping
person_weight = np.array(baseline.calculate("person_weight", period=2025))
household_id_person = np.array(baseline.calculate("household_id", period=2025, map_to="person"))
household_id_household = np.array(baseline.calculate("household_id", period=2025, map_to="household"))

# Get household-level income change
baseline_hh_inc = np.array(baseline.calculate("household_net_income", period=2025, map_to="household"))
reform_hh_inc = np.array(reform_sim.calculate("household_net_income", period=2025, map_to="household"))
hh_inc_change = reform_hh_inc - baseline_hh_inc

# Get household AGI for decile assignment
hh_agi = np.array(baseline.calculate("adjusted_gross_income", period=2025, map_to="household"))
hh_weight = np.array(baseline.calculate("household_weight", period=2025))

# Create household-level DataFrame with decile assignment
df_hh = pd.DataFrame({
    'household_id': household_id_household,
    'agi': hh_agi,
    'income_change': hh_inc_change,
    'weight': hh_weight
})

# Sort by AGI and assign deciles
df_hh = df_hh.sort_values('agi').reset_index(drop=True)
df_hh['cumweight'] = df_hh['weight'].cumsum()
total_hh_weight = df_hh['weight'].sum()
df_hh['decile'] = pd.cut(
    df_hh['cumweight'] / total_hh_weight,
    bins=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
)

# Create mapping from household_id to decile and income_change
hh_to_decile = dict(zip(df_hh['household_id'], df_hh['decile']))
hh_to_change = dict(zip(df_hh['household_id'], df_hh['income_change']))

# Create person-level DataFrame
df_person = pd.DataFrame({
    'household_id': household_id_person,
    'person_weight': person_weight
})

# Map decile and income change to persons
df_person['decile'] = df_person['household_id'].map(hh_to_decile)
df_person['income_change'] = df_person['household_id'].map(hh_to_change)

# Classify as winner, loser, or non-affected
df_person['is_winner'] = df_person['income_change'] > 1
df_person['is_loser'] = df_person['income_change'] < -1
df_person['is_nonaffected'] = (~df_person['is_winner']) & (~df_person['is_loser'])

# Calculate percentages by decile
decile_impact = df_person.groupby('decile', observed=True).apply(
    lambda x: pd.Series({
        'Total Residents': x['person_weight'].sum(),
        'Winners': x.loc[x['is_winner'], 'person_weight'].sum(),
        'Losers': x.loc[x['is_loser'], 'person_weight'].sum(),
        'Non-Affected': x.loc[x['is_nonaffected'], 'person_weight'].sum()
    })
).reset_index()

# Calculate percentages
decile_impact['% Winners'] = (decile_impact['Winners'] / decile_impact['Total Residents'] * 100)
decile_impact['% Losers'] = (decile_impact['Losers'] / decile_impact['Total Residents'] * 100)
decile_impact['% Non-Affected'] = (decile_impact['Non-Affected'] / decile_impact['Total Residents'] * 100)

print("\n" + "="*90)
print("WINNERS, LOSERS, AND NON-AFFECTED BY INCOME DECILE (% OF RESIDENTS)")
print("="*90)
print(f"{'Decile':>8} {'% Winners':>12} {'% Losers':>12} {'% Non-Affected':>16} {'Total Residents':>18}")
print("-"*90)
for _, row in decile_impact.iterrows():
    print(f"{row['decile']:>8} {row['% Winners']:>11.1f}% {row['% Losers']:>11.1f}% {row['% Non-Affected']:>15.1f}% {row['Total Residents']:>17,.0f}")
print("="*90)


WINNERS, LOSERS, AND NON-AFFECTED BY INCOME DECILE (% OF RESIDENTS)
  Decile    % Winners     % Losers   % Non-Affected    Total Residents
------------------------------------------------------------------------------------------
    10th         5.8%        28.4%            65.8%           715,130
     1st        17.5%         1.4%            81.1%           337,955
     2nd        49.5%         0.0%            50.5%           432,021
     3rd        41.5%         7.2%            51.2%           513,281
     4th         8.5%        32.4%            59.1%           596,435
     5th         8.2%        13.9%            77.9%           610,155
     6th         4.7%         7.6%            87.8%           590,161
     7th         3.2%         6.1%            90.7%           614,311
     8th         4.9%        12.7%            82.4%           770,523
     9th         4.0%         1.2%            94.9%           674,770


## Tax Units per Household Analysis

In [11]:
# Analyze tax units per household
tax_unit_id = np.array(baseline.calculate("tax_unit_id", period=2025, map_to="person"))
household_id_p = np.array(baseline.calculate("household_id", period=2025, map_to="person"))

# Create DataFrame to count unique tax units per household
df_tu = pd.DataFrame({
    'household_id': household_id_p,
    'tax_unit_id': tax_unit_id
})

# Count unique tax units per household
tu_per_hh = df_tu.groupby('household_id')['tax_unit_id'].nunique().reset_index()
tu_per_hh.columns = ['household_id', 'num_tax_units']

# Get household weights
hh_ids = np.array(baseline.calculate("household_id", period=2025, map_to="household"))
hh_weights = np.array(baseline.calculate("household_weight", period=2025))
df_hh_weights = pd.DataFrame({'household_id': hh_ids, 'weight': hh_weights})

# Merge
tu_per_hh = tu_per_hh.merge(df_hh_weights, on='household_id')

# Calculate weighted distribution
tu_distribution = tu_per_hh.groupby('num_tax_units')['weight'].sum().reset_index()
tu_distribution['pct'] = tu_distribution['weight'] / tu_distribution['weight'].sum() * 100

print("="*70)
print("TAX UNITS PER HOUSEHOLD - DISTRIBUTION")
print("="*70)
print(f"{'# Tax Units':>12} {'Households':>18} {'% of Total':>14}")
print("-"*70)
for _, row in tu_distribution.iterrows():
    print(f"{int(row['num_tax_units']):>12} {row['weight']:>17,.0f} {row['pct']:>13.2f}%")
print("="*70)
print(f"\nTotal households with 2+ tax units: {tu_per_hh[tu_per_hh['num_tax_units'] >= 2]['weight'].sum():,.0f}")
print(f"Percentage with 2+ tax units: {tu_per_hh[tu_per_hh['num_tax_units'] >= 2]['weight'].sum() / tu_per_hh['weight'].sum() * 100:.2f}%")

TAX UNITS PER HOUSEHOLD - DISTRIBUTION
 # Tax Units         Households     % of Total
----------------------------------------------------------------------
           1         1,130,173         61.47%
           2           458,195         24.92%
           3           148,805          8.09%
           4            57,527          3.13%
           5            18,721          1.02%
           6             3,533          0.19%
           7                 3          0.00%
           8            21,591          1.17%
           9                 1          0.00%

Total households with 2+ tax units: 708,375
Percentage with 2+ tax units: 38.53%


In [12]:
# Check if affected high-income households have multiple tax units
# Merge tax unit count with household decile and income change data
df_hh_analysis = df_hh.merge(tu_per_hh[['household_id', 'num_tax_units']], on='household_id')

# Filter to top 3 deciles (8th, 9th, 10th)
top_deciles = df_hh_analysis[df_hh_analysis['decile'].isin(['8th', '9th', '10th'])]

# Among top decile households, compare affected vs non-affected
affected_top = top_deciles[abs(top_deciles['income_change']) > 1]
unaffected_top = top_deciles[abs(top_deciles['income_change']) <= 1]

print("="*70)
print("TOP 3 DECILES: TAX UNITS IN AFFECTED VS UNAFFECTED HOUSEHOLDS")
print("="*70)

if len(affected_top) > 0:
    avg_tu_affected = np.average(affected_top['num_tax_units'], weights=affected_top['weight'])
    print(f"Affected households (income change > $1):")
    print(f"  Count: {affected_top['weight'].sum():,.0f}")
    print(f"  Avg tax units per household: {avg_tu_affected:.2f}")
    
    # Distribution of tax units among affected
    affected_tu_dist = affected_top.groupby('num_tax_units')['weight'].sum()
    print(f"  Distribution:")
    for tu, wt in affected_tu_dist.items():
        print(f"    {tu} tax unit(s): {wt:,.0f} ({wt/affected_top['weight'].sum()*100:.1f}%)")
else:
    print("No affected households in top deciles")

print()

if len(unaffected_top) > 0:
    avg_tu_unaffected = np.average(unaffected_top['num_tax_units'], weights=unaffected_top['weight'])
    print(f"Unaffected households (income change <= $1):")
    print(f"  Count: {unaffected_top['weight'].sum():,.0f}")
    print(f"  Avg tax units per household: {avg_tu_unaffected:.2f}")
print("="*70)

TOP 3 DECILES: TAX UNITS IN AFFECTED VS UNAFFECTED HOUSEHOLDS
Affected households (income change > $1):
  Count: 58,919
  Avg tax units per household: 4.51
  Distribution:
    1 tax unit(s): 7,342 (12.5%)
    2 tax unit(s): 15,097 (25.6%)
    3 tax unit(s): 6,346 (10.8%)
    4 tax unit(s): 6,924 (11.8%)
    5 tax unit(s): 809 (1.4%)
    6 tax unit(s): 810 (1.4%)
    7 tax unit(s): 0 (0.0%)
    8 tax unit(s): 21,591 (36.6%)
    9 tax unit(s): 0 (0.0%)

Unaffected households (income change <= $1):
  Count: 492,947
  Avg tax units per household: 1.70


In [None]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd
import numpy as np

MN_DATASET = "hf://policyengine/test/MN.h5"

print("Loading baseline simulation with test dataset...")
baseline = Microsimulation(dataset=MN_DATASET)
print("Loaded successfully!")

# Basic dataset stats
household_weight = np.array(baseline.calculate("household_weight", period=2025))
person_weight = np.array(baseline.calculate("person_weight", period=2025))
tax_unit_weight = np.array(baseline.calculate("tax_unit_weight", period=2025))

print(f"\n{'='*60}")
print("DATASET STATISTICS - TEST MN.h5")
print(f"{'='*60}")
print(f"Population (weighted):    {person_weight.sum():,.0f}")
print(f"Households (weighted):    {household_weight.sum():,.0f}")
print(f"Tax Units (weighted):     {tax_unit_weight.sum():,.0f}")
print(f"\nTargets:")
print(f"Population target:        5,737,915")
print(f"Household target:         2,344,432")
print(f"Tax Unit target:          2,871,840")