In [34]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd

In [35]:
# Set up microsimulation for US households
sim = Microsimulation()

In [36]:
# Extract household data needed for rebate calculations
# Filter for California households only
state_code = sim.calculate("state_code_str", map_to="household")
ca_mask = state_code == "CA"

household_count_people = sim.calculate("household_count_people")[ca_mask]
household_adjusted_gross_income = sim.calculate("adjusted_gross_income", map_to="household")[ca_mask]
# Use snap_fpg for Federal Poverty Guidelines (this is the poverty threshold)
poverty_threshold = sim.calculate("snap_fpg", map_to="household")[ca_mask]
household_weight = sim.calculate("household_weight")[ca_mask]

print(f"Number of California households in sample: {len(household_count_people):,}")
print(f"Total weighted California households: {household_weight.sum():,.0f}")

Number of California households in sample: 1,854
Total weighted California households: 3,990,023,830,548


In [37]:
# Debug: Check the values to understand why rebate is so low
print("=== DEBUGGING VALUES ===")
print(f"Sample poverty threshold values: {poverty_threshold[:10]}")
print(f"Sample AGI values: {household_adjusted_gross_income[:10]}")
print(f"Sample income to poverty ratios: {(household_adjusted_gross_income / poverty_threshold)[:10]}")
print(f"Mean poverty threshold: ${poverty_threshold.mean():,.2f}")
print(f"Median poverty threshold: ${np.median(poverty_threshold):,.2f}")
print(f"Mean AGI: ${household_adjusted_gross_income.mean():,.2f}")
print(f"Median AGI: ${np.median(household_adjusted_gross_income):,.2f}")

# Check if there are households that should get rebates
households_under_150_pct = (household_adjusted_gross_income / poverty_threshold <= 1.5).sum()
households_under_175_pct = (household_adjusted_gross_income / poverty_threshold <= 1.75).sum()
print(f"Households under 150% of poverty: {households_under_150_pct:,}")
print(f"Households under 175% of poverty: {households_under_175_pct:,}")

# Check for any zero or negative values that might cause issues
print(f"Zero poverty thresholds: {(poverty_threshold == 0).sum()}")
print(f"Zero AGI values: {(household_adjusted_gross_income == 0).sum()}")
print(f"Negative AGI values: {(household_adjusted_gross_income < 0).sum()}")

=== DEBUGGING VALUES ===
Sample poverty threshold values:           value         weight
0  14700.000000    1743.168091
1  19900.000000       0.122180
2  19900.000000     756.053284
3  25099.998047       0.220665
4  30300.000000  358255.062500
5  14700.000000    2479.817871
6  35500.003906     269.325409
7  40700.000000    4106.521484
8  39799.998047     291.358612
9  30300.000000     386.776031
Sample AGI values:            value         weight
0   42985.429688    1743.168091
1    5803.675293       0.122180
2  437602.093750     756.053284
3    6000.000000       0.220665
4    9000.679688  358255.062500
5   37174.089844    2479.817871
6   34400.679688     269.325409
7  118001.359375    4106.521484
8   56800.000000     291.358612
9   32200.000000     386.776031
Sample income to poverty ratios:        value         weight
0   2.924179    1743.168091
1   0.291642       0.122180
2  21.990055     756.053284
3   0.239044       0.220665
4   0.297052  358255.062500
5   2.528850    2479.817871
6

In [None]:
# Try different income measures to see which is appropriate
# Let's test which income variables exist
income_vars_to_test = [
    "household_net_income",
    "spm_unit_net_income", 
    "total_income",
    "employment_income"
]

print("=== TESTING AVAILABLE INCOME VARIABLES ===")
available_income_vars = {}

for var in income_vars_to_test:
    try:
        if "spm_unit" in var:
            income_vals = sim.calculate(var, map_to="household")[ca_mask]
        else:
            income_vals = sim.calculate(var, map_to="household")[ca_mask]
        available_income_vars[var] = income_vals
        print(f"{var}: Available, mean = ${income_vals.mean():,.2f}")
    except:
        print(f"{var}: Not available")

# Compare with AGI
print(f"\nAGI mean: ${household_adjusted_gross_income.mean():,.2f}")

# If we found alternatives, compare poverty ratios
if available_income_vars:
    print(f"\nHouseholds under 150% poverty by income measure:")
    agi_under_150 = (household_adjusted_gross_income / poverty_threshold <= 1.5).sum()
    print(f"Using AGI: {agi_under_150:,}")
    
    for var_name, income_vals in available_income_vars.items():
        under_150 = (income_vals / poverty_threshold <= 1.5).sum()
        print(f"Using {var_name}: {under_150:,}")

=== TESTING AVAILABLE INCOME VARIABLES ===


In [None]:
# Analyze income distribution to understand why so few get rebates
print("=== INCOME TO POVERTY RATIO DISTRIBUTION ===")

income_to_poverty_ratio = household_adjusted_gross_income / poverty_threshold

# Show distribution of income-to-poverty ratios
percentiles = [10, 25, 50, 75, 90, 95, 99]
for p in percentiles:
    value = np.percentile(income_to_poverty_ratio, p)
    print(f"{p}th percentile income-to-poverty ratio: {value:.2f}")

print(f"\nPercent of households by income level:")
print(f"Below 50% of poverty: {(income_to_poverty_ratio <= 0.5).mean()*100:.1f}%")
print(f"Below 100% of poverty: {(income_to_poverty_ratio <= 1.0).mean()*100:.1f}%")
print(f"Below 150% of poverty: {(income_to_poverty_ratio <= 1.5).mean()*100:.1f}%")
print(f"Below 175% of poverty: {(income_to_poverty_ratio <= 1.75).mean()*100:.1f}%")
print(f"Below 200% of poverty: {(income_to_poverty_ratio <= 2.0).mean()*100:.1f}%")

# Check if the issue is with zero/negative incomes
zero_income_mask = household_adjusted_gross_income <= 0
print(f"\nHouseholds with zero or negative AGI: {zero_income_mask.sum():,} ({zero_income_mask.mean()*100:.1f}%)")

# If there are many zero-income households, they might be skewing results
if zero_income_mask.sum() > 0:
    print("Income distribution excluding zero/negative income households:")
    positive_income_mask = household_adjusted_gross_income > 0
    positive_income_ratio = household_adjusted_gross_income[positive_income_mask] / poverty_threshold[positive_income_mask]
    
    for p in [10, 25, 50, 75, 90]:
        value = np.percentile(positive_income_ratio, p)
        print(f"{p}th percentile (positive income only): {value:.2f}")
    
    print(f"Below 150% of poverty (positive income only): {(positive_income_ratio <= 1.5).mean()*100:.1f}%")
    print(f"Below 175% of poverty (positive income only): {(positive_income_ratio <= 1.75).mean()*100:.1f}%")

=== INCOME TO POVERTY RATIO DISTRIBUTION ===
10th percentile income-to-poverty ratio: 0.32
25th percentile income-to-poverty ratio: 1.59
50th percentile income-to-poverty ratio: 3.50
75th percentile income-to-poverty ratio: 6.68
90th percentile income-to-poverty ratio: 12.48
95th percentile income-to-poverty ratio: 19.53
99th percentile income-to-poverty ratio: 46.38

Percent of households by income level:
Below 50% of poverty: 18.9%
Below 100% of poverty: 24.6%
Below 150% of poverty: 31.3%
Below 175% of poverty: 33.5%
Below 200% of poverty: 37.3%

Households with zero or negative AGI: 1,426,160.1974516786 (9.0%)
Income distribution excluding zero/negative income households:
10th percentile (positive income only): 0.85
25th percentile (positive income only): 1.87
50th percentile (positive income only): 3.74
75th percentile (positive income only): 6.94
90th percentile (positive income only): 12.86
Below 150% of poverty (positive income only): 24.4%
Below 175% of poverty (positive income

In [None]:
# Calculate rebate with phase-out logic
# Rebate = poverty_threshold for households below 150% of poverty threshold
# Phases out linearly between 150% and 175% of poverty threshold
# No rebate for households above 175% of poverty threshold

import numpy as np

# Define phase-out thresholds
phase_out_start = 1.5  # 150% of poverty threshold
phase_out_end = 1.75   # 175% of poverty threshold

# Calculate income as percentage of poverty threshold
income_to_poverty_ratio = household_adjusted_gross_income / poverty_threshold

# Calculate tax-free consumption (X component) for each household
# Full poverty threshold amount for households below 150% of poverty
full_rebate_mask = income_to_poverty_ratio <= phase_out_start
tax_free_consumption = np.where(full_rebate_mask, poverty_threshold, 0)

# Partial rebate for households between 150% and 175% of poverty
partial_rebate_mask = (income_to_poverty_ratio > phase_out_start) & (income_to_poverty_ratio <= phase_out_end)
phase_out_factor = (phase_out_end - income_to_poverty_ratio) / (phase_out_end - phase_out_start)
partial_tax_free = poverty_threshold * phase_out_factor
tax_free_consumption = np.where(partial_rebate_mask, partial_tax_free, tax_free_consumption)

# Count households by rebate status
full_rebate_households = (full_rebate_mask & (tax_free_consumption > 0)).sum()
partial_rebate_households = (partial_rebate_mask & (tax_free_consumption > 0)).sum()
total_rebate_households = full_rebate_households + partial_rebate_households

print(f"Households receiving full rebate: {full_rebate_households:,}")
print(f"Households receiving partial rebate: {partial_rebate_households:,}")
print(f"Total households receiving any rebate: {total_rebate_households:,}")

Households receiving full rebate: 441
Households receiving partial rebate: 61
Total households receiving any rebate: 502


In [None]:
# Calculate X: Sum of all tax-free consumption across all households (weighted)
X = (tax_free_consumption * household_weight).sum()

print(f"X (Total tax-free consumption after phase-out): ${X/1e9:.1f} billion")
print(f"X (Total tax-free consumption after phase-out): ${X:,.0f}")

# For comparison, show the total if there were no phase-out
total_without_phaseout = (poverty_threshold * household_weight).sum()
print(f"Total poverty thresholds without phase-out: ${total_without_phaseout/1e9:.1f} billion")
print(f"Reduction due to phase-out: ${(total_without_phaseout - X)/1e9:.1f} billion ({100*(total_without_phaseout - X)/total_without_phaseout:.1f}%)")

X (Total tax-free consumption after phase-out): $115.2 billion
X (Total tax-free consumption after phase-out): $115,238,302,411
Total poverty thresholds without phase-out: $79525582.4 billion
Reduction due to phase-out: $79525467.2 billion (100.0%)


In [None]:
# Summary statistics matching expected outputs
total_weighted_households = household_weight.sum()
households_with_rebate = (tax_free_consumption > 0).sum()
weighted_households_with_rebate = ((tax_free_consumption > 0) * household_weight).sum()

# Calculate average rebate among households receiving any rebate
rebate_mask = tax_free_consumption > 0
if rebate_mask.sum() > 0:
    total_rebate_amount = (tax_free_consumption[rebate_mask] * household_weight[rebate_mask]).sum()
    average_rebate = total_rebate_amount / household_weight[rebate_mask].sum()
else:
    average_rebate = 0

print("=== SUMMARY STATISTICS ===")
print(f"Total weighted households: {total_weighted_households/1e6:.2f} million")
print(f"Households receiving any rebate: {weighted_households_with_rebate/1e6:.1f} million")
print(f"Households receiving partial rebate: {partial_rebate_households:,}")
print(f"Average rebate amount: ${average_rebate:,.2f}")
print(f"X (tax-free consumption): ${X/1e9:.1f} billion")

print(f"\n=== FORMULA CONTEXT ===")
print(f"X = ${X:,.0f}")
print(f"This is the sum of all tax-free consumption after accounting for phase-out")
print(f"Used in VAT rate formula: t = Rs/(Cp - X - T + Ro)")
print(f"Where Xt/(1+t) is the budgetary cost of the rebate")

=== SUMMARY STATISTICS ===
Total weighted households: 3990023.83 million
Households receiving any rebate: 5.3 million
Households receiving partial rebate: 61
Average rebate amount: $0.09
X (tax-free consumption): $115.2 billion

=== FORMULA CONTEXT ===
X = $115,238,302,411
This is the sum of all tax-free consumption after accounting for phase-out
Used in VAT rate formula: t = Rs/(Cp - X - T + Ro)
Where Xt/(1+t) is the budgetary cost of the rebate
