# California AEI Rebate Base Analysis

This notebook calculates the rebate base for a proposed California consumption rebate program.

## Program Design
- **Rebate Base**: Equal to the Federal Poverty Guideline (FPG) for the tax unit or household
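- **Phase-out**: The rebate base phases out linearly as adjusted gross income (AGI) rises from 150% to 175% of FPG (full base at or below 150%, zero at or above 175%)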
- **Actual Rebate**: `rebate_base * VAT_rate` (VAT rate to be determined)

## Key Formula
The rebate base (X) is used in the VAT rate formula:
```
t = Rs/(Cp - X - T + Ro)
```
Where:
- t = VAT rate
- Rs = Revenue target
- Cp = Private consumption
- X = Total rebate base (calculated here)
- T = Existing taxes
- Ro = Other revenue

## Setup and Imports

In [1]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
from policyengine_us.model_api import *
import numpy as np
import pandas as pd

# Constants
# SIMULATION_YEAR is the period passed to calculate_rebate_base_statistics
# for both unit types and printed in the results header further down.
SIMULATION_YEAR = 2026  # Year for simulation calculations

# Visible confirmation that every import in this cell succeeded.
print("Libraries imported successfully")

Libraries imported successfully


## Define the AEI Rebate Base Variables

In [None]:
def create_aei_reform():
    """
    Creates a PolicyEngine reform that adds the California AEI rebate base variables.

    The reform registers three variables:
      - household_fpg: federal poverty guideline computed from household size.
      - ca_aei_rebate_base_tax_unit: rebate base using the tax unit's own AGI
        and tax_unit_fpg (defined only for California tax units).
      - ca_aei_rebate_base_household: rebate base using the household's
        combined AGI and household_fpg.

    In both rebate-base variables the base equals the unit's FPG when AGI is at
    or below 150% of FPG, falls linearly between 150% and 175%, and is zero at
    or above 175%.

    Returns:
        Reform: PolicyEngine reform with ca_aei_rebate_base variables
    """

    # Phase-out thresholds as multiples of FPG. They live in the enclosing
    # function (not in the Variable class bodies) so both formulas share one
    # definition.
    PHASEOUT_START = 1.5   # 150% FPG
    PHASEOUT_END = 1.75    # 175% FPG

    def phased_out_rebate_base(income, fpg):
        """Return fpg scaled down linearly as income/fpg moves from PHASEOUT_START to PHASEOUT_END."""
        has_fpg = fpg > 0
        # Swap in a dummy denominator where fpg is not positive so the
        # division never hits zero; those entries get an infinite ratio and
        # are therefore fully phased out.
        safe_fpg = where(has_fpg, fpg, 1)
        income_to_fpg_ratio = where(has_fpg, income / safe_fpg, np.inf)

        phaseout_width = PHASEOUT_END - PHASEOUT_START
        excess = max_(income_to_fpg_ratio - PHASEOUT_START, 0)
        phaseout_percentage = min_(1, excess / phaseout_width)

        return fpg * (1 - phaseout_percentage)

    class household_fpg(Variable):
        value_type = float
        entity = Household
        label = "Household's federal poverty guideline"
        definition_period = YEAR
        unit = USD

        def formula(household, period, parameters):
            # First-person amount plus the per-person increment for every
            # additional household member, looked up by state group.
            n = household("household_size", period)
            state_group = household("state_group_str", period)
            p_fpg = parameters(period).gov.hhs.fpg
            p1 = p_fpg.first_person[state_group]
            pn = p_fpg.additional_person[state_group]
            return p1 + pn * (n - 1)

    class ca_aei_rebate_base_tax_unit(Variable):
        value_type = float
        entity = TaxUnit
        label = "California AEI rebate base (tax unit version)"
        unit = USD
        definition_period = YEAR
        defined_for = StateCode.CA

        def formula(tax_unit, period, parameters):
            # Use tax unit's own AGI
            income = tax_unit("adjusted_gross_income", period)
            fpg = tax_unit("tax_unit_fpg", period)
            return phased_out_rebate_base(income, fpg)

    class ca_aei_rebate_base_household(Variable):
        value_type = float
        entity = Household
        label = "California AEI rebate base (household version)"
        unit = USD
        definition_period = YEAR

        def formula(household, period, parameters):
            # person.tax_unit(...) gives every person their tax unit's value,
            # so summing the projected AGI directly would count each tax
            # unit's AGI once per member. Splitting it evenly across the tax
            # unit's members first makes each tax unit contribute its AGI
            # exactly once to the household total.
            # NOTE(review): assumes all members of a tax unit belong to the
            # same household and that `tax_unit_size` counts every member.
            person = household.members
            tax_unit_agi = person.tax_unit("adjusted_gross_income", period)
            tax_unit_size = person.tax_unit("tax_unit_size", period)
            income = household.sum(tax_unit_agi / tax_unit_size)

            fpg = household("household_fpg", period)
            return phased_out_rebate_base(income, fpg)

    class AEIReform(Reform):
        def apply(self):
            self.update_variable(household_fpg)
            self.update_variable(ca_aei_rebate_base_tax_unit)
            self.update_variable(ca_aei_rebate_base_household)

    return AEIReform

# Confirms only that create_aei_reform has been defined; the reform itself is
# built later, when the function is called before creating the simulation.
print("Reform defined successfully")

## Calculate Rebate Base Statistics

In [None]:
def calculate_rebate_base_statistics(sim, unit_type="household", year=2026):
    """
    Calculate AEI rebate base program statistics for California households or tax units.

    Args:
        sim: Microsimulation object with reform applied. Only its
            ``calculate`` and ``calculate_dataframe`` methods are used.
        unit_type: Either "household" or "tax_unit"
        year: Year to calculate for

    Returns:
        Dictionary with rebate base statistics:
            unit_type: the unit type that was requested
            total_ca_units: number of California units
            units_with_rebate: number of units with a positive rebate base
            rebate_percentage: units_with_rebate / total_ca_units
                (0.0 when there are no California units)
            average_rebate_base: mean rebate base among units with a positive
                base (0 when there are none)
            total_rebate_base: sum of the rebate base over California units

    Raises:
        ValueError: if unit_type is neither "household" nor "tax_unit".
    """
    # Reject unknown unit types up front; otherwise a typo such as
    # "Household" would silently produce tax-unit statistics.
    if unit_type not in ("household", "tax_unit"):
        raise ValueError(
            f"unit_type must be 'household' or 'tax_unit', got {unit_type!r}"
        )

    print(f"Calculating {unit_type} statistics for {year}...")

    if unit_type == "household":
        # Calculate rebate base for all households
        rebate_base = sim.calculate("ca_aei_rebate_base_household", year)

        # Filter for California households only
        household_state = sim.calculate("state_code", year, map_to="household")
        ca_mask = household_state == "CA"

        # Apply CA filter
        ca_rebate_base = rebate_base[ca_mask]
        total_ca_units = ca_mask.sum()

    else:  # tax_unit
        # Calculate rebate base for all tax units (defined_for gives 0 for non-CA)
        rebate_base = sim.calculate("ca_aei_rebate_base_tax_unit", year)

        # Use calculate_dataframe to get household-level data
        household_df = sim.calculate_dataframe(
            ["household_id", "state_code"],
            year,
            map_to="household"
        )

        # Get tax unit data
        tax_unit_df = sim.calculate_dataframe(
            ["tax_unit_id", "tax_unit_household_id"],
            year
        )

        # Merge to get state for each tax unit
        tax_unit_with_state = tax_unit_df.merge(
            household_df[["household_id", "state_code"]],
            left_on="tax_unit_household_id",
            right_on="household_id",
            how="left"
        )

        # Flag CA tax units and count them.
        # NOTE(review): confirm the merged frame still carries tax-unit
        # weights, so that this sum is a weighted count rather than a row count.
        ca_tax_unit_mask = tax_unit_with_state["state_code"] == "CA"
        total_ca_units = ca_tax_unit_mask.sum()

        # For tax units, we use all rebates (defined_for already filters to CA)
        ca_rebate_base = rebate_base

    # Calculate statistics (MicroSeries already contain weights)
    units_with_rebate = (ca_rebate_base > 0).sum()
    total_rebate_base = ca_rebate_base.sum()
    average_rebate_base = ca_rebate_base[ca_rebate_base > 0].mean() if units_with_rebate > 0 else 0

    # Guard the share against an empty California population, mirroring the
    # zero fallback used for the average above.
    rebate_percentage = units_with_rebate / total_ca_units if total_ca_units > 0 else 0.0

    return {
        "unit_type": unit_type,
        "total_ca_units": total_ca_units,
        "units_with_rebate": units_with_rebate,
        "rebate_percentage": rebate_percentage,
        "average_rebate_base": average_rebate_base,
        "total_rebate_base": total_rebate_base,
    }

# Build the reform and load the microdata a single time; both unit types
# below are computed from this one simulation object.
print("Loading data and creating simulation...")
reform = create_aei_reform()
sim = Microsimulation(
    reform=reform,
    dataset="hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5",
)

# Household statistics first, then tax-unit statistics, both for SIMULATION_YEAR
household_results, tax_unit_results = [
    calculate_rebate_base_statistics(sim, unit_type, SIMULATION_YEAR)
    for unit_type in ("household", "tax_unit")
]

## Display Results

In [None]:
# One row per unit type, so the household and tax-unit figures sit side by side
results_df = pd.DataFrame([household_results, tax_unit_results])

# Add human-readable string versions of each numeric column:
# counts in millions, dollar totals in billions, share as a whole percent.
display_df = results_df.assign(**{
    'Total CA Units': results_df['total_ca_units'].map(
        lambda units: f"{units/1e6:.1f}M"
    ),
    'Units with Rebate': [
        f"{with_rebate/1e6:.1f}M ({share:.0%})"
        for with_rebate, share in zip(
            results_df['units_with_rebate'], results_df['rebate_percentage']
        )
    ],
    'Average Rebate Base': results_df['average_rebate_base'].map(
        lambda base: f"${base:,.0f}"
    ),
    'Total Rebate Base': results_df['total_rebate_base'].map(
        lambda base: f"${base/1e9:.1f}B"
    ),
})

# Keep only the presentation columns and give the unit-type column a title
display_cols = ['unit_type', 'Total CA Units', 'Units with Rebate', 'Average Rebate Base', 'Total Rebate Base']
display_df = display_df[display_cols].rename(columns={'unit_type': 'Unit Type'})

print(f"\n=== REBATE BASE CALCULATIONS ({SIMULATION_YEAR}) ===")
display_df

In [5]:
import pandas as pd

# Official statistics constants
CENSUS_HOUSEHOLDS_2023 = 13_699_816  # From ACS 2023 1-Year Estimates
IRS_RETURNS_2022 = 18_487_690  # From IRS SOI Historic Table 2

# Create validation table: one row per metric, comparing the simulated
# California unit count with the official count.
# The official figures are for 2022/2023 while the simulation is for
# SIMULATION_YEAR, so part of any difference reflects the gap in years.
validation_rows = [
    # (metric, official source, official count, simulated count)
    ('Households', 'U.S. Census ACS 2023', CENSUS_HOUSEHOLDS_2023, household_results['total_ca_units']),
    ('Tax Returns/Units', 'IRS SOI 2022', IRS_RETURNS_2022, tax_unit_results['total_ca_units']),
]

validation_data = {
    'Metric': [metric for metric, _, _, _ in validation_rows],
    'Official Source': [source for _, source, _, _ in validation_rows],
    'Official Count': [f"{official:,}" for _, _, official, _ in validation_rows],
    # Label follows SIMULATION_YEAR so it cannot drift from the year actually simulated
    f'Our Simulation ({SIMULATION_YEAR})': [
        f"{simulated:,.0f}" for _, _, _, simulated in validation_rows
    ],
    # Signed percent difference of the simulated count relative to the official count
    'Difference': [
        f"{(simulated/official - 1)*100:+.1f}%"
        for _, _, official, simulated in validation_rows
    ],
}

validation_df = pd.DataFrame(validation_data)
print("=== VALIDATION AGAINST OFFICIAL STATISTICS ===")
validation_df

=== VALIDATION AGAINST OFFICIAL STATISTICS ===


Unnamed: 0,Metric,Official Source,Official Count,Our Simulation (2026),Difference
0,Households,U.S. Census ACS 2023,13699816,14605984,+6.6%
1,Tax Returns/Units,IRS SOI 2022,18487690,21913086,+18.5%


## Conclusions