In [1]:
import numpy as np
import pandas as pd
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

reform = Reform.from_dict({
  "gov.contrib.repeal_state_dependent_exemptions.in_effect": {
    "2025-01-01.2100-12-31": True
  }
}, country_id="us")


baseline = Microsimulation(dataset='pooled_3_year_cps_2023')
reformed = Microsimulation(reform=reform, dataset='pooled_3_year_cps_2023')
baseline_income = baseline.calculate("household_net_income", period=2025)
reformed_income = reformed.calculate("household_net_income", period=2025)
difference_income = reformed_income - baseline_income

In [3]:
difference_income.sum()

-6091587854.847457

In [4]:
# Get state codes
state_code = baseline.calculate("state_code", period=2025)

# Get household weights for accurate aggregation
weights = baseline.calculate("household_weight", period=2025)

# Create a DataFrame with the results
results_df = pd.DataFrame({
    'state_code': state_code,
    'income_change': difference_income,
    'weight': weights
})

# Calculate state-level statistics
state_impacts = (
    results_df
    .groupby('state_code')
    .agg({
        'income_change': lambda x: np.average(x, weights=results_df.loc[x.index, 'weight']),
        'weight': 'sum'
    })
    .reset_index()
)

In [5]:
# Calculate total income impact
total_impact_by_state = (
    results_df
    .groupby('state_code')
    .apply(lambda x: np.sum(x['income_change'] * x['weight']))
    .reset_index(name='total_income_impact')
)

# Merge total impact into main results
state_impacts = state_impacts.merge(total_impact_by_state, on='state_code')

# Add households affected
state_impacts['households_affected'] = (
    results_df[results_df['income_change'] != 0]
    .groupby('state_code')['weight']
    .sum()
    .reindex(state_impacts['state_code'])
    .fillna(0)
)

# Calculate percentage of households affected
state_impacts['percent_households_affected'] = (
    state_impacts['households_affected'] / state_impacts['weight'] * 100
)

# Format the results
state_impacts['avg_income_change'] = state_impacts['income_change'].round(2)
state_impacts['total_income_impact'] = (state_impacts['total_income_impact'] / 1_000_000).round(3)
state_impacts['households_affected'] = state_impacts['households_affected'].round(0)
state_impacts['percent_households_affected'] = state_impacts['percent_households_affected'].round(2)
state_impacts['weight'] = state_impacts['weight'].round(0)

# Sort by total impact
state_impacts_sorted = state_impacts.sort_values('total_income_impact')

# Rename columns for clarity
state_impacts_sorted = state_impacts_sorted.rename(columns={
    'weight': 'total_households',
    'avg_income_change': 'avg_income_change_per_household',
    'total_income_impact': 'total_state_income_impact'
})

# Calculate national totals
national_totals = pd.DataFrame({
    'state_code': ['TOTAL'],
    'total_households': [state_impacts_sorted['total_households'].sum()],
    'avg_income_change_per_household': [(state_impacts_sorted['total_state_income_impact'].sum() / 
                                       state_impacts_sorted['total_households'].sum())],
    'total_state_income_impact': [state_impacts_sorted['total_state_income_impact'].sum()]
})

# Combine state results with national totals
final_results = pd.concat([state_impacts_sorted, national_totals])

# Display results
print("\nState-by-State Impact Analysis:")
print(final_results[[
    'state_code',
    'total_households',
    'avg_income_change_per_household',
    'total_state_income_impact'
]].to_string())


State-by-State Impact Analysis:
   state_code  total_households  avg_income_change_per_household  total_state_income_impact
4          CA        14531141.0                      -168.119995                  -2442.917
22         MI         4277382.0                      -113.279999                   -484.533
10         GA         4398419.0                       -94.610001                   -416.124
23         MN         2371857.0                      -172.839996                   -409.952
14         IL         5246896.0                       -61.740002                   -323.962
34         NY         7987400.0                       -28.860001                   -230.489
40         SC         2247520.0                      -101.629997                   -228.414
31         NJ         3619377.0                       -45.480000                   -164.620
20         MD         2346259.0                       -53.090000                   -124.572
35         OH         5050174.0                

In [6]:
final_results.to_csv('state_impacts_detailed.csv', index=False)

In [7]:
# Get required variables
state_codes = baseline.calculate("state_code", period=2025)
num_children = baseline.calculate("tax_unit_children", map_to='household', period=2025)
baseline_tax = baseline.calculate("state_income_tax_before_refundable_credits", map_to='household', period=2024)
reformed_tax = reformed.calculate("state_income_tax_before_refundable_credits", map_to='household', period=2024)

# Create results DataFrame
results = pd.DataFrame({
    'state_code': state_codes,
    'num_children': num_children,
    'zero_tax_both': (np.abs(baseline_tax) < 0.01) & (np.abs(reformed_tax) < 0.01)
})

# Get household weights
weights = baseline.calculate("household_weight", period=2024)

# Add weights to results DataFrame
results['weight'] = weights

# Calculate weighted total children by state
total_children = (
    results
    .groupby('state_code')
    .apply(lambda x: np.sum(x['num_children'] * x['weight']))
    .reset_index(name='total_children')
)

# Calculate weighted children with zero tax in both scenarios
zero_tax_children = (
    results[results['zero_tax_both']]
    .groupby('state_code')
    .apply(lambda x: np.sum(x['num_children'] * x['weight']))
    .reset_index(name='children_zero_tax')
)

# Rest of the code remains the same
state_results = total_children.merge(zero_tax_children, on='state_code', how='left')
state_results['percent_zero_tax'] = (
    state_results['children_zero_tax'] / state_results['total_children'] * 100
).round(1)

state_results = state_results.sort_values('percent_zero_tax', ascending=False)
print("\nChildren in households with zero tax in both scenarios (weighted):")
print(state_results.to_string(float_format=lambda x: f"{x:,.1f}"))




Children in households with zero tax in both scenarios (weighted):
   state_code  total_children  children_zero_tax  percent_zero_tax
2          AR       674,472.9          190,476.5              28.2
32         NM       460,692.2          129,421.1              28.1
18         LA     1,041,173.6          287,304.2              27.6
25         MS       628,728.5          168,819.7              26.9
1          AL       998,394.1          268,625.9              26.9
17         KY     1,014,110.9          254,769.5              25.1
36         OK       959,907.1          240,113.7              25.0
49         WV       378,390.8           87,248.9              23.1
40         SC     1,179,198.5          264,245.2              22.4
27         NC     2,249,389.3          500,394.7              22.2
43         TX     7,005,303.5        1,511,229.6              21.6
3          AZ     1,462,526.9          302,195.0              20.7
7          DC        96,331.9           19,736.5             