In [10]:
import numpy as np
import pandas as pd
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform

In [11]:

reform = Reform.from_dict({
  "gov.contrib.repeal_state_dependent_exemptions.in_effect": {
    "2024-01-01.2100-12-31": True
  }
}, country_id="us")


baseline = Microsimulation(dataset='pooled_3_year_cps_2023')
reformed = Microsimulation(reform=reform, dataset='pooled_3_year_cps_2023')
baseline_income = baseline.calculate("household_net_income", period=2024)
reformed_income = reformed.calculate("household_net_income", period=2024)
difference_income = reformed_income - baseline_income

In [12]:
# Get state codes
state_code = baseline.calculate("state_code", period=2024)

# Get household weights for accurate aggregation
weights = baseline.calculate("household_weight", period=2024)

# Create a DataFrame with the results
results_df = pd.DataFrame({
    'state_code': state_code,
    'income_change': difference_income,
    'weight': weights
})

# Calculate state-level statistics
state_impacts = (
    results_df
    .groupby('state_code')
    .agg({
        'income_change': lambda x: np.average(x, weights=results_df.loc[x.index, 'weight']),
        'weight': 'sum'
    })
    .reset_index()
)

In [13]:
# Calculate total income impact
total_impact_by_state = (
    results_df
    .groupby('state_code')
    .apply(lambda x: np.sum(x['income_change'] * x['weight']))
    .reset_index(name='total_income_impact')
)

# Merge total impact into main results
state_impacts = state_impacts.merge(total_impact_by_state, on='state_code')

# Add households affected
state_impacts['households_affected'] = (
    results_df[results_df['income_change'] != 0]
    .groupby('state_code')['weight']
    .sum()
    .reindex(state_impacts['state_code'])
    .fillna(0)
)

# Calculate percentage of households affected
state_impacts['percent_households_affected'] = (
    state_impacts['households_affected'] / state_impacts['weight'] * 100
)

# Format the results
state_impacts['avg_income_change'] = state_impacts['income_change'].round(2)
state_impacts['total_income_impact'] = state_impacts['total_income_impact'].round(0)
state_impacts['households_affected'] = state_impacts['households_affected'].round(0)
state_impacts['percent_households_affected'] = state_impacts['percent_households_affected'].round(2)
state_impacts['weight'] = state_impacts['weight'].round(0)

# Sort by total impact
state_impacts_sorted = state_impacts.sort_values('total_income_impact')

# Rename columns for clarity
state_impacts_sorted = state_impacts_sorted.rename(columns={
    'weight': 'total_households',
    'avg_income_change': 'avg_income_change_per_household',
    'total_income_impact': 'total_state_income_impact'
})

# Calculate national totals
national_totals = pd.DataFrame({
    'state_code': ['TOTAL'],
    'total_households': [state_impacts_sorted['total_households'].sum()],
    'avg_income_change_per_household': [(state_impacts_sorted['total_state_income_impact'].sum() / 
                                       state_impacts_sorted['total_households'].sum())],
    'total_state_income_impact': [state_impacts_sorted['total_state_income_impact'].sum()],
    'households_affected': [state_impacts_sorted['households_affected'].sum()],
    'percent_households_affected': [(state_impacts_sorted['households_affected'].sum() / 
                                   state_impacts_sorted['total_households'].sum() * 100)]
})

# Combine state results with national totals
final_results = pd.concat([state_impacts_sorted, national_totals])

# Display results
print("\nState-by-State Impact Analysis:")
print(final_results[[
    'state_code',
    'total_households',
    'avg_income_change_per_household',
    'total_state_income_impact',
    'households_affected',
    'percent_households_affected'
]].to_string())


State-by-State Impact Analysis:
   state_code  total_households  avg_income_change_per_household  total_state_income_impact  households_affected  percent_households_affected
4          CA        14365824.0                      -161.660004              -2.322313e+09                  NaN                          NaN
22         MI         4228719.0                      -113.089996              -4.782457e+08                  NaN                          NaN
10         GA         4348380.0                       -94.570000              -4.112280e+08                  NaN                          NaN
23         MN         2344873.0                      -166.809998              -3.911444e+08                  NaN                          NaN
14         IL         5187204.0                       -61.619999              -3.196205e+08                  NaN                          NaN
34         NY         7896529.0                       -28.510000              -2.251366e+08                  NaN   

In [14]:
final_results.to_csv('state_impacts_detailed.csv', index=False)

In [43]:
# Calculate household income and number of children
household_income = baseline.calculate("household_net_income", period=2024)
num_children = baseline.calculate("tax_unit_children", map_to='household', period=2024)
# Create a mapping of state codes to their exemption variables
state_to_exemption = {
    'AL': ['al_personal_exemption'],
    'CA': ['ca_exemptions'],
    'GA': ['ga_exemptions'],
    'HI': ['hi_exemptions'],
    'IN': ['in_base_exemptions'],
    'IL': ['il_dependent_exemption'],
    'KS': ['ks_exemptions'],
    'LA': ['la_dependents_exemption'],
    'MD': ['md_total_personal_exemptions'],
    'MI': ['mi_exemptions'],
    'MN': ['mn_exemptions'],
    'MS': ['ms_dependents_exemption'],
    'NE': ['ne_exemptions'],
    'NJ': ['nj_dependents_exemption'],
    'NY': ['ny_exemptions'],
    'OH': ['oh_personal_exemptions'],
    'OK': ['ok_exemptions'],
    'RI': ['ri_exemptions'],
    'SC': ['sc_dependent_exemption'],
    'VA': ['va_personal_exemption'],
    'VT': ['vt_personal_exemptions'],
    'WV': ['wv_personal_exemption']
}

# Calculate exemptions state by state
total_exemptions = np.zeros_like(household_income)
state_codes = baseline.calculate("state_code", period=2024)

# Add debug prints
for state, exemption_list in state_to_exemption.items():
    state_mask = (state_codes == state)
    state_exemptions = np.zeros_like(household_income)
    
    for var in exemption_list:
        try:
            # Map tax unit variables to household level
            exemption_value = baseline.calculate(var, map_to="household", period=2024)
            state_exemptions += exemption_value
        except Exception as e:
            continue
            
    total_exemptions[state_mask] = state_exemptions[state_mask]




results_df['total_children'] = num_children


# Identify children in households with zero exemptions
children_in_zero_exemption = (total_exemptions == 0).astype(int) * num_children
# Add to results DataFrame
results_df['children_in_zero_exemption'] = children_in_zero_exemption

# Get household weights for accurate aggregation
weights = baseline.calculate("household_weight", period=2024)
results_df['weight'] = weights

# Calculate state-level statistics with weights
state_children_impact = (
    results_df
    .groupby('state_code')
    .agg({
        'children_in_zero_exemption': lambda x: np.sum(x * results_df.loc[x.index, 'weight']),
        'total_children': lambda x: np.sum(x * results_df.loc[x.index, 'weight'])
    })
    .reset_index()
)

# Calculate percentage using total children as denominator
state_children_impact['percent_children_in_zero_exemption'] = (
    state_children_impact['children_in_zero_exemption'] / state_children_impact['total_children'] * 100
)

# Format and display results
state_children_impact['children_in_zero_exemption'] = state_children_impact['children_in_zero_exemption'].round(0)
state_children_impact['percent_children_in_zero_exemption'] = state_children_impact['percent_children_in_zero_exemption'].round(2)

print("\nState-by-State Analysis of Children in Households with Zero State Exemptions:")
print(state_children_impact[[
    'state_code',
    'children_in_zero_exemption',
    'percent_children_in_zero_exemption'
]].to_string())


State-by-State Analysis of Children in Households with Zero State Exemptions:
   state_code  children_in_zero_exemption  percent_children_in_zero_exemption
0          AK                    183148.0                              100.00
1          AL                         0.0                                0.00
2          AR                    674473.0                              100.00
3          AZ                   1462527.0                              100.00
4          CA                    142232.0                                1.85
5          CO                   1192968.0                              100.00
6          CT                    688379.0                              100.00
7          DC                     96332.0                              100.00
8          DE                    218417.0                              100.00
9          FL                   4147576.0                              100.00
10         GA                         0.0                      

In [58]:
# Define states with exemptions
state_to_exemption = {
    'AL': ['al_personal_exemption'], 'CA': ['ca_exemptions'],
    'GA': ['ga_exemptions'], 'HI': ['hi_exemptions'],
    'IN': ['in_base_exemptions'], 'IL': ['il_dependent_exemption'],
    'KS': ['ks_exemptions'], 'LA': ['la_dependents_exemption'],
    'MD': ['md_total_personal_exemptions'], 'MI': ['mi_exemptions'],
    'MN': ['mn_exemptions'], 'MS': ['ms_dependents_exemption'],
    'NE': ['ne_exemptions'], 'NJ': ['nj_dependents_exemption'],
    'NY': ['ny_exemptions'], 'OH': ['oh_personal_exemptions'],
    'OK': ['ok_exemptions'], 'RI': ['ri_exemptions'],
    'SC': ['sc_dependent_exemption'], 'VA': ['va_personal_exemption'],
    'VT': ['vt_personal_exemptions'], 'WV': ['wv_personal_exemption']
}

# Get required variables
state_codes = baseline.calculate("state_code", period=2024)
num_children = baseline.calculate("tax_unit_children", map_to='household', period=2024)
baseline_tax = baseline.calculate("state_income_tax_before_refundable_credits", period=2024)
reformed_tax = reformed.calculate("state_income_tax_before_refundable_credits", period=2024)

# Create results DataFrame
results = pd.DataFrame({
    'state_code': state_codes,
    'num_children': num_children,
    'zero_tax_both': (np.abs(baseline_tax) < 0.01) & (np.abs(reformed_tax) < 0.01)
})

# Filter for states we want to analyze (exemption states + TX for verification)
states_to_show = list(state_to_exemption.keys()) + ['TX']
results = results[results['state_code'].isin(states_to_show)]

# Calculate total children by state (unweighted)
total_children = (
    results
    .groupby('state_code')
    ['num_children']
    .sum()
    .reset_index(name='total_children')
)

# Calculate children with zero tax in both scenarios (unweighted)
zero_tax_children = (
    results[results['zero_tax_both']]
    .groupby('state_code')
    ['num_children']
    .sum()
    .reset_index(name='children_zero_tax')
)

# Merge results
state_results = total_children.merge(zero_tax_children, on='state_code', how='left').fillna(0)

# Calculate percentage
state_results['percent_zero_tax'] = (
    state_results['children_zero_tax'] / state_results['total_children'] * 100
).round(1)

# Sort and display results
state_results = state_results.sort_values('percent_zero_tax', ascending=False)
print("\nChildren in households with zero tax in both scenarios (unweighted):")
print(state_results.to_string(float_format=lambda x: f"{x:,.1f}"))


Children in households with zero tax in both scenarios (unweighted):
   state_code  total_children  children_zero_tax  percent_zero_tax
16         OK         1,849.0            1,090.0              59.0
21         VT         1,154.0              676.0              58.6
14         NY         3,629.0            2,029.0              55.9
13         NJ         2,013.0            1,100.0              54.6
19         TX         7,333.0            3,984.0              54.3
17         RI           823.0              408.0              49.6
22         WV         1,968.0              938.0              47.7
3          HI         1,649.0              783.0              47.5
8          MD         1,272.0              581.0              45.7
10         MN         1,574.0              716.0              45.5
2          GA         2,386.0            1,063.0              44.6
20         VA         2,098.0              932.0              44.4
1          CA         9,521.0            4,179.0           