In [26]:
from policyengine_us import Microsimulation
from policyengine_core.reforms import Reform
import pandas as pd
import plotly.express as px


In [5]:
# Baseline microsimulation: constructed without a reform, so it is the
# comparison point for the `reformed` simulation built in a later cell.
baseline = Microsimulation()



In [9]:
# Parameter overrides for the reform scenario. Each entry maps a parameter
# path to {date-range key: value}; the paths under
# gov.contrib.congress.wyden_smith are boolean switches, the two
# gov.irs.credits.ctc entries are dollar amounts.
wyden_smith_parameters = {
    "gov.contrib.congress.wyden_smith.actc_lookback": {
        "2024-01-01.2025-12-31": True,
    },
    "gov.contrib.congress.wyden_smith.per_child_actc_phase_in": {
        "2023-01-01.2025-12-31": True,
    },
    "gov.irs.credits.ctc.amount.base[0].amount": {
        "2024-01-01.2025-12-31": 2100,
    },
    # The refundable per-child maximum is set separately for each year.
    "gov.irs.credits.ctc.refundable.individual_max": {
        "2023-01-01.2023-12-31": 1800,
        "2024-01-01.2024-12-31": 1900,
        "2025-01-01.2025-12-31": 2100,
    },
}

reform = Reform.from_dict(wyden_smith_parameters, country_id="us")

# Microsimulation with the reform applied; compared against `baseline`.
reformed = Microsimulation(reform=reform)


In [43]:
# Start `all_results` as an empty DataFrame so the name exists before the
# yearly loop cell runs; that cell fills it with one row per (State, Year).
all_results = pd.DataFrame()

In [44]:
# Weighted mean change in household net income (reform minus baseline) by
# state and year, restricted to households with at least one CTC-qualifying
# child and a household-level AGI within [AGI_MIN, AGI_MAX].
AGI_MIN = 2_500
AGI_MAX = 50_000
YEARS = range(2023, 2026)

# One summary frame per year, concatenated once after the loop. The list is
# created inside this cell (instead of appending to a frame built in another
# cell) so that re-running the cell cannot duplicate rows.
yearly_summaries = []

for year in YEARS:
    baseline_household = baseline.calculate("household_net_income", period=year, map_to="household")
    reformed_household = reformed.calculate("household_net_income", period=year, map_to="household")

    # Household-level attributes, all taken from the baseline simulation
    ctc_qualifying_children = baseline.calculate("ctc_qualifying_children", period=year, map_to="household")
    state_codes = baseline.calculate("state_code", period=year, map_to="household")
    agi = baseline.calculate("adjusted_gross_income", period=year, map_to="household")

    # Weights attached to the baseline household-level result
    baseline_weight = baseline_household.weights

    # Net-income change for every household
    difference = reformed_household - baseline_household

    df_year = pd.DataFrame({
        'Year': year,
        'State': state_codes,
        'Difference': difference,
        'CTC_Qualifying_Children': ctc_qualifying_children,
        'Weight': baseline_weight,
        'AGI': agi
    })

    # Precompute weight * difference as a column so the per-state aggregation
    # is a plain sum (no lambda reaching back into the frame by index).
    df_year['Weighted_Difference'] = df_year['Difference'] * df_year['Weight']

    # Keep households with >= 1 qualifying child and AGI inside the band
    # (both bounds inclusive).
    is_eligible = (
        (df_year['CTC_Qualifying_Children'] >= 1)
        & (df_year['AGI'] >= AGI_MIN)
        & (df_year['AGI'] <= AGI_MAX)
    )
    df_filtered = df_year[is_eligible]

    # Per-state totals of weighted difference and of weight
    df_grouped = (
        df_filtered
        .groupby('State')[['Weighted_Difference', 'Weight']]
        .sum()
        .reset_index()
    )

    # Weighted average difference = sum(weight * difference) / sum(weight)
    df_grouped['Mean_Difference'] = df_grouped['Weighted_Difference'] / df_grouped['Weight']
    df_grouped['Year'] = year

    # Select and rename columns
    df_grouped = df_grouped[['Year', 'State', 'Mean_Difference', 'Weight']]
    df_grouped = df_grouped.rename(columns={'Weight': 'Total_Weight'})

    yearly_summaries.append(df_grouped)

# Combine all years, drop rows containing NaN (e.g. a mean that came out as
# 0/0), then sort by State and Year. The index is reset last so it is
# contiguous even if rows were dropped.
all_results = (
    pd.concat(yearly_summaries, ignore_index=True)
    .dropna()
    .sort_values(['State', 'Year'])
    .reset_index(drop=True)
)

# Display the results
print(all_results)

     Year State  Mean_Difference   Total_Weight
0    2023    AK       963.892457   11886.719986
1    2024    AK      1037.773434   11516.829971
2    2025    AK      1216.882271   11207.789963
3    2023    AL       494.327029  141075.609558
4    2024    AL       553.198352  130823.649780
..    ...   ...              ...            ...
148  2024    WV       428.550899   48176.619934
149  2025    WV       710.042486   44582.329987
150  2023    WY       323.264818   12120.260025
151  2024    WY       411.158179   10948.930023
152  2025    WY       739.925405   10948.930023

[153 rows x 4 columns]


In [45]:
# Build the export table: one row per (State, Year) holding the weighted mean
# change in household net income. `all_results` already names that column
# 'Mean_Difference' (it has no 'Difference' column), so no rename is needed;
# selecting the three columns simply drops 'Total_Weight'.
df_results = (
    all_results[['Year', 'State', 'Mean_Difference']]
    .sort_values(['State', 'Year'])
    .reset_index(drop=True)
)

# Display the results
print(df_results)

     Year State  Mean_Difference
0    2023    AK       963.892457
1    2024    AK      1037.773434
2    2025    AK      1216.882271
3    2023    AL       494.327029
4    2024    AL       553.198352
..    ...   ...              ...
148  2024    WV       428.550899
149  2025    WV       710.042486
150  2023    WY       323.264818
151  2024    WY       411.158179
152  2025    WY       739.925405

[153 rows x 3 columns]


In [47]:

# Write the state-by-year table to disk as CSV; index=False keeps the
# positional row index out of the file.
df_results.to_csv(path_or_buf='state_results.csv', index=False)
print("Results have been exported to 'state_results.csv'")

Results have been exported to 'state_results.csv'


In [48]:
# Export the state-by-year table to CSV, ordered by State then Year.
# NOTE(review): the previous cell already writes the same
# 'state_results.csv'; one of the two export cells is likely redundant.
csv_filename = 'state_results.csv'

# Sort into a separate name instead of rebinding `df_results`, so this cell
# does not overwrite its own input and can be re-run safely.
df_export = df_results.sort_values(['State', 'Year'])
df_export.to_csv(csv_filename, index=False)

print(f"CSV file '{csv_filename}' has been generated.")

# Optionally, display the first few rows of the DataFrame
print("\nFirst few rows of the data:")
print(df_export.head())


CSV file 'state_results.csv' has been generated.

First few rows of the data:
   Year State  Mean_Difference
0  2023    AK       963.892457
1  2024    AK      1037.773434
2  2025    AK      1216.882271
3  2023    AL       494.327029
4  2024    AL       553.198352
