In [16]:
import pandas as pd

# Load the 2026 CSV file
df_2026 = pd.read_csv("nationwide_impacts/data/impacts_2026_2035/impacts_2026.csv")

# Filter reforms with and without behavioral responses in 2026
reforms_with_behavior_2026 = df_2026[
    df_2026["reform"].str.contains("behavioral_responses_yes")
]
reforms_without_behavior_2026 = df_2026[
    df_2026["reform"].str.contains("behavioral_responses_no")
]

# Create a base reform name by replacing '_yes_' with '_no_'
reforms_with_behavior_2026["reform_base"] = reforms_with_behavior_2026[
    "reform"
].str.replace("behavioral_responses_yes", "behavioral_responses_no")

# Merge to align reforms with and without behavioral responses in 2026
merged_2026 = pd.merge(
    reforms_with_behavior_2026,
    reforms_without_behavior_2026,
    left_on="reform_base",
    right_on="reform",
    suffixes=("_with_behavior", "_without_behavior"),
)

# Ensure each reform_base is unique by dropping duplicates
merged_2026 = merged_2026.drop_duplicates(subset=["reform_base"])

# Calculate the percentage change in total_income_change for 2026
merged_2026["pct_change"] = (
    merged_2026["total_income_change_with_behavior"]
    - merged_2026["total_income_change_without_behavior"]
) / merged_2026["total_income_change_without_behavior"]

# Initialize a dictionary to store the imputed dataframes
imputed_dfs = {}

# Loop through the years 2027 to 2035
for year in range(2027, 2036):
    # Load the CSV for the current year
    df_year = pd.read_csv(
        f"nationwide_impacts/data/impacts_2026_2035/impacts_{year}.csv"
    )

    # Filter reforms without behavioral responses for the current year
    reforms_without_behavior_year = df_year[
        df_year["reform"].str.contains("behavioral_responses_no")
    ]

    # Create a base reform name by removing the year suffix (e.g., '_year_2027')
    reforms_without_behavior_year["reform_base"] = reforms_without_behavior_year[
        "reform"
    ].str.replace(r"_year_\d{4}", "", regex=True)

    # Merge with the 2026 percentage change data
    merged_year = pd.merge(
        reforms_without_behavior_year,
        merged_2026[["reform_base", "pct_change"]],
        on="reform_base",
    )

    # Impute the total_income_change for reforms with behavioral responses
    merged_year["total_income_change_with_behavior"] = merged_year[
        "total_income_change"
    ] * (1 + merged_year["pct_change"])

    # Create the reform name for the imputed reform with behavioral responses
    merged_year["reform_with_behavior"] = (
        merged_year["reform_base"].str.replace(
            "behavioral_responses_no", "behavioral_responses_yes"
        )
        + f"_year_{year}"
    )

    # Create the output dataframe
    imputed_df = merged_year[
        [
            "reform_with_behavior",
            "baseline",
            "year",
            "total_income_change_with_behavior",
        ]
    ].copy()
    imputed_df.rename(
        columns={
            "reform_with_behavior": "reform",
            "total_income_change_with_behavior": "total_income_change",
        },
        inplace=True,
    )

    # Add the imputed dataframe to the dictionary
    imputed_dfs[year] = imputed_df

# Save the imputed dataframes to CSV files
for year, df in imputed_dfs.items():
    df.to_csv(f"{year}_imputed_with_behavior.csv", index=False)

print("Imputation complete. CSV files for 2027 to 2035 have been saved.")

Imputation complete. CSV files for 2027 to 2035 have been saved.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reforms_with_behavior_2026['reform_base'] = reforms_with_behavior_2026['reform'].str.replace('behavioral_responses_yes', 'behavioral_responses_no')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reforms_without_behavior_year['reform_base'] = reforms_without_behavior_year['reform'].str.replace(r'_year_\d{4}', '', regex=True)
