diff --git a/changelog.d/cliff-impact.added.md b/changelog.d/cliff-impact.added.md
new file mode 100644
index 00000000..0be86a24
--- /dev/null
+++ b/changelog.d/cliff-impact.added.md
@@ -0,0 +1 @@
+Added opt-in macro cliff impact outputs for US and UK reform analyses.
diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py
index f2f27849..b6ebd37b 100644
--- a/src/policyengine/outputs/__init__.py
+++ b/src/policyengine/outputs/__init__.py
@@ -4,6 +4,12 @@
     ChangeAggregate,
     ChangeAggregateType,
 )
+from policyengine.outputs.cliff_impact import (
+    CliffImpact,
+    CliffImpactInSimulation,
+    calculate_cliff_impact,
+    configure_cliff_impact_variables,
+)
 from policyengine.outputs.congressional_district_impact import (
     CongressionalDistrictImpact,
     compute_us_congressional_district_impacts,
@@ -76,6 +82,10 @@
     "AggregateType",
     "ChangeAggregate",
     "ChangeAggregateType",
+    "CliffImpact",
+    "CliffImpactInSimulation",
+    "calculate_cliff_impact",
+    "configure_cliff_impact_variables",
     "DecileImpact",
     "calculate_decile_impacts",
     "ProgramStatistics",
diff --git a/src/policyengine/outputs/cliff_impact.py b/src/policyengine/outputs/cliff_impact.py
new file mode 100644
index 00000000..c28093d3
--- /dev/null
+++ b/src/policyengine/outputs/cliff_impact.py
@@ -0,0 +1,103 @@
+"""Legacy-compatible tax-benefit cliff macro output."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+from policyengine.core import Output, Simulation
+from policyengine.outputs.aggregate import (
+    get_aggregate_variable,
+    get_output_entity_data,
+    require_output_column,
+)
+from policyengine.outputs.extra_variables import add_extra_variables
+
+# Output columns that must be materialized before cliff totals are summed.
+CLIFF_IMPACT_VARIABLES = ("cliff_gap", "is_on_cliff", "is_adult")
+
+
+class CliffImpactInSimulation(BaseModel):
+    """Cliff totals for a single simulation."""
+
+    # Sum of the ``cliff_gap`` output column.
+    cliff_gap: float
+    # ``is_on_cliff`` total divided by the ``is_adult`` total (0.0 if no adults).
+    cliff_share: float
+
+
+class CliffImpact(Output):
+    """Baseline and reform cliff totals, side by side."""
+
+    baseline: CliffImpactInSimulation
+    reform: CliffImpactInSimulation
+
+
+def _cliff_variables_by_entity(
+    simulation: Simulation,
+) -> dict[str, list[str]]:
+    """Group ``CLIFF_IMPACT_VARIABLES`` by the entity each one belongs to."""
+    variables_by_entity: dict[str, list[str]] = {}
+    for variable_name in CLIFF_IMPACT_VARIABLES:
+        variable = get_aggregate_variable(
+            simulation,
+            variable_name,
+            "CliffImpact.extra_variables",
+        )
+        variables_by_entity.setdefault(variable.entity, []).append(variable_name)
+    return variables_by_entity
+
+
+def configure_cliff_impact_variables(*simulations: Simulation) -> None:
+    """Materialize cliff columns only for analyses that request them."""
+    for simulation in simulations:
+        add_extra_variables(
+            simulation,
+            _cliff_variables_by_entity(simulation),
+        )
+
+
+def _sum_output_variable(
+    simulation: Simulation,
+    variable_name: str,
+) -> float:
+    """Return the ``.sum()`` of one required output column as a plain float."""
+    context = f"CliffImpact.{variable_name}"
+    variable = get_aggregate_variable(simulation, variable_name, context)
+    data = get_output_entity_data(simulation, variable.entity, context)
+    require_output_column(
+        data,
+        variable_name,
+        variable.entity,
+        simulation,
+        context,
+    )
+    return float(data[variable_name].sum())
+
+
+def _calculate_cliff_impact_in_simulation(
+    simulation: Simulation,
+) -> CliffImpactInSimulation:
+    """Summarize one simulation's cliff columns into gap and share."""
+    cliff_gap = _sum_output_variable(simulation, "cliff_gap")
+    people_on_cliffs = _sum_output_variable(simulation, "is_on_cliff")
+    adults = _sum_output_variable(simulation, "is_adult")
+
+    # Both totals are plain Python floats, so dividing by a zero adult
+    # total would raise ZeroDivisionError; report a zero share instead.
+    cliff_share = people_on_cliffs / adults if adults else 0.0
+
+    return CliffImpactInSimulation(
+        cliff_gap=cliff_gap,
+        cliff_share=cliff_share,
+    )
+
+
+def calculate_cliff_impact(
+    baseline_simulation: Simulation,
+    reform_simulation: Simulation,
+) -> CliffImpact:
+    """Calculate legacy macro cliff output from materialized simulations."""
+    return CliffImpact(
+        baseline=_calculate_cliff_impact_in_simulation(baseline_simulation),
+        reform=_calculate_cliff_impact_in_simulation(reform_simulation),
+    )
diff --git a/src/policyengine/outputs/extra_variables.py b/src/policyengine/outputs/extra_variables.py
new file mode 100644
index 00000000..cd90d6d0
--- /dev/null
+++ b/src/policyengine/outputs/extra_variables.py
@@ -0,0 +1,22 @@
+"""Helpers for conditionally materialized output variables."""
+
+from __future__ import annotations
+
+from policyengine.core import Simulation
+
+
+def add_extra_variables(
+    simulation: Simulation,
+    variables_by_entity: dict[str, list[str]],
+) -> None:
+    """Append extra output variables without dropping caller-supplied extras."""
+    extra_variables = {
+        entity: list(variables)
+        for entity, variables in (simulation.extra_variables or {}).items()
+    }
+    for entity, variables in variables_by_entity.items():
+        existing = extra_variables.setdefault(entity, [])
+        for variable in variables:
+            if variable not in existing:
+                existing.append(variable)
+    simulation.extra_variables = extra_variables
diff --git a/src/policyengine/outputs/labor_supply_response.py b/src/policyengine/outputs/labor_supply_response.py
index 1715c0c7..362190ab 100644
--- a/src/policyengine/outputs/labor_supply_response.py
+++ b/src/policyengine/outputs/labor_supply_response.py
@@ -14,6 +14,7 @@
     get_output_entity_data,
     require_output_column,
 )
+from policyengine.outputs.extra_variables import add_extra_variables
 
 CountryCode = Literal["us", "uk"]
 DecileValues = dict[int, float]
@@ -162,22 +163,6 @@ def _active_lsr_variables(country_code: CountryCode) -> dict[str, list[str]]:
     )
 
 
-def _add_extra_variables(
-    simulation: Simulation,
-    variables_by_entity: dict[str, list[str]],
-) -> None:
-    extra_variables = {
-        entity: list(variables)
-        for entity, variables in (simulation.extra_variables or {}).items()
-    }
-    for entity, variables in variables_by_entity.items():
-        existing = extra_variables.setdefault(entity, [])
-        for variable in variables:
-            if variable not in existing:
-                existing.append(variable)
-    simulation.extra_variables = extra_variables
-
-
 def configure_labor_supply_response_variables(
     baseline_simulation: Simulation,
     reform_simulation: Simulation,
@@ -193,8 +178,8 @@ def configure_labor_supply_response_variables(
         return False
 
     active_variables = _active_lsr_variables(country_code)
-    _add_extra_variables(baseline_simulation, active_variables)
-    _add_extra_variables(reform_simulation, active_variables)
+    add_extra_variables(baseline_simulation, active_variables)
+    add_extra_variables(reform_simulation, active_variables)
     return True
 
 
diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py
index ee913ffb..0d962026 100644
--- a/src/policyengine/tax_benefit_models/uk/analysis.py
+++ b/src/policyengine/tax_benefit_models/uk/analysis.py
@@ -11,9 +11,12 @@
 
 from policyengine.core import OutputCollection, Simulation
 from policyengine.outputs import (
+    CliffImpact,
     LaborSupplyResponse,
     ProgramStatistics,
+    calculate_cliff_impact,
     calculate_labor_supply_response,
+    configure_cliff_impact_variables,
     configure_labor_supply_response_variables,
 )
 from policyengine.outputs.decile_impact import (
@@ -71,6 +74,7 @@ class PolicyReformAnalysis(BaseModel):
     baseline_inequality: Inequality
     reform_inequality: Inequality
     labor_supply_response: LaborSupplyResponse
+    cliff_impact: CliffImpact | None = None
 
 
 def _format_missing_program_variables(missing_variables: set[str]) -> str | None:
@@ -141,6 +145,7 @@ def _validate_program_statistics_config(
 def economic_impact_analysis(
     baseline_simulation: Simulation,
     reform_simulation: Simulation,
+    include_cliff_impacts: bool = False,
 ) -> PolicyReformAnalysis:
     """Perform comprehensive analysis of a UK policy reform."""
     configure_labor_supply_response_variables(
@@ -148,6 +153,8 @@ def economic_impact_analysis(
         reform_simulation,
         country_code="uk",
     )
+    if include_cliff_impacts:
+        configure_cliff_impact_variables(baseline_simulation, reform_simulation)
     _validate_program_statistics_config(baseline_simulation, reform_simulation)
 
     baseline_simulation.ensure()
@@ -224,6 +231,11 @@ def economic_impact_analysis(
         reform_simulation,
         country_code="uk",
     )
+    cliff_impact = (
+        calculate_cliff_impact(baseline_simulation, reform_simulation)
+        if include_cliff_impacts
+        else None
+    )
 
     return PolicyReformAnalysis(
         decile_impacts=decile_impacts,
@@ -235,4 +247,5 @@ def economic_impact_analysis(
         baseline_inequality=baseline_inequality,
         reform_inequality=reform_inequality,
         labor_supply_response=labor_supply_response,
+        cliff_impact=cliff_impact,
     )
diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py
index 26359480..1d4bb5d1 100644
--- a/src/policyengine/tax_benefit_models/us/analysis.py
+++ b/src/policyengine/tax_benefit_models/us/analysis.py
@@ -13,9 +13,12 @@
 
 from policyengine.core import OutputCollection, Simulation
 from policyengine.outputs import (
+    CliffImpact,
     LaborSupplyResponse,
     ProgramStatistics,
+    calculate_cliff_impact,
     calculate_labor_supply_response,
+    configure_cliff_impact_variables,
     configure_labor_supply_response_variables,
 )
 from policyengine.outputs.decile_impact import (
@@ -63,6 +66,7 @@ class PolicyReformAnalysis(BaseModel):
     baseline_inequality: Inequality
     reform_inequality: Inequality
     labor_supply_response: LaborSupplyResponse
+    cliff_impact: CliffImpact | None = None
 
 
 def _format_missing_program_variables(missing_variables: set[str]) -> str | None:
@@ -134,6 +138,7 @@ def economic_impact_analysis(
     baseline_simulation: Simulation,
     reform_simulation: Simulation,
     inequality_preset: Union[USInequalityPreset, str] = USInequalityPreset.STANDARD,
+    include_cliff_impacts: bool = False,
 ) -> PolicyReformAnalysis:
     """Perform comprehensive analysis of a US policy reform.
 
@@ -151,6 +156,8 @@ def economic_impact_analysis(
         reform_simulation,
         country_code="us",
     )
+    if include_cliff_impacts:
+        configure_cliff_impact_variables(baseline_simulation, reform_simulation)
     _validate_program_statistics_config(baseline_simulation, reform_simulation)
 
     baseline_simulation.ensure()
@@ -218,6 +225,11 @@ def economic_impact_analysis(
         reform_simulation,
         country_code="us",
     )
+    cliff_impact = (
+        calculate_cliff_impact(baseline_simulation, reform_simulation)
+        if include_cliff_impacts
+        else None
+    )
 
     return PolicyReformAnalysis(
         decile_impacts=decile_impacts,
@@ -227,4 +239,5 @@ def economic_impact_analysis(
         baseline_inequality=baseline_inequality,
         reform_inequality=reform_inequality,
         labor_supply_response=labor_supply_response,
+        cliff_impact=cliff_impact,
     )
diff --git a/tests/test_cliff_impact.py b/tests/test_cliff_impact.py
new file mode 100644
index 00000000..d51b67c6
--- /dev/null
+++ b/tests/test_cliff_impact.py
@@ -0,0 +1,160 @@
+from types import SimpleNamespace
+
+import pandas as pd
+import pytest
+from microdf import MicroDataFrame
+
+import policyengine.outputs as outputs
+from policyengine.core import Simulation
+from policyengine.outputs import (
+    CliffImpact,
+    CliffImpactInSimulation,
+    calculate_cliff_impact,
+    configure_cliff_impact_variables,
+)
+
+
+class _FakeModelVersion:
+    model = SimpleNamespace(id="fake-model")
+    version = "test"
+    variables_by_name = {
+        "cliff_gap": object(),
+        "is_on_cliff": object(),
+        "is_adult": object(),
+    }
+
+    def get_variable(self, variable_name: str):
+        if variable_name not in self.variables_by_name:
+            raise ValueError(variable_name)
+        return SimpleNamespace(entity="person")  # every fake variable is person-level
+
+
+def _simulation(
+    simulation_id: str,
+    data: dict[str, list[float]],
+    *,
+    extra_variables: dict[str, list[str]] | None = None,
+) -> Simulation:
+    person = MicroDataFrame(
+        pd.DataFrame(
+            {
+                "person_id": list(range(1, len(data["person_weight"]) + 1)),
+                **data,
+            }
+        ),
+        weights="person_weight",  # weight column for the frame
+    )
+    return Simulation.model_construct(
+        id=simulation_id,
+        extra_variables=extra_variables or {},
+        tax_benefit_model_version=_FakeModelVersion(),
+        output_dataset=SimpleNamespace(data=SimpleNamespace(person=person)),
+    )
+
+
+def test_cliff_impact_models_serialize_to_legacy_shape():
+    result = CliffImpact(
+        baseline=CliffImpactInSimulation(cliff_gap=1.0, cliff_share=0.25),
+        reform=CliffImpactInSimulation(cliff_gap=2.0, cliff_share=0.5),
+    )
+
+    assert result.model_dump(mode="json") == {
+        "baseline": {"cliff_gap": 1.0, "cliff_share": 0.25},
+        "reform": {"cliff_gap": 2.0, "cliff_share": 0.5},
+    }
+
+
+def test_cliff_impact_symbols_are_publicly_exported():
+    assert outputs.CliffImpact is CliffImpact
+    assert outputs.CliffImpactInSimulation is CliffImpactInSimulation
+    assert outputs.calculate_cliff_impact is calculate_cliff_impact
+    assert outputs.configure_cliff_impact_variables is configure_cliff_impact_variables
+
+
+def test_configure_cliff_impact_variables_preserves_existing_extras():
+    baseline = _simulation(
+        "baseline",
+        {
+            "person_weight": [1.0],
+            "cliff_gap": [0.0],
+            "is_on_cliff": [0.0],
+            "is_adult": [1.0],
+        },
+        extra_variables={"person": ["existing_person_variable"]},
+    )
+    reform = _simulation(
+        "reform",
+        {
+            "person_weight": [1.0],
+            "cliff_gap": [0.0],
+            "is_on_cliff": [0.0],
+            "is_adult": [1.0],
+        },
+        extra_variables={"person": ["cliff_gap"]},
+    )
+
+    configure_cliff_impact_variables(baseline, reform)
+    configure_cliff_impact_variables(baseline, reform)  # repeat: no duplicates
+
+    assert baseline.extra_variables["person"] == [
+        "existing_person_variable",
+        "cliff_gap",
+        "is_on_cliff",
+        "is_adult",
+    ]
+    assert reform.extra_variables["person"] == [
+        "cliff_gap",
+        "is_on_cliff",
+        "is_adult",
+    ]
+
+
+def test_calculate_cliff_impact_matches_legacy_shape():
+    baseline = _simulation(
+        "baseline",
+        {
+            "person_weight": [1.0, 2.0],
+            "cliff_gap": [10.0, 20.0],
+            "is_on_cliff": [1.0, 0.0],
+            "is_adult": [1.0, 1.0],
+        },
+    )
+    reform = _simulation(
+        "reform",
+        {
+            "person_weight": [1.0, 2.0],
+            "cliff_gap": [2.0, 3.0],
+            "is_on_cliff": [0.0, 1.0],
+            "is_adult": [1.0, 1.0],
+        },
+    )
+
+    result = calculate_cliff_impact(baseline, reform)
+
+    assert result.baseline.cliff_gap == 50.0  # 1 * 10 + 2 * 20
+    assert result.baseline.cliff_share == pytest.approx(1 / 3)
+    assert result.reform.cliff_gap == 8.0  # 1 * 2 + 2 * 3
+    assert result.reform.cliff_share == pytest.approx(2 / 3)
+
+
+def test_calculate_cliff_impact_requires_materialized_columns():
+    baseline = _simulation(
+        "baseline",
+        {  # deliberately has no "is_on_cliff" column
+            "person_weight": [1.0],
+            "cliff_gap": [10.0],
+            "is_adult": [1.0],
+        },
+    )
+    reform = _simulation(
+        "reform",
+        {
+            "person_weight": [1.0],
+            "cliff_gap": [10.0],
+            "is_on_cliff": [1.0],
+            "is_adult": [1.0],
+        },
+    )
+
+    with pytest.raises(ValueError, match="is_on_cliff"):
+        calculate_cliff_impact(baseline, reform)
diff --git a/tests/test_cliff_impact_analysis.py b/tests/test_cliff_impact_analysis.py
new file mode 100644
index 00000000..ab2a0b3d
--- /dev/null
+++ b/tests/test_cliff_impact_analysis.py
@@ -0,0 +1,225 @@
+from unittest.mock import MagicMock
+
+import pandas as pd
+import pytest
+
+from policyengine.core import OutputCollection
+from policyengine.outputs import (
+    CliffImpact,
+    CliffImpactInSimulation,
+    LaborSupplyResponse,
+    ProgramStatistics,
+)
+from policyengine.outputs.inequality import Inequality
+from policyengine.tax_benefit_models.uk import analysis as uk_analysis
+from policyengine.tax_benefit_models.us import analysis as us_analysis
+
+
+def _empty_collection() -> OutputCollection:
+    return OutputCollection(outputs=[], dataframe=pd.DataFrame())
+
+
+def _empty_labor_supply_response() -> LaborSupplyResponse:
+    return LaborSupplyResponse.model_construct()
+
+
+def _empty_inequality(simulation) -> Inequality:
+    return Inequality.model_construct(
+        simulation=simulation,
+        income_variable="household_net_income",
+        gini=0.0,
+        top_10_share=0.0,
+        top_1_share=0.0,
+        bottom_50_share=0.0,
+    )
+
+
+def _make_simulation(simulation_id: str, events: list[str]) -> MagicMock:
+    simulation = MagicMock()
+    simulation.id = simulation_id
+    simulation.dataset.data.household = pd.DataFrame({"household_id": range(101)})
+    simulation.tax_benefit_model_version.get_variable.return_value.entity = "household"
+    simulation.ensure.side_effect = lambda: events.append(f"{simulation_id}.ensure")
+    return simulation
+
+
+def _patch_analysis_dependencies(
+    monkeypatch,
+    analysis_module,
+    *,
+    country_code: str,
+    events: list[str],
+    fail_on_cliff: bool,
+    cliff_result: CliffImpact | None = None,
+) -> None:
+    class DummyProgramStatistics(ProgramStatistics):
+        def run(self):
+            self.baseline_total = 0.0
+            self.reform_total = 0.0
+            self.change = 0.0
+            self.baseline_count = 0.0
+            self.reform_count = 0.0
+            self.winners = 0.0
+            self.losers = 0.0
+
+    def fake_program_statistics(**kwargs):
+        return DummyProgramStatistics.model_construct(**kwargs)
+
+    monkeypatch.setattr(
+        analysis_module,
+        "_validate_program_statistics_config",
+        lambda baseline_simulation, reform_simulation: None,
+    )
+    monkeypatch.setattr(analysis_module, "ProgramStatistics", fake_program_statistics)
+    monkeypatch.setattr(
+        analysis_module,
+        "configure_labor_supply_response_variables",
+        lambda baseline_simulation, reform_simulation, country_code: None,
+    )
+    monkeypatch.setattr(
+        analysis_module,
+        "calculate_labor_supply_response",
+        lambda baseline_simulation, reform_simulation, country_code: (
+            _empty_labor_supply_response()
+        ),
+    )
+    monkeypatch.setattr(
+        analysis_module,
+        "calculate_decile_impacts",
+        lambda **kwargs: _empty_collection(),
+    )
+
+    if country_code == "uk":
+        monkeypatch.setattr(
+            analysis_module,
+            "compute_intra_decile_impacts",
+            lambda **kwargs: _empty_collection(),
+        )
+        monkeypatch.setattr(
+            analysis_module,
+            "calculate_uk_poverty_rates",
+            lambda simulation: _empty_collection(),
+        )
+        monkeypatch.setattr(
+            analysis_module,
+            "calculate_uk_inequality",
+            _empty_inequality,
+        )
+    else:
+        monkeypatch.setattr(
+            analysis_module,
+            "calculate_us_poverty_rates",
+            lambda simulation: _empty_collection(),
+        )
+        monkeypatch.setattr(
+            analysis_module,
+            "calculate_us_inequality",
+            lambda simulation, preset: _empty_inequality(simulation),
+        )
+
+    if fail_on_cliff:
+
+        def unexpected_cliff_call(*args, **kwargs):
+            raise AssertionError("cliff helpers should not run by default")
+
+        monkeypatch.setattr(
+            analysis_module,
+            "configure_cliff_impact_variables",
+            unexpected_cliff_call,
+        )
+        monkeypatch.setattr(
+            analysis_module,
+            "calculate_cliff_impact",
+            unexpected_cliff_call,
+        )
+        return  # skip installing the event-recording fakes below
+
+    def fake_configure_cliff_impact_variables(
+        baseline_simulation,
+        reform_simulation,
+    ):
+        events.append("configure_cliff")
+
+    def fake_calculate_cliff_impact(
+        baseline_simulation,
+        reform_simulation,
+    ):
+        events.append("calculate_cliff")
+        return cliff_result
+
+    monkeypatch.setattr(
+        analysis_module,
+        "configure_cliff_impact_variables",
+        fake_configure_cliff_impact_variables,
+    )
+    monkeypatch.setattr(
+        analysis_module,
+        "calculate_cliff_impact",
+        fake_calculate_cliff_impact,
+    )
+
+
+@pytest.mark.parametrize(
+    ("analysis_module", "country_code"),
+    [(us_analysis, "us"), (uk_analysis, "uk")],
+)
+def test_economic_impact_analysis_defaults_cliff_impact_to_none(
+    monkeypatch,
+    analysis_module,
+    country_code,
+):
+    events: list[str] = []
+    baseline = _make_simulation("baseline", events)
+    reform = _make_simulation("reform", events)
+    _patch_analysis_dependencies(
+        monkeypatch,
+        analysis_module,
+        country_code=country_code,
+        events=events,
+        fail_on_cliff=True,
+    )
+
+    result = analysis_module.economic_impact_analysis(baseline, reform)
+
+    assert result.cliff_impact is None
+    assert events == ["baseline.ensure", "reform.ensure"]  # no cliff events
+
+
+@pytest.mark.parametrize(
+    ("analysis_module", "country_code"),
+    [(us_analysis, "us"), (uk_analysis, "uk")],
+)
+def test_economic_impact_analysis_can_include_cliff_impacts(
+    monkeypatch,
+    analysis_module,
+    country_code,
+):
+    events: list[str] = []
+    baseline = _make_simulation("baseline", events)
+    reform = _make_simulation("reform", events)
+    cliff_result = CliffImpact(
+        baseline=CliffImpactInSimulation(cliff_gap=1.0, cliff_share=0.1),
+        reform=CliffImpactInSimulation(cliff_gap=2.0, cliff_share=0.2),
+    )
+    _patch_analysis_dependencies(
+        monkeypatch,
+        analysis_module,
+        country_code=country_code,
+        events=events,
+        fail_on_cliff=False,
+        cliff_result=cliff_result,
+    )
+
+    result = analysis_module.economic_impact_analysis(
+        baseline,
+        reform,
+        include_cliff_impacts=True,
+    )
+
+    assert result.cliff_impact == cliff_result
+    assert events == [  # configure runs before ensure(); calculate runs after
+        "configure_cliff",
+        "baseline.ensure",
+        "reform.ensure",
+        "calculate_cliff",
+    ]
diff --git a/uv.lock b/uv.lock
index 83f691ec..e7d77e6f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2411,7 +2411,7 @@ wheels = [
 
 [[package]]
 name = "policyengine"
-version = "4.9.1"
+version = "4.10.0"
 source = { editable = "." }
 dependencies = [
     { name = "h5py", version = "3.14.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },