From 1169a71fc472d51694504c5e0be31c274b31444a Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 13 May 2026 15:49:11 +0200 Subject: [PATCH] Add UK wealth decile impact outputs --- .../add-uk-wealth-decile-outputs.added.md | 1 + docs/impact-analysis.md | 8 +- docs/outputs.md | 22 +++- src/policyengine/outputs/decile_impact.py | 13 +- .../tax_benefit_models/uk/analysis.py | 22 ++++ tests/test_intra_decile_impact.py | 75 +++++++++++ tests/test_uk_analysis.py | 118 ++++++++++++++++++ 7 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 changelog.d/add-uk-wealth-decile-outputs.added.md create mode 100644 tests/test_uk_analysis.py diff --git a/changelog.d/add-uk-wealth-decile-outputs.added.md b/changelog.d/add-uk-wealth-decile-outputs.added.md new file mode 100644 index 00000000..768871c7 --- /dev/null +++ b/changelog.d/add-uk-wealth-decile-outputs.added.md @@ -0,0 +1 @@ +Add UK wealth-decile impact outputs and integration coverage for economic impact analysis. diff --git a/docs/impact-analysis.md b/docs/impact-analysis.md index 6f7d43fe..25db85ae 100644 --- a/docs/impact-analysis.md +++ b/docs/impact-analysis.md @@ -2,7 +2,7 @@ title: "Impact analysis" --- -`economic_impact_analysis` runs a baseline and a reform simulation through a bundled set of outputs — decile impacts, program statistics, poverty, and inequality — and returns a typed `PolicyReformAnalysis`. +`economic_impact_analysis` runs a baseline and a reform simulation through a bundled set of outputs — decile impacts, program statistics, poverty, and inequality — and returns a typed `PolicyReformAnalysis`. UK analysis also includes wealth-decile impact outputs. ## Usage @@ -35,6 +35,8 @@ A `PolicyReformAnalysis` with: | Attribute | Type | Content | |---|---|---| | `decile_impacts` | `OutputCollection[DecileImpact]` | Mean baseline / reform / change and winner-loser counts per decile | +| `wealth_decile_impacts` | `OutputCollection[DecileImpact]` | UK only: household net income impacts grouped by `household_wealth_decile` | +| `intra_wealth_decile_impacts` | `OutputCollection[IntraDecileImpact]` | UK only: within-wealth-decile distribution of household net income changes | | `program_statistics` | `OutputCollection[ProgramStatistics]` | Totals, counts, winners/losers per program | | `baseline_poverty` | `OutputCollection[Poverty]` | Baseline rates by measure and demographic group | | `reform_poverty` | `OutputCollection[Poverty]` | Reform rates, same schema as baseline | @@ -50,6 +52,10 @@ for prog in analysis.program_statistics.outputs: for d in analysis.decile_impacts.outputs: print(d.decile, d.absolute_change, d.relative_change) +if hasattr(analysis, "wealth_decile_impacts"): + for d in analysis.wealth_decile_impacts.outputs: + print(d.decile, d.absolute_change, d.relative_change) + analysis.reform_inequality.gini - analysis.baseline_inequality.gini ``` diff --git a/docs/outputs.md b/docs/outputs.md index 0d2de093..788f45dd 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -96,6 +96,8 @@ Or on a relative change — `relative_change_geq=0.05` selects households with a One decile's baseline mean, reform mean, and mean change. For all ten at once, use `calculate_decile_impacts`. +By default, `calculate_decile_impacts` ranks units into deciles using `income_variable`. To measure changes in one variable while grouping by an existing decile variable, pass `decile_variable`. For example, UK wealth-decile impacts measure changes in household net income grouped by `household_wealth_decile`. + ```python from policyengine.outputs import calculate_decile_impacts @@ -108,12 +110,20 @@ impacts = calculate_decile_impacts( for row in impacts.outputs: print(row.decile, row.absolute_change, row.relative_change) -impacts.dataframe # same data as a DataFrame +wealth_deciles = calculate_decile_impacts( + baseline_simulation=baseline, + reform_simulation=reform, + income_variable="household_net_income", + decile_variable="household_wealth_decile", + entity="household", +) + +impacts.dataframe # includes the decile_variable column ``` ## IntraDecileImpact -Distribution of household-level impact within each decile (five bucket categories summing to 1.0). Use `compute_intra_decile_impacts` for the full set. +Distribution of household-level impact within each decile (five bucket categories summing to 1.0). Use `compute_intra_decile_impacts` for the full set. Like `calculate_decile_impacts`, this helper accepts `decile_variable` when the grouping variable is already present in the simulation output. ```python from policyengine.outputs import compute_intra_decile_impacts @@ -123,6 +133,14 @@ spread = compute_intra_decile_impacts( reform_simulation=reform, income_variable="household_net_income", ) + +wealth_spread = compute_intra_decile_impacts( + baseline_simulation=baseline, + reform_simulation=reform, + income_variable="household_net_income", + decile_variable="household_wealth_decile", + entity="household", +) ``` ## Poverty diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py index b0f2306e..3f19931a 100644 --- a/src/policyengine/outputs/decile_impact.py +++ b/src/policyengine/outputs/decile_impact.py @@ -105,6 +105,7 @@ def calculate_decile_impacts( reform_policy: Optional[Policy] = None, dynamic: Optional[Dynamic] = None, income_variable: str = "equiv_hbai_household_net_income", + decile_variable: Optional[str] = None, entity: Optional[str] = None, quantiles: int = 10, baseline_simulation: Optional[Simulation] = None, @@ -112,8 +113,16 @@ def calculate_decile_impacts( ) -> OutputCollection[DecileImpact]: """Calculate decile-by-decile impact of a reform. + By default, deciles are computed from ``income_variable``. Pass + ``decile_variable`` to group by a pre-computed decile variable while + still measuring changes in ``income_variable``; for example, UK wealth + deciles use ``income_variable="household_net_income"`` with + ``decile_variable="household_wealth_decile"``. + Returns: - OutputCollection containing list of DecileImpact objects and DataFrame + OutputCollection containing list of DecileImpact objects and a DataFrame. + The DataFrame includes ``decile_variable`` so callers can distinguish + income-derived deciles from pre-computed grouping variables. """ if (baseline_simulation is None) != (reform_simulation is None): raise ValueError( @@ -148,6 +157,7 @@ def calculate_decile_impacts( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, income_variable=income_variable, + decile_variable=decile_variable, entity=entity, decile=decile, quantiles=quantiles, @@ -162,6 +172,7 @@ def calculate_decile_impacts( "baseline_simulation_id": r.baseline_simulation.id, "reform_simulation_id": r.reform_simulation.id, "income_variable": r.income_variable, + "decile_variable": r.decile_variable, "decile": r.decile, "baseline_mean": r.baseline_mean, "reform_mean": r.reform_mean, diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index f37d18be..50f555c8 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -19,6 +19,10 @@ Inequality, calculate_uk_inequality, ) +from policyengine.outputs.intra_decile_impact import ( + IntraDecileImpact, + compute_intra_decile_impacts, +) from policyengine.outputs.poverty import ( Poverty, calculate_uk_poverty_rates, @@ -29,6 +33,8 @@ class PolicyReformAnalysis(BaseModel): """Complete policy reform analysis result.""" decile_impacts: OutputCollection[DecileImpact] + wealth_decile_impacts: OutputCollection[DecileImpact] + intra_wealth_decile_impacts: OutputCollection[IntraDecileImpact] program_statistics: OutputCollection[ProgramStatistics] baseline_poverty: OutputCollection[Poverty] reform_poverty: OutputCollection[Poverty] @@ -55,6 +61,20 @@ def economic_impact_analysis( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, ) + wealth_decile_impacts = calculate_decile_impacts( + baseline_simulation=baseline_simulation, + reform_simulation=reform_simulation, + income_variable="household_net_income", + decile_variable="household_wealth_decile", + entity="household", + ) + intra_wealth_decile_impacts = compute_intra_decile_impacts( + baseline_simulation=baseline_simulation, + reform_simulation=reform_simulation, + income_variable="household_net_income", + decile_variable="household_wealth_decile", + entity="household", + ) programs = { "income_tax": {"is_tax": True}, @@ -114,6 +134,8 @@ def economic_impact_analysis( return PolicyReformAnalysis( decile_impacts=decile_impacts, + wealth_decile_impacts=wealth_decile_impacts, + intra_wealth_decile_impacts=intra_wealth_decile_impacts, program_statistics=program_collection, baseline_poverty=baseline_poverty, reform_poverty=reform_poverty, diff --git a/tests/test_intra_decile_impact.py b/tests/test_intra_decile_impact.py index 04ae5412..8f8960c1 100644 --- a/tests/test_intra_decile_impact.py +++ b/tests/test_intra_decile_impact.py @@ -211,6 +211,13 @@ def test_intra_decile_with_decile_variable(): decile_2.gain_more_than_5pct == 1.0 or abs(decile_2.gain_more_than_5pct - 1.0) < 1e-9 ) + assert results.dataframe["decile"].tolist() == [1, 2, 0] + assert ( + results.dataframe.loc[ + results.dataframe["decile"] == 1, "gain_more_than_5pct" + ].iloc[0] + == 1.0 + ) # --------------------------------------------------------------------------- @@ -254,6 +261,74 @@ def test_decile_impact_with_decile_variable(): assert abs(di.absolute_change - 2000.0) < 1e-6 +def test_calculate_decile_impacts_with_decile_variable(monkeypatch): + """calculate_decile_impacts passes pre-computed grouping through.""" + version = _make_version("household_net_income", "household") + baseline = Simulation.model_construct( + tax_benefit_model_version=version, + output_dataset=MagicMock( + data=MagicMock( + household=MicroDataFrame( + pd.DataFrame( + { + "household_net_income": [10.0, 20.0, 100.0, 200.0], + "household_weight": [1.0, 1.0, 1.0, 1.0], + "household_wealth_decile": [2, 2, 1, 1], + } + ), + weights="household_weight", + ) + ) + ), + ) + reform = Simulation.model_construct( + tax_benefit_model_version=version, + output_dataset=MagicMock( + data=MagicMock( + household=MicroDataFrame( + pd.DataFrame( + { + "household_net_income": [11.0, 21.0, 110.0, 210.0], + "household_weight": [1.0, 1.0, 1.0, 1.0], + "household_wealth_decile": [2, 2, 1, 1], + } + ), + weights="household_weight", + ) + ) + ), + ) + + monkeypatch.setattr( + "policyengine.outputs.decile_impact.Simulation.ensure", + lambda self: None, + ) + + results = calculate_decile_impacts( + baseline_simulation=baseline, + reform_simulation=reform, + income_variable="household_net_income", + decile_variable="household_wealth_decile", + entity="household", + quantiles=2, + ) + + decile_1 = next(r for r in results.outputs if r.decile == 1) + decile_2 = next(r for r in results.outputs if r.decile == 2) + + assert decile_1.decile_variable == "household_wealth_decile" + assert decile_1.baseline_mean == 150.0 + assert decile_1.reform_mean == 160.0 + assert decile_1.absolute_change == 10.0 + assert decile_2.baseline_mean == 15.0 + assert decile_2.absolute_change == 1.0 + assert results.dataframe["decile"].tolist() == [1, 2] + assert results.dataframe["decile_variable"].tolist() == [ + "household_wealth_decile", + "household_wealth_decile", + ] + + def test_decile_impact_qcut_default(): """Without decile_variable, DecileImpact uses qcut (default behavior).""" n = 100 diff --git a/tests/test_uk_analysis.py b/tests/test_uk_analysis.py new file mode 100644 index 00000000..eb4153f5 --- /dev/null +++ b/tests/test_uk_analysis.py @@ -0,0 +1,118 @@ +from unittest.mock import MagicMock + +import pandas as pd + +from policyengine.core import OutputCollection +from policyengine.outputs import ProgramStatistics +from policyengine.outputs.inequality import Inequality +from policyengine.tax_benefit_models.uk import analysis as uk_analysis + + +def _empty_collection() -> OutputCollection: + return OutputCollection(outputs=[], dataframe=pd.DataFrame()) + + +def _make_simulation() -> MagicMock: + simulation = MagicMock() + simulation.dataset.data.household = pd.DataFrame({"household_id": range(101)}) + simulation.tax_benefit_model_version.get_variable.return_value.entity = "household" + return simulation + + +def test_uk_economic_impact_analysis_includes_wealth_decile_outputs(monkeypatch): + baseline = _make_simulation() + reform = _make_simulation() + + decile_calls = [] + standard_deciles = OutputCollection( + outputs=[], + dataframe=pd.DataFrame({"source": ["standard"]}), + ) + wealth_deciles = OutputCollection( + outputs=[], + dataframe=pd.DataFrame({"source": ["wealth"]}), + ) + intra_wealth_deciles = OutputCollection( + outputs=[], + dataframe=pd.DataFrame({"decile": list(range(1, 11)) + [0]}), + ) + + def fake_calculate_decile_impacts(**kwargs): + decile_calls.append(kwargs) + if kwargs.get("decile_variable") == "household_wealth_decile": + return wealth_deciles + return standard_deciles + + intra_calls = [] + + def fake_compute_intra_decile_impacts(**kwargs): + intra_calls.append(kwargs) + return intra_wealth_deciles + + class DummyProgramStatistics(ProgramStatistics): + def run(self): + self.baseline_total = 0.0 + self.reform_total = 0.0 + self.change = 0.0 + self.baseline_count = 0.0 + self.reform_count = 0.0 + self.winners = 0.0 + self.losers = 0.0 + + def fake_program_statistics(**kwargs): + return DummyProgramStatistics.model_construct(**kwargs) + + def fake_poverty_rates(_simulation): + return _empty_collection() + + def fake_inequality(simulation): + return Inequality.model_construct( + simulation=simulation, + income_variable="equiv_hbai_household_net_income", + gini=0.0, + top_10_share=0.0, + top_1_share=0.0, + bottom_50_share=0.0, + ) + + monkeypatch.setattr( + uk_analysis, "calculate_decile_impacts", fake_calculate_decile_impacts + ) + monkeypatch.setattr( + uk_analysis, "compute_intra_decile_impacts", fake_compute_intra_decile_impacts + ) + monkeypatch.setattr(uk_analysis, "ProgramStatistics", fake_program_statistics) + monkeypatch.setattr(uk_analysis, "calculate_uk_poverty_rates", fake_poverty_rates) + monkeypatch.setattr(uk_analysis, "calculate_uk_inequality", fake_inequality) + + result = uk_analysis.economic_impact_analysis( + baseline_simulation=baseline, + reform_simulation=reform, + ) + + assert result.decile_impacts.dataframe["source"].tolist() == ["standard"] + assert result.wealth_decile_impacts.dataframe["source"].tolist() == ["wealth"] + + assert decile_calls[0] == { + "baseline_simulation": baseline, + "reform_simulation": reform, + } + assert decile_calls[1] == { + "baseline_simulation": baseline, + "reform_simulation": reform, + "income_variable": "household_net_income", + "decile_variable": "household_wealth_decile", + "entity": "household", + } + assert intra_calls == [ + { + "baseline_simulation": baseline, + "reform_simulation": reform, + "income_variable": "household_net_income", + "decile_variable": "household_wealth_decile", + "entity": "household", + } + ] + assert result.intra_wealth_decile_impacts.dataframe["decile"].tolist() == ( + list(range(1, 11)) + [0] + )