Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog.d/pre-launch-cleanup.removed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Pre-launch cleanup — remove dead code and drop `plotly` from the core dependency set:

- Delete the `policyengine.tax_benefit_models.us` and `policyengine.tax_benefit_models.uk` module shims. Python resolves the package directory first, so the `.py` shims were always shadowed. Worse, both attempted to re-export `general_policy_reform_analysis`, which is not defined anywhere, so `from policyengine.tax_benefit_models.us import general_policy_reform_analysis` raised `ImportError` at runtime.
- Delete `_create_entity_output_model` plus the `PersonOutput` / `BenunitOutput` / `HouseholdEntityOutput` factory products in `policyengine.tax_benefit_models.uk.analysis` — built via `pydantic.create_model` but never referenced anywhere in the codebase.
- Delete `policyengine.core.DatasetVersion` (its only consumers were an `Optional` field on `Dataset` that was never set and the `policyengine.core` re-export).
- Move `plotly>=5.0.0` from the base install to a new `policyengine[plotting]` extra. Only `policyengine.utils.plotting` uses it, and that module is itself used only by the `examples/` scripts. The package now imports cleanly without `plotly`; install the `plotting` extra to use `policyengine.utils.plotting`.
13 changes: 13 additions & 0 deletions changelog.d/v4-drop-filter-fields.removed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
**BREAKING (v4):** Remove the legacy `filter_field` / `filter_value`
fields from `Simulation` and `Region`, the `_auto_construct_strategy`
model validator that rewrote them into a `RowFilterStrategy`, and the
`_filter_dataset_by_household_variable` methods they fed on both
country models. All scoping now flows through
`scoping_strategy: Optional[ScopingStrategy]`. `Region.requires_filter`
becomes a derived property (`True` if and only if
`scoping_strategy is not None`). The sub-national
region factories (`countries/us/regions.py`, `countries/uk/regions.py`)
construct `scoping_strategy=RowFilterStrategy(...)` /
`WeightReplacementStrategy(...)` directly. Callers that previously
passed `filter_field="place_fips", filter_value="44000"` now pass
`scoping_strategy=RowFilterStrategy(variable_name="place_fips",
variable_value="44000")`.
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies = [
"pydantic>=2.0.0",
"pandas>=2.0.0",
"microdf_python>=1.2.1",
"plotly>=5.0.0",
"requests>=2.31.0",
"psutil>=5.9.0",
"packaging>=23.0",
Expand All @@ -34,6 +33,9 @@ dependencies = [
policyengine = "policyengine.cli:main"

[project.optional-dependencies]
plotting = [
"plotly>=5.0.0",
]
uk = [
"policyengine_core>=3.25.0",
"policyengine-uk==2.88.0",
Expand All @@ -51,6 +53,7 @@ dev = [
"itables",
"build",
"jsonschema>=4.0.0",
"plotly>=5.0.0",
"pytest-asyncio>=0.26.0",
"ruff>=0.9.0",
"policyengine_core>=3.25.0",
Expand Down
1 change: 0 additions & 1 deletion src/policyengine/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .dataset import Dataset
from .dataset import YearData as YearData
from .dataset import map_to_entity as map_to_entity
from .dataset_version import DatasetVersion as DatasetVersion
from .dynamic import Dynamic as Dynamic
from .output import Output as Output
from .output import OutputCollection as OutputCollection
Expand Down
2 changes: 0 additions & 2 deletions src/policyengine/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from microdf import MicroDataFrame
from pydantic import BaseModel, ConfigDict, Field

from .dataset_version import DatasetVersion
from .tax_benefit_model import TaxBenefitModel


Expand Down Expand Up @@ -85,7 +84,6 @@ class MyDataset(Dataset):
id: str = Field(default_factory=lambda: str(uuid4()))
name: str
description: str
dataset_version: Optional[DatasetVersion] = None
filepath: str
is_output_dataset: bool = False
tax_benefit_model: Optional[TaxBenefitModel] = None
Expand Down
16 changes: 0 additions & 16 deletions src/policyengine/core/dataset_version.py

This file was deleted.

47 changes: 14 additions & 33 deletions src/policyengine/core/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
This module provides the Region and RegionRegistry classes for defining
geographic regions that a tax-benefit model supports. Regions can have:
1. A dedicated dataset (e.g., US states, congressional districts)
2. Filter from a parent region's dataset (e.g., US places/cities, UK countries)
2. A scoping strategy that derives the region from a parent dataset
(row filter or weight replacement)
"""

from typing import Literal, Optional, Union
Expand All @@ -22,8 +23,9 @@ class Region(BaseModel):
"""Geographic region for tax-benefit simulations.

Regions can either have:
1. A dedicated dataset (dataset_path is set, requires_filter is False)
2. Filter from a parent region's dataset (requires_filter is True)
1. A dedicated dataset (``dataset_path`` is set).
2. A scoping strategy that derives the region from a parent dataset
(``scoping_strategy`` is set).

The unique identifier is the code field, which uses a prefixed format:
- National: "us", "uk"
Expand Down Expand Up @@ -57,25 +59,16 @@ class Region(BaseModel):
description="GCS path to dedicated dataset (e.g., 'gs://policyengine-us-data/states/CA.h5')",
)

# Scoping strategy (preferred over legacy filter fields)
# Scoping strategy for regions that derive from a parent dataset
scoping_strategy: Optional[ScopingStrategy] = Field(
default=None,
description="Strategy for scoping dataset to this region (row filtering or weight replacement)",
)

# Legacy filtering configuration (kept for backward compatibility)
requires_filter: bool = Field(
default=False,
description="True if this region filters from a parent dataset rather than having its own",
)
filter_field: Optional[str] = Field(
default=None,
description="Dataset field to filter on (e.g., 'place_fips', 'country')",
)
filter_value: Optional[str] = Field(
default=None,
description="Value to match when filtering (defaults to code suffix if not set)",
)
@property
def requires_filter(self) -> bool:
"""Whether this region needs a parent dataset + a scoping strategy."""
return self.scoping_strategy is not None

# Metadata (primarily for US congressional districts)
state_code: Optional[str] = Field(
Expand Down Expand Up @@ -180,24 +173,12 @@ def get_children(self, parent_code: str) -> list[Region]:
return [r for r in self.regions if r.parent_code == parent_code]

def get_dataset_regions(self) -> list[Region]:
"""Get all regions that have dedicated datasets.

Returns:
List of regions with dataset_path set and requires_filter False
"""
return [
r
for r in self.regions
if r.dataset_path is not None and not r.requires_filter
]
"""Get all regions that have a dedicated dataset on disk."""
return [r for r in self.regions if r.dataset_path is not None]

def get_filter_regions(self) -> list[Region]:
"""Get all regions that require filtering from parent datasets.

Returns:
List of regions with requires_filter True
"""
return [r for r in self.regions if r.requires_filter]
"""Get all regions that derive from a parent dataset via a scoping strategy."""
return [r for r in self.regions if r.scoping_strategy is not None]

def __len__(self) -> int:
"""Return the number of regions in the registry."""
Expand Down
38 changes: 9 additions & 29 deletions src/policyengine/core/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from typing import Optional
from uuid import uuid4

from pydantic import BaseModel, Field, model_validator
from pydantic import BaseModel, Field

from .cache import LRUCache
from .dataset import Dataset
from .dynamic import Dynamic
from .policy import Policy
from .scoping_strategy import RowFilterStrategy, ScopingStrategy
from .scoping_strategy import ScopingStrategy
from .tax_benefit_model_version import TaxBenefitModelVersion

logger = logging.getLogger(__name__)
Expand All @@ -26,42 +26,22 @@ class Simulation(BaseModel):
dynamic: Optional[Dynamic] = None
dataset: Dataset = None

# Scoping strategy (preferred over legacy filter fields)
scoping_strategy: Optional[ScopingStrategy] = Field(
default=None,
description="Strategy for scoping dataset to a sub-national region",
)

# Legacy regional filtering parameters (kept for backward compatibility)
filter_field: Optional[str] = Field(
default=None,
description="Household-level variable to filter dataset by (e.g., 'place_fips', 'country')",
)
filter_value: Optional[str] = Field(
default=None,
description="Value to match when filtering (e.g., '44000', 'ENGLAND')",
extra_variables: dict[str, list[str]] = Field(
default_factory=dict,
description=(
"Additional variables to calculate beyond the model version's "
"default entity_variables, keyed by entity name. Use when a "
"caller needs variables that are not in the bundled default set."
),
)

tax_benefit_model_version: TaxBenefitModelVersion = None

@model_validator(mode="after")
def _auto_construct_strategy(self) -> "Simulation":
"""Auto-construct a RowFilterStrategy from legacy filter fields.

If filter_field and filter_value are set but scoping_strategy is not,
create a RowFilterStrategy for backward compatibility.
"""
if (
self.scoping_strategy is None
and self.filter_field is not None
and self.filter_value is not None
):
self.scoping_strategy = RowFilterStrategy(
variable_name=self.filter_field,
variable_value=self.filter_value,
)
return self

output_dataset: Optional[Dataset] = None

def run(self):
Expand Down
9 changes: 0 additions & 9 deletions src/policyengine/countries/uk/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,6 @@ def build_uk_region_registry(
label=name,
region_type="country",
parent_code="uk",
requires_filter=True,
filter_field="country",
filter_value=code.upper(),
scoping_strategy=RowFilterStrategy(
variable_name="country",
variable_value=code.upper(),
Expand All @@ -161,9 +158,6 @@ def build_uk_region_registry(
label=const["name"],
region_type="constituency",
parent_code="uk",
requires_filter=True,
filter_field="household_weight",
filter_value=const["code"],
scoping_strategy=WeightReplacementStrategy(
weight_matrix_bucket="policyengine-uk-data-private",
weight_matrix_key="parliamentary_constituency_weights.h5",
Expand All @@ -185,9 +179,6 @@ def build_uk_region_registry(
label=la["name"],
region_type="local_authority",
parent_code="uk",
requires_filter=True,
filter_field="household_weight",
filter_value=la["code"],
scoping_strategy=WeightReplacementStrategy(
weight_matrix_bucket="policyengine-uk-data-private",
weight_matrix_key="local_authority_weights.h5",
Expand Down
3 changes: 0 additions & 3 deletions src/policyengine/countries/us/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ def build_us_region_registry() -> RegionRegistry:
label=place["name"],
region_type="place",
parent_code=f"state/{state_abbrev.lower()}",
requires_filter=True,
filter_field="place_fips",
filter_value=fips,
state_code=state_abbrev,
state_name=place["state_name"],
scoping_strategy=RowFilterStrategy(
Expand Down
40 changes: 0 additions & 40 deletions src/policyengine/tax_benefit_models/uk.py

This file was deleted.

20 changes: 1 addition & 19 deletions src/policyengine/tax_benefit_models/uk/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas as pd
from microdf import MicroDataFrame
from pydantic import BaseModel, Field, create_model
from pydantic import BaseModel, Field

from policyengine.core import OutputCollection, Simulation
from policyengine.core.policy import Policy
Expand All @@ -28,24 +28,6 @@
from .outputs import ProgrammeStatistics


def _create_entity_output_model(entity: str, variables: list[str]) -> type[BaseModel]:
"""Create a dynamic Pydantic model for entity output variables."""
fields = {var: (float, ...) for var in variables}
return create_model(f"{entity.title()}Output", **fields)


# Create output models dynamically from uk_latest.entity_variables
PersonOutput = _create_entity_output_model(
"person", uk_latest.entity_variables["person"]
)
BenunitOutput = _create_entity_output_model(
"benunit", uk_latest.entity_variables["benunit"]
)
HouseholdEntityOutput = _create_entity_output_model(
"household", uk_latest.entity_variables["household"]
)


class UKHouseholdOutput(BaseModel):
"""Output from a UK household calculation with all entity data."""

Expand Down
Loading
Loading