Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/613.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Unify facet filter parsing across CLI commands. `--filter` in `executions list-groups` and `delete-groups` now supports multiple values for the same key with OR semantics (e.g., `--filter source_id=A --filter source_id=B`), consistent with `--dataset-filter` in `datasets list` and `solve`.
25 changes: 15 additions & 10 deletions packages/climate-ref/src/climate_ref/cli/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,24 @@ def format_size(size_bytes: int | float) -> str:
return f"{size:.1f} TB"


def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
def parse_facet_filters(filters: list[str] | None) -> dict[str, list[str]]:
"""
Parse facet filters from key=value format into a dictionary.

Multiple values for the same key are collected into a list (OR logic).
Different keys are ANDed together when used for filtering.

Parameters
----------
filters
List of filter strings in 'key=value' format
List of filter strings in 'key=value' format.
Multiple entries with the same key are ORed
(e.g., ``["source_id=A", "source_id=B"]`` matches A or B).

Returns
-------
dict[str, str]
Dictionary mapping facet keys to values
dict[str, list[str]]
Dictionary mapping facet keys to lists of allowed values

Raises
------
Expand All @@ -57,12 +62,15 @@ def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
Examples
--------
>>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
{'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
{'source_id': ['GFDL-ESM4'], 'variable_id': ['tas']}

>>> parse_facet_filters(["source_id=A", "source_id=B"])
{'source_id': ['A', 'B']}
"""
if not filters:
return {}

parsed: dict[str, str] = {}
parsed: dict[str, list[str]] = {}
for filter_str in filters:
if "=" not in filter_str:
raise ValueError(
Expand All @@ -80,10 +88,7 @@ def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
if not value:
raise ValueError(f"Empty value in filter: '{filter_str}'")

if key in parsed:
logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")

parsed[key] = value
parsed.setdefault(key, []).append(value)

return parsed

Expand Down
20 changes: 6 additions & 14 deletions packages/climate-ref/src/climate_ref/cli/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import typer
from loguru import logger

from climate_ref.cli._utils import pretty_print_df
from climate_ref.cli._utils import parse_facet_filters, pretty_print_df
from climate_ref.datasets import get_dataset_adapter
from climate_ref.models import Dataset
from climate_ref.solver import apply_dataset_filters
Expand Down Expand Up @@ -60,15 +60,10 @@ def list_( # noqa: PLR0913
data_catalog = adapter.load_catalog(database, include_files=include_files, limit=limit)

if dataset_filter:
parsed_filters: dict[str, list[str]] = {}
for entry in dataset_filter:
if "=" not in entry:
raise typer.BadParameter(
f"Invalid dataset filter {entry!r}. Expected key=value format.",
param_hint="--dataset-filter",
)
key, value = entry.split("=", 1)
parsed_filters.setdefault(key, []).append(value)
try:
parsed_filters = parse_facet_filters(dataset_filter)
except ValueError as e:
raise typer.BadParameter(str(e), param_hint="--dataset-filter")

for facet in parsed_filters:
if facet not in data_catalog.columns:
Expand Down Expand Up @@ -108,10 +103,7 @@ def list_columns(
include_files: bool = typer.Option(False, help="Include files in the output"),
) -> None:
"""
Print the current climate_ref configuration

If a configuration directory is provided,
the configuration will attempt to load from the specified directory.
List the available columns in the data catalog for the given source type.
"""
database = ctx.obj.database

Expand Down
9 changes: 6 additions & 3 deletions packages/climate-ref/src/climate_ref/cli/executions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ class ListGroupsFilterOptions:
provider: list[str] | None = None
"""Filter by provider slug (substring, case-insensitive)"""

facets: dict[str, str] | None = None
"""Filter by facet key-value pairs (exact match)"""
facets: dict[str, list[str]] | None = None
"""Filter by facet key-value pairs (AND across keys, OR within key)"""


@app.command()
Expand Down Expand Up @@ -486,7 +486,10 @@ def emit_no_results_warning(
if filters.provider:
filter_parts.append(f"provider filters: {filters.provider}")
if filters.facets:
facet_strs = [f"{k}={v}" for k, v in filters.facets.items()]
facet_strs = [
f"{k}={','.join(values)}" if len(values) > 1 else f"{k}={values[0]}"
for k, values in filters.facets.items()
]
filter_parts.append(f"facet filters: {facet_strs}")

logger.warning(
Expand Down
17 changes: 6 additions & 11 deletions packages/climate-ref/src/climate_ref/cli/solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import typer

from climate_ref.cli._utils import parse_facet_filters

app = typer.Typer()


Expand Down Expand Up @@ -85,17 +87,10 @@ def solve( # noqa: PLR0913
config = ctx.obj.config
db = ctx.obj.database

parsed_dataset_filters: dict[str, list[str]] | None = None
if dataset_filter:
parsed_dataset_filters = {}
for entry in dataset_filter:
if "=" not in entry:
raise typer.BadParameter(
f"Invalid dataset filter {entry!r}. Expected key=value format.",
param_hint="--dataset-filter",
)
key, value = entry.split("=", 1)
parsed_dataset_filters.setdefault(key, []).append(value)
try:
parsed_dataset_filters = parse_facet_filters(dataset_filter) or None
except ValueError as e:
raise typer.BadParameter(str(e), param_hint="--dataset-filter")

filters = SolveFilterOptions(
diagnostic=diagnostic,
Expand Down
80 changes: 47 additions & 33 deletions packages/climate-ref/src/climate_ref/models/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,23 +406,53 @@ def get_execution_group_and_latest(
return query # type: ignore


def _selectors_match_facet(
selectors: dict[str, list[list[str]]],
facet_key: str,
facet_values: list[str],
) -> bool:
"""
Check if an execution group's selectors match a single facet filter.

Parameters
----------
selectors
The execution group's selectors dict (dataset_type -> list of [key, value] pairs)
facet_key
Facet key, optionally prefixed with ``dataset_type.`` to scope to one type
facet_values
Allowed values (OR logic -- any match is sufficient)
"""
if "." in facet_key:
dataset_type, key = facet_key.split(".", 1)
if dataset_type in selectors:
return any([key, fv] in selectors[dataset_type] for fv in facet_values)
return False

# Bare key: search across all dataset types
for ds_type_selectors in selectors.values():
if any([facet_key, fv] in ds_type_selectors for fv in facet_values):
return True
return False


def _filter_executions_by_facets(
results: Sequence[tuple[ExecutionGroup, Execution | None]],
facet_filters: dict[str, str],
facet_filters: dict[str, list[str]],
) -> list[tuple[ExecutionGroup, Execution | None]]:
"""
Filter execution groups and their latest executions based on facet key-value pairs.

This is a relatively expensive operation as it requires iterating over all results.
This should be replaced once we have normalised the selectors into a separate table.


Parameters
----------
results
List of tuples containing ExecutionGroup and its latest Execution (or None)
facet_filters
Dictionary of facet key-value pairs to filter by (AND logic, exact match)
Dictionary mapping facet keys to lists of allowed values.
Different keys are ANDed; multiple values for the same key are ORed.

Returns
-------
Expand All @@ -434,41 +464,24 @@ def _filter_executions_by_facets(
or dataset_type.key=value (searches specific dataset type)
- Key=value filters search across all dataset types
- dataset_type.key=value filters only search within the specified dataset type
- Multiple values within same filter type use OR logic
- All specified facets must match for an execution group to be included (AND logic)
"""
filtered_results = []
for eg, execution in results:
all_filters_match = True
for facet_key, facet_value in facet_filters.items():
filter_match = False
if "." in facet_key:
# Handle dataset_type.key=value format
dataset_type, key = facet_key.split(".", 1)
if dataset_type in eg.selectors:
if [key, facet_value] in eg.selectors[dataset_type]:
filter_match = True
break
else:
# Handle key=value format (search across all dataset types)
for ds_type_selectors in eg.selectors.values():
if [facet_key, facet_value] in ds_type_selectors:
filter_match = True
break

if not filter_match:
all_filters_match = False
break
if all_filters_match:
filtered_results.append((eg, execution))
return filtered_results
- Multiple values for the same key use OR logic
- All specified keys must match for an execution group to be included (AND logic)
"""
return [
(eg, execution)
for eg, execution in results
if all(
_selectors_match_facet(eg.selectors, facet_key, facet_values)
for facet_key, facet_values in facet_filters.items()
)
]


def get_execution_group_and_latest_filtered( # noqa: PLR0913
session: Session,
diagnostic_filters: list[str] | None = None,
provider_filters: list[str] | None = None,
facet_filters: dict[str, str] | None = None,
facet_filters: dict[str, list[str]] | None = None,
dirty: bool | None = None,
successful: bool | None = None,
) -> list[tuple[ExecutionGroup, Execution | None]]:
Expand All @@ -484,7 +497,8 @@ def get_execution_group_and_latest_filtered( # noqa: PLR0913
provider_filters
List of provider slug substrings (OR logic, case-insensitive)
facet_filters
Dictionary of facet key-value pairs (AND logic, exact match)
Dictionary mapping facet keys to lists of allowed values.
Different keys are ANDed; multiple values for the same key are ORed.
dirty
If True, only return dirty execution groups.
If False, only return clean execution groups.
Expand Down
10 changes: 3 additions & 7 deletions packages/climate-ref/tests/unit/cli/test_executions.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ def test_filter_empty_results_warning(self, db_with_groups, invoke_cli):
assert "Applied filters: facet filters: ['source_id=NONEXISTENT']" in result.stderr
assert "id" in result.stdout # Ensure empty table headers are still printed

def test_facet_warning_multiple_same_key(self, db_with_groups, invoke_cli):
# This functionality might be useful in future, but not today
def test_facet_multiple_same_key_returns_both(self, db_with_groups, invoke_cli):
# Multiple values for the same key are ORed (both should appear)
result = invoke_cli(
[
"executions",
Expand All @@ -264,12 +264,8 @@ def test_facet_warning_multiple_same_key(self, db_with_groups, invoke_cli):
"source_id=ACCESS-ESM1-5",
]
)
assert (
"Filter key 'source_id' specified multiple times. Using last value: 'ACCESS-ESM1-5'"
in result.stderr
)
assert "ACCESS-ESM1-5" in result.stdout
assert "GFDL-ESM4" not in result.stdout
assert "GFDL-ESM4" in result.stdout

def test_filter_successful(self, db_with_groups, invoke_cli):
result = invoke_cli(["executions", "list-groups", "--successful"])
Expand Down
4 changes: 2 additions & 2 deletions packages/climate-ref/tests/unit/cli/test_solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ def test_solve_with_invalid_dataset_filter(self, sample_data_dir, db, invoke_cli
expected_exit_code=2,
)

assert "Invalid dataset filter" in result.stderr
assert "Expected key=value format" in result.stderr
assert "Invalid filter format" in result.stderr
assert "Expected format: 'key=value'" in result.stderr

def test_solve_with_limit(self, sample_data_dir, db, invoke_cli, mocker):
mock_solve = mocker.patch("climate_ref.solver.solve_required_executions")
Expand Down
33 changes: 22 additions & 11 deletions packages/climate-ref/tests/unit/cli/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_format_size_boundary_values(self):

def test_parse_facet_filters_valid_input():
filters = ["source_id=GFDL-ESM4", "variable_id=tas"]
expected = {"source_id": "GFDL-ESM4", "variable_id": "tas"}
expected = {"source_id": ["GFDL-ESM4"], "variable_id": ["tas"]}
assert parse_facet_filters(filters) == expected


Expand All @@ -60,17 +60,15 @@ def test_parse_facet_filters_none_input():

def test_parse_facet_filters_with_whitespace():
filters = [" key1 = value1 ", "key2=value2 "]
expected = {"key1": "value1", "key2": "value2"}
expected = {"key1": ["value1"], "key2": ["value2"]}
assert parse_facet_filters(filters) == expected


def test_parse_facet_filters_duplicate_key(caplog):
def test_parse_facet_filters_multiple_values_same_key():
"""Multiple values for the same key are collected into a list (OR semantics)."""
filters = ["key=value1", "key=value2"]
expected = {"key": "value2"}
with caplog.at_level("WARNING"):
result = parse_facet_filters(filters)
assert result == expected
assert "Filter key 'key' specified multiple times. Using last value: 'value2'" in caplog.text
result = parse_facet_filters(filters)
assert result == {"key": ["value1", "value2"]}


def test_parse_facet_filters_invalid_format_no_equals():
Expand All @@ -90,12 +88,25 @@ def test_parse_facet_filters_empty_value():

def test_parse_facet_filters_value_with_equals():
filters = ["query=some_key=some_value"]
expected = {"query": "some_key=some_value"}
expected = {"query": ["some_key=some_value"]}
assert parse_facet_filters(filters) == expected


def test_parse_facet_filters_mixed_valid_and_invalid(caplog):
def test_parse_facet_filters_mixed_valid_and_invalid():
filters = ["key1=value1", "invalid", "key2=value2"]
with pytest.raises(ValueError, match="Invalid filter format: 'invalid'"):
parse_facet_filters(filters)
assert not caplog.text


def test_parse_facet_filters_mixed_single_and_multi():
    """Different keys can have different numbers of values."""
    parsed = parse_facet_filters(["source_id=A", "source_id=B", "variable_id=tas"])
    assert parsed == {"source_id": ["A", "B"], "variable_id": ["tas"]}


def test_parse_facet_filters_dotted_key():
    """Dotted keys (dataset_type.facet) are preserved as-is."""
    parsed = parse_facet_filters(["cmip6.source_id=ACCESS-CM2"])
    assert parsed == {"cmip6.source_id": ["ACCESS-CM2"]}
Loading