Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions dataframely/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def sample(
Raises:
ValueError: If ``num_rows`` is not equal to the length of the values in
``overrides``.
ValueError: If ``overrides`` are specified as a sequence of mappings and
the mappings do not provide the same keys.
ValueError: If no valid data frame can be found in the configured maximum
number of iterations.

Expand All @@ -225,6 +227,21 @@ def sample(
override_keys = (
set(overrides) if isinstance(overrides, Mapping) else set(overrides[0])
)
if isinstance(overrides, Sequence):
# Check that overrides entries are consistent. Not necessary for mapping
# overrides as polars checks the series lists upon data frame construction.
inconsistent_override_keys = [
index
for index, current in enumerate(overrides)
if set(current) != override_keys
]
if len(inconsistent_override_keys) > 0:
raise ValueError(
"The `overrides` entries at the following indices "
"do not provide the same keys as the first entry: "
f"{inconsistent_override_keys}."
)

column_names = set(cls.column_names())
if not override_keys.issubset(column_names):
raise ValueError(
Expand Down
17 changes: 17 additions & 0 deletions tests/schema/test_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,20 @@ def test_sample_raises_superfluous_column_override() -> None:
match=r"`_sampling_overrides` for columns that are not in the schema",
):
SchemaWithIrrelevantColumnPreProcessing.sample(100)


def test_sample_with_inconsistent_overrides_keys_raises() -> None:
    """Sampling rejects sequence overrides whose entries differ in their keys.

    The first entry provides both ``a`` and ``b``; the entries at indices 1 and
    2 each omit one of them, so the expected error message lists ``[1, 2]``.
    """
    with pytest.raises(
        ValueError,
        match=(
            r"The `overrides` entries at the following indices do not provide "
            # The trailing period is escaped so that it only matches the
            # literal full stop ending the message, not an arbitrary character.
            r"the same keys as the first entry: \[1, 2\]\."
        ),
    ):
        MySimpleSchema.sample(
            overrides=[
                {"a": 1, "b": "one"},
                {"a": 2},
                {"b": 2},
            ]
        )
Loading