From 66d49cb793a95f0f1efef38073dc3be23953f724 Mon Sep 17 00:00:00 2001 From: Moritz Potthoff Date: Fri, 10 Oct 2025 20:32:16 +0200 Subject: [PATCH 1/3] validate overrides keys --- dataframely/schema.py | 15 +++++++++++++++ tests/schema/test_sample.py | 24 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/dataframely/schema.py b/dataframely/schema.py index b724bfba..7999e18f 100644 --- a/dataframely/schema.py +++ b/dataframely/schema.py @@ -225,6 +225,21 @@ def sample( override_keys = ( set(overrides) if isinstance(overrides, Mapping) else set(overrides[0]) ) + if not isinstance(overrides, Mapping): + # Check that overrides entries are consistent. Not necessary for mapping + # overrides as polars checks the series lists upon data frame construction. + inconsistent_override_keys = [ + index + for index, current in enumerate(overrides) + if set(current) != override_keys + ] + if len(inconsistent_override_keys) > 0: + raise ValueError( + "The `overrides` entries at the following indices " + "do not provide the same keys as the first entry: " + f"{inconsistent_override_keys}." + ) + column_names = set(cls.column_names()) if not override_keys.issubset(column_names): raise ValueError( diff --git a/tests/schema/test_sample.py b/tests/schema/test_sample.py index 7da0e332..7ba2f98c 100644 --- a/tests/schema/test_sample.py +++ b/tests/schema/test_sample.py @@ -206,3 +206,27 @@ def test_sample_raises_superfluous_column_override() -> None: match=r"`_sampling_overrides` for columns that are not in the schema", ): SchemaWithIrrelevantColumnPreProcessing.sample(100) + + +def test_sample_with_inconsistent_overrides_raises() -> None: + with pytest.raises( + ValueError, + match=( + r"The `overrides` entries at the following indices do not provide " + r"the same keys as the first entry: \[1, 2\]." + ), + ): + MySimpleSchema.sample( + overrides=[ + { + "a": 1, + "b": "one", + }, + { + "a": 2, + }, + { + "b": 2, + }, + ] + ) From 664bfa726a6c8f015010f4cc5b08f1ffe120cb01 Mon Sep 17 00:00:00 2001 From: Moritz Potthoff Date: Fri, 10 Oct 2025 20:34:50 +0200 Subject: [PATCH 2/3] docs --- dataframely/schema.py | 4 +++- tests/schema/test_sample.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dataframely/schema.py b/dataframely/schema.py index 7999e18f..16a87ec4 100644 --- a/dataframely/schema.py +++ b/dataframely/schema.py @@ -207,6 +207,8 @@ def sample( Raises: ValueError: If ``num_rows`` is not equal to the length of the values in ``overrides``. + ValueError: If ``overrides`` are specified as a sequence of mappings and + the mappings do not provide the same keys. ValueError: If no valid data frame can be found in the configured maximum number of iterations. @@ -225,7 +227,7 @@ def sample( override_keys = ( set(overrides) if isinstance(overrides, Mapping) else set(overrides[0]) ) - if not isinstance(overrides, Mapping): + if isinstance(overrides, Sequence): # Check that overrides entries are consistent. Not necessary for mapping # overrides as polars checks the series lists upon data frame construction. inconsistent_override_keys = [ diff --git a/tests/schema/test_sample.py b/tests/schema/test_sample.py index 7ba2f98c..56f85c72 100644 --- a/tests/schema/test_sample.py +++ b/tests/schema/test_sample.py @@ -208,7 +208,7 @@ def test_sample_raises_superfluous_column_override() -> None: SchemaWithIrrelevantColumnPreProcessing.sample(100) -def test_sample_with_inconsistent_overrides_raises() -> None: +def test_sample_with_inconsistent_overrides_keys_raises() -> None: with pytest.raises( ValueError, match=( From 3c9702c911c5f035b8cfa19dd99995cc14cf4994 Mon Sep 17 00:00:00 2001 From: Moritz Potthoff Date: Mon, 13 Oct 2025 08:57:50 +0200 Subject: [PATCH 3/3] formatting --- tests/schema/test_sample.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tests/schema/test_sample.py b/tests/schema/test_sample.py index 56f85c72..dbeae13d 100644 --- a/tests/schema/test_sample.py +++ b/tests/schema/test_sample.py @@ -218,15 +218,8 @@ def test_sample_with_inconsistent_overrides_keys_raises() -> None: ): MySimpleSchema.sample( overrides=[ - { - "a": 1, - "b": "one", - }, - { - "a": 2, - }, - { - "b": 2, - }, + {"a": 1, "b": "one"}, + {"a": 2}, + {"b": 2}, ] )