diff --git a/dataframely/collection/collection.py b/dataframely/collection/collection.py index 673b7da..1576f9f 100644 --- a/dataframely/collection/collection.py +++ b/dataframely/collection/collection.py @@ -531,6 +531,25 @@ def filter( Raises: ValueError: If an insufficient set of input data frames is provided, i.e. if any required member of this collection is missing in the input. + + Example: + + .. code-block:: python + + # Define collection + class HospitalInvoiceData(dy.Collection): + invoice: dy.LazyFrame[InvoiceSchema] + ... + + # Filter the data and cast columns to expected types + good, failure = HospitalInvoiceData.filter(data, cast=True) + + # Inspect the reasons for the failed rows for member `invoice` + print(failure.invoice.counts()) + + # Inspect the failed rows + failed_df = failure.invoice.invalid() + print(failed_df) """ cls._validate_input_keys(data) diff --git a/dataframely/schema.py b/dataframely/schema.py index 5a197cc..c457ec7 100644 --- a/dataframely/schema.py +++ b/dataframely/schema.py @@ -637,7 +637,8 @@ def filter( cast: bool = False, eager: bool = True, ) -> FilterResult[Self] | LazyFilterResult[Self]: - """Filter the data frame by the rules of this schema. + """Filter the data frame by the rules of this schema, returning `(valid, + failures)`. This method can be thought of as a "soft alternative" to :meth:`validate`. While :meth:`validate` raises an exception when a row does not adhere to the @@ -670,6 +671,20 @@ def filter( Note: This method preserves the ordering of the input data frame. + + Example: + + .. code-block:: python + + # Filter the data and cast columns to expected types + good, failure = HouseSchema.filter(df, cast=True) + + # Inspect the reasons for the failed rows + print(failure.counts()) + + # Inspect the failed rows + failed_df = failure.invalid() + print(failed_df) """ lf = df.lazy().pipe( match_to_schema, cls, casting=("lenient" if cast else "none")