From 89887ca662bc19a7b699a28ea7b0df7a22584554 Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Mon, 20 Oct 2025 14:10:25 +0200
Subject: [PATCH 1/5] wip

---
 dataframely/collection.py | 141 ++++++++++++++++++++------------------
 dataframely/schema.py     |  62 +++++++++--------
 docs/conf.py              |   9 ++-
 3 files changed, 116 insertions(+), 96 deletions(-)

diff --git a/dataframely/collection.py b/dataframely/collection.py
index 761dbd2..30a83f1 100644
--- a/dataframely/collection.py
+++ b/dataframely/collection.py
@@ -54,7 +54,7 @@ class Collection(BaseCollection, ABC):
     A collection is comprised of a set of *members* which are collectively
     "consistent", meaning the collection ensures that invariants are held up
     *across* members.
-    This is different to :mod:`dataframely` schemas which only ensure invariants
+    This is different to :class:`~dataframely.Schema`, which only ensures invariants
     *within* individual members.
 
     In order to properly ensure that invariants hold up across members, members must
@@ -75,10 +75,6 @@ class MyCollection(dy.Collection):
     Besides, it may define *filters* (c.f. :meth:`~dataframely.filter`) and arbitrary
     methods.
 
-    Note:
-        The :mod:`dataframely` mypy plugin ensures that the dictionaries passed to class
-        methods contain exactly the required keys.
-
     Attention:
         Do NOT use this class in combination with ``from __future__ import annotations``
         as it requires the proper schema definitions to ensure that the collection is
@@ -91,8 +87,8 @@ class MyCollection(dy.Collection):
     def create_empty(cls) -> Self:
         """Create an empty collection without any data.
 
-        This method simply calls ``create_empty`` on all member schemas, including
-        non-optional ones.
+        This method simply calls :meth:`~dataframely.Schema.create_empty` on all
+        member schemas, including non-optional ones.
 
         Returns:
            An instance of this collection.
@@ -121,12 +117,12 @@ def sample(
         function must be "row-oriented" (or "sample-oriented").
 
         Args:
-            num_rows: The number of rows to sample for each member. If this is set to
-                ``None``, the number of rows is inferred from the length of the
+            num_rows: The number of rows to sample for each member.
+                If this is set to ``None``, the number of rows is inferred from the length of the
                 overrides.
-            overrides: The overrides to set values in member schemas. The overrides must
-                be provided as a list of samples. The structure of the samples must be
-                as follows:
+            overrides: The overrides to set values in member schemas.
+                The overrides must be provided as a list of samples.
+                The structure of the samples must be as follows:
 
                 .. code::
 
                 Note that overrides for columns of members that are annotated with
                 ``inline_for_sampling=True`` can be supplied on the top-level instead
                 of in a nested dictionary.
-            generator: The (seeded) generator to use for sampling data. If ``None``, a
-                generator with random seed is automatically created.
+            generator: The (seeded) generator to use for sampling data.
+                If ``None``, a generator with random seed is automatically created.
 
         Returns:
             A collection where all members (including optional ones) have been sampled
 
         Attention:
             In case the collection has members with a common primary key, the
-            `_preprocess_sample` method must return distinct primary key values for each
+            :meth:`_preprocess_sample` method must return distinct primary key values for each
             sample. The default implementation does this on a best-effort basis but
             may cause primary key violations.
             Hence, it is recommended to override this method and ensure that all
             primary key columns are set.
 
         Raises:
-            ValueError: If the :meth:`_preprocess_sample` method does not return all
+            ValueError:
+                If the :meth:`_preprocess_sample` method does not return all
                 common primary key columns for all samples.
-            ValidationError: If the sampled members violate any of the collection
-                filters. If the collection does not have filters, this error is never
+            ValidationError:
+                If the sampled members violate any of the collection filters.
+                If the collection does not have filters, this error is never
                 raised. To prevent validation errors, override the
                 :meth:`_preprocess_sample` method appropriately.
         """
@@ -403,8 +401,7 @@ def is_valid(cls, data: Mapping[str, FrameType], /, *, cast: bool = False) -> bo
         Args:
             data: The members of the collection which ought to be validated. The
                 dictionary must contain exactly one entry per member with the name of
-                the member as key. The existence of all keys is checked via the
-                :mod:`dataframely` mypy plugin.
+                the member as key.
             cast: Whether columns with a wrong data type in the member data frame are
                 cast to their schemas' defined data types if possible.
 
         Returns:
             Whether the provided members satisfy the invariants of the collection.
 
         Raises:
-            ValueError: If an insufficient set of input data frames is provided, i.e. if
-                any required member of this collection is missing in the input.
+            ValueError: If an insufficient set of input data frames is provided,
+                i.e. if any required member of this collection is missing in the input.
         """
         try:
             cls.validate(data, cast=cast)
@@ -430,8 +427,8 @@ def filter(
         """Filter the members data frame by their schemas and the collection's filters.
 
         Args:
-            data: The members of the collection which ought to be filtered. The
-                dictionary must contain exactly one entry per member with the name of
+            data: The members of the collection which ought to be filtered.
+                The dictionary must contain exactly one entry per member with the name of
                 the member as key, except for optional members which may be missing.
                 All data frames passed here will be eagerly collected within the
                 method, regardless of whether they are a :class:`~polars.DataFrame` or
 
             filtered out by any of the collection's filters. While collection members
             are always instances of :class:`~polars.LazyFrame`, the members of the
             returned collection are essentially eager as they are constructed by
-            calling ``.lazy()`` on eager data frames. Just like in polars' native
-            :meth:`~polars.DataFrame.filter`, the order of rows is maintained in all
-            returned data frames.
-            - A mapping from member name to a :class:`FailureInfo` object which provides
-              details on why individual rows had been removed. Optional members are only
-              included in this dictionary if they had been provided in the input.
+            calling :meth:`~polars.DataFrame.lazy` on eager data frames.
+            Just like in polars' native :meth:`~polars.DataFrame.filter`,
+            the order of rows is maintained in all returned data frames.
+            - A mapping from member name to a :class:`~dataframely.FailureInfo` object
+              which provides details on why individual rows had been removed.
+              Optional members are only included in this dictionary if they were
+              provided in the input.
 
         Raises:
-            ValueError: If an insufficient set of input data frames is provided, i.e. if
-                any required member of this collection is missing in the input.
+            ValueError: If an insufficient set of input data frames is provided,
+                i.e. if any required member of this collection is missing in the input.
             ValidationError: If the columns of any of the input data frames are invalid.
                 This happens only if a data frame misses a column defined in its schema
                 or a column has an invalid dtype while ``cast`` is set to ``False``.
@@ -587,15 +585,17 @@ def join(
             The collection, with members potentially reduced in length.
 
         Raises:
-            ValueError: If the collection contains any member that is annotated with
+            ValueError:
+                If the collection contains any member that is annotated with
                 `ignored_in_filters=True`.
 
         Attention:
             This method does not validate the resulting collection. Ensure to only
             use this if the resulting collection still satisfies the filters of the
             collection. The joins are not evaluated eagerly. Therefore, a downstream
-            call to :meth:`collect` might fail, especially if `primary_keys` does not
-            contain all columns for all common primary keys.
+            call to :meth:`polars.LazyFrame.collect`
+            may fail, especially if ``primary_keys`` does not contain all columns
+            for all common primary keys.
         """
         if any(member.ignored_in_filters for member in self.members().values()):
             raise ValueError(
@@ -620,7 +620,7 @@ def join(
     def cast(cls, data: Mapping[str, FrameType], /) -> Self:
         """Initialize a collection by casting all members into their correct schemas.
 
-        This method calls :meth:`~Schema.cast` on every member, thus, removing
+        This method calls :meth:`~dataframely.Schema.cast` on every member, thus removing
         superfluous columns and casting to the correct dtypes for all input data frames.
 
         You should typically use :meth:`validate` or :meth:`filter` to obtain instances
 
         it is known that the provided data adheres to the collection's invariants.
 
         Args:
-            data: The data for all members. The dictionary must contain exactly one
-                entry per member with the name of the member as key.
+            data: The data for all members.
+                The dictionary must contain exactly one entry per member
+                with the name of the member as key.
 
         Returns:
             The initialized collection.
 
         Raises:
-            ValueError: If an insufficient set of input data frames is provided, i.e. if
-                any required member of this collection is missing in the input.
+            ValueError: If an insufficient set of input data frames is provided,
+                i.e. if any required member of this collection is missing in the input.
 
         Attention:
             For lazy frames, casting is not performed eagerly. This prevents collecting
-            the lazy frames' schemas but also means that a call to :meth:`collect`
+            the lazy frames' schemas but also means that a call to
+            :meth:`polars.LazyFrame.collect`
             further down the line might fail because of the cast and/or missing columns.
         """
         cls._validate_input_keys(data)
 
     def collect_all(self) -> Self:
 
     @classmethod
     def serialize(cls) -> str:
-        """Serialize this collection to a JSON string.
+        """Serialize the metadata for this collection to a JSON string.
 
         This method does NOT serialize any data frames, but only the _structure_ of the
-        collection, similar to :meth:`Schema.serialize`.
+        collection, similar to :meth:`~dataframely.Schema.serialize`.
 
         Returns:
             The serialized collection.
 
         without it being considered a breaking change.
 
         Raises:
-            TypeError: If a column of any member contains metadata that is not
-                JSON-serializable.
-            ValueError: If a column of any member is not a "native" dataframely column
+            TypeError:
+                If a column of any member contains metadata that is not JSON-serializable.
+            ValueError:
+                If a column of any member is not a "native" dataframely column
                 type but a custom subclass.
         """
         result = {
 
     def write_parquet(self, directory: str | Path, **kwargs: Any) -> None:
 
         members which are not provided in the current collection.
 
         Args:
-            directory: The directory where the Parquet files should be written to. If
-                the directory does not exist, it is created automatically, including all
-                of its parents.
-            kwargs: Additional keyword arguments passed directly to
-                :meth:`polars.write_parquet` of all members. ``metadata`` may only be
-                provided if it is a dictionary.
+            directory: The directory where the Parquet files should be written to.
+                If the directory does not exist, it is created automatically,
+                including all of its parents.
+            kwargs: Additional keyword arguments passed to :meth:`polars.DataFrame.write_parquet`.
+                ``metadata`` may only be provided if it is a dictionary.
 
         Attention:
-            This method suffers from the same limitations as :meth:`Schema.serialize`.
+            This method suffers from the same limitations as :meth:`~dataframely.Schema.serialize`.
         """
         self._write(ParquetStorageBackend(), directory=directory, **kwargs)
 
     def sink_parquet(self, directory: str | Path, **kwargs: Any) -> None:
 
             directory: The directory where the Parquet files should be written to. If
                 the directory does not exist, it is created automatically, including all
                 of its parents.
-            kwargs: Additional keyword arguments passed directly to
-                :meth:`polars.sink_parquet` of all members. ``metadata`` may only be
-                provided if it is a dictionary.
+            kwargs: Additional keyword arguments passed to :meth:`polars.LazyFrame.sink_parquet`.
+                ``metadata`` may only be provided if it is a dictionary.
 
         Attention:
-            This method suffers from the same limitations as :meth:`Schema.serialize`.
+            This method suffers from the same limitations as :meth:`~dataframely.Schema.serialize`.
         """
         self._sink(ParquetStorageBackend(), directory=directory, **kwargs)
 
     def read_parquet(
 
                 carefully*.
 
             kwargs: Additional keyword arguments passed directly to
-                :meth:`polars.read_parquet`.
+                :func:`polars.read_parquet`.
 
         Returns:
             The initialized collection.
 
         Raises:
-            ValidationRequiredError: If no collection schema can be read from the
+            ValidationRequiredError:
+                If no collection schema can be read from the
                 directory and ``validation`` is set to ``"forbid"``.
-            ValueError: If the provided directory does not contain parquet files for
+            ValueError:
+                If the provided directory does not contain parquet files for
                 all required members.
-            ValidationError: If the collection cannot be validate.
+            ValidationError: If the collection cannot be validated.
 
         Note:
             This method is backward compatible with older versions of dataframely
 
     def scan_parquet(
 
                 carefully*.
 
             kwargs: Additional keyword arguments passed directly to
-                :meth:`polars.scan_parquet` for all members.
+                :func:`polars.scan_parquet` for all members.
 
         Returns:
             The initialized collection.
 
     def write_delta(
 
             target: The location or DeltaTable where the data should be written. If
                 the location does not exist, it is created automatically, including all
                 of its parents.
-            kwargs: Additional keyword arguments passed directly to :meth:`polars.write_delta`.
+            kwargs: Additional keyword arguments passed to :meth:`polars.DataFrame.write_delta`.
         Attention:
             Schema metadata is stored as custom commit metadata. Only the schema
 
             without re-validating. Only use appends if you are certain that they do
             not break your schema.
 
-            This method suffers from the same limitations as :meth:`Schema.serialize`.
+            This method suffers from the same limitations as :meth:`~dataframely.Schema.serialize`.
         """
         self._write(
             backend=DeltaStorageBackend(),
 
     def scan_delta(
 
                 data, entrusting the user that the schema is valid. *Use this option
                 carefully*.
 
-            kwargs: Additional keyword arguments passed directly to :meth:`polars.scan_delta`.
+            kwargs: Additional keyword arguments passed to :func:`polars.scan_delta`.
 
         Returns:
             The initialized collection.
 
         Raises:
-            ValidationRequiredError: If no collection schema can be read from the source and ``validation`` is set to ``"forbid"``.
-            ValueError: If the provided source does not contain Delta tables for all required members.
+            ValidationRequiredError:
+                If no collection schema can be read from the source and ``validation`` is set to ``"forbid"``.
+            ValueError:
+                If the provided source does not contain Delta tables for all required members.
 
         Note:
             Due to current limitations in dataframely, this method may read the Delta
             table into memory if ``validation`` is ``"warn"`` or ``"allow"`` and
             validation is required.
 
     def read_delta(
 
                 data, entrusting the user that the schema is valid. *Use this option
                 carefully*.
 
-            kwargs: Additional keyword arguments passed directly to :meth:`polars.read_delta`.
+            kwargs: Additional keyword arguments passed directly to :func:`polars.read_delta`.
 
         Returns:
             The initialized collection.

diff --git a/dataframely/schema.py b/dataframely/schema.py
index 6d78132..39ff042 100644
--- a/dataframely/schema.py
+++ b/dataframely/schema.py
@@ -89,8 +89,9 @@ def create_empty(cls, *, lazy: bool = False) -> DataFrame[Self] | LazyFrame[Self
         """Create an empty data or lazy frame from this schema.
 
         Args:
-            lazy: Whether to create a lazy data frame. If ``True``, returns a lazy frame
-                with this Schema. Otherwise, returns an eager frame.
+            lazy: Whether to create a lazy data frame.
+                If ``True``, returns a lazy frame with this :class:`Schema`.
+                Otherwise, returns an eager frame.
 
         Returns:
             An instance of :class:`polars.DataFrame` or :class:`polars.LazyFrame` with
@@ -141,11 +142,12 @@ def create_empty_if_none(
         frame or return the input as lazy or eager frame.
 
         Args:
-            df: The data frame to check for ``None``. If it is not ``None``, it is
-                returned as lazy or eager frame. Otherwise, a schema-compliant data
-                or lazy frame with no rows is returned.
-            lazy: Whether to return a lazy data frame. If ``True``, returns a lazy frame
-                with this Schema. Otherwise, returns an eager frame.
+            df: The data frame to check for ``None``.
+                If it is not ``None``, it is returned as a lazy or eager frame.
+                Otherwise, a schema-compliant data or lazy frame with no rows is returned.
+            lazy: Whether to return a lazy data frame.
+                If ``True``, returns a lazy frame with this :class:`Schema`.
+                Otherwise, returns an eager frame.
 
         Returns:
             The given data frame ``df`` as lazy or eager frame, if it is not ``None``.
@@ -463,6 +465,7 @@ def is_valid(
         Args:
             df: The data frame to check for validity.
+
             allow_extra_columns: Whether to allow the data frame to contain columns
                 that are not defined in the schema.
             cast: Whether columns with a wrong data type in the input data frame are
@@ -516,10 +519,11 @@ def filter(
         succeeds.
 
         Args:
-            df: The data frame to filter for valid rows. The data frame is collected
-                within this method, regardless of whether a :class:`~polars.DataFrame`
-                or :class:`~polars.LazyFrame` is passed.
-            cast: Whether columns with a wrong data type in the input data frame are
+            df: The data frame to filter for valid rows.
+                The data frame is collected within this method, regardless of whether
+                a :class:`~polars.DataFrame` or :class:`~polars.LazyFrame` is passed.
+            cast:
+                Whether columns with a wrong data type in the input data frame are
                 cast to the schema's defined data type if possible. Rows for which the
                 cast fails for any column are filtered out.
@@ -629,11 +633,11 @@ def cast(
 
         Note:
             If you only require a generic data frame for the type checker, consider
-            using :meth:`typing.cast` instead of this method.
+            using :func:`python:typing.cast` instead of this method.
 
         Attention:
             For lazy frames, casting is not performed eagerly. This prevents collecting
-            the lazy frame's schema but also means that a call to :meth:`collect`
+            the lazy frame's schema but also means that a call to :meth:`polars.LazyFrame.collect`
             further down the line might fail because of the cast and/or missing columns.
         """
         lf = df.lazy().select(
@@ -766,12 +770,12 @@ def read_parquet(
             source: Path, directory, or file-like object from which to read the data.
             validation: The strategy for running validation when reading the data:
 
-                - ``"allow"`: The method tries to read the parquet file's metadata. If
+                - ``"allow"``: The method tries to read the parquet file's metadata. If
                   the stored schema matches this schema, the data frame is read without
                   validation. If the stored schema mismatches this schema or no schema
                   information can be found in the metadata, this method automatically
                   runs :meth:`validate` with ``cast=True``.
-                - ``"warn"`: The method behaves similarly to ``"allow"``. However,
+                - ``"warn"``: The method behaves similarly to ``"allow"``. However,
                   it prints a warning if validation is necessary.
                 - ``"forbid"``: The method never runs validation automatically and only
                   returns if the schema stored in the parquet file's metadata matches
 
         Returns:
             The data frame with this schema.
 
         Raises:
-            ValidationRequiredError: If no schema information can be read from the
+            ValidationRequiredError:
+                If no schema information can be read from the
                 source and ``validation`` is set to ``"forbid"``.
 
         Attention:
@@ -821,12 +826,12 @@ def scan_parquet(
             source: Path, directory, or file-like object from which to read the data.
             validation: The strategy for running validation when reading the data:
 
-                - ``"allow"`: The method tries to read the parquet file's metadata. If
+                - ``"allow"``: The method tries to read the parquet file's metadata. If
                   the stored schema matches this schema, the data frame is read without
                   validation. If the stored schema mismatches this schema or no schema
                   information can be found in the metadata, this method automatically
                   runs :meth:`validate` with ``cast=True``.
-                - ``"warn"`: The method behaves similarly to ``"allow"``. However,
+                - ``"warn"``: The method behaves similarly to ``"allow"``. However,
                   it prints a warning if validation is necessary.
                 - ``"forbid"``: The method never runs validation automatically and only
                   returns if the schema stored in the parquet file's metadata matches
 
         Returns:
             The data frame with this schema.
         Raises:
-            ValidationRequiredError: If no schema information can be read from the
+            ValidationRequiredError:
+                If no schema information can be read from the
                 source and ``validation`` is set to ``"forbid"``.
 
         Note:
@@ -952,12 +958,12 @@ def scan_delta(
             source: Path or DeltaTable object from which to read the data.
             validation: The strategy for running validation when reading the data:
 
-                - ``"allow"`: The method tries to read the parquet file's metadata. If
+                - ``"allow"``: The method tries to read the parquet file's metadata. If
                   the stored schema matches this schema, the data frame is read without
                   validation. If the stored schema mismatches this schema or no schema
                   information can be found in the metadata, this method automatically
                   runs :meth:`validate` with ``cast=True``.
-                - ``"warn"`: The method behaves similarly to ``"allow"``. However,
+                - ``"warn"``: The method behaves similarly to ``"allow"``. However,
                   it prints a warning if validation is necessary.
                 - ``"forbid"``: The method never runs validation automatically and only
                   returns if the schema stored in the parquet file's metadata matches
 
         Returns:
             The lazy data frame with this schema.
 
         Raises:
-            ValidationRequiredError: If no schema information can be read
-                from the source and ``validation`` is set to ``"forbid"``.
+            ValidationRequiredError:
+                If no schema information can be read
+                from the source and ``validation`` is set to ``"forbid"``.
 
         Attention:
             Schema metadata is stored as custom commit metadata. Only the schema
@@ -1013,12 +1020,12 @@ def read_delta(
             source: Path or DeltaTable object from which to read the data.
             validation: The strategy for running validation when reading the data:
 
-                - ``"allow"`: The method tries to read the parquet file's metadata. If
+                - ``"allow"``: The method tries to read the parquet file's metadata. If
                   the stored schema matches this schema, the data frame is read without
                   validation. If the stored schema mismatches this schema or no schema
                   information can be found in the metadata, this method automatically
                   runs :meth:`validate` with ``cast=True``.
-                - ``"warn"`: The method behaves similarly to ``"allow"``. However,
+                - ``"warn"``: The method behaves similarly to ``"allow"``. However,
                   it prints a warning if validation is necessary.
                 - ``"forbid"``: The method never runs validation automatically and only
                   returns if the schema stored in the parquet file's metadata matches
 
         Returns:
             The data frame with this schema.
 
         Raises:
-            ValidationRequiredError: If no schema information can be read
-                from the source and ``validation`` is set to ``"forbid"``.
+            ValidationRequiredError:
+                If no schema information can be read from the source
+                and ``validation`` is set to ``"forbid"``.
 
         Attention:
             Schema metadata is stored as custom commit metadata.
             Only the schema

diff --git a/docs/conf.py b/docs/conf.py
index ad2e9bf..5a2cc45 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -41,12 +41,19 @@
     "numpydoc",
     "sphinx_copybutton",
     "sphinx.ext.autodoc",
+    "sphinx.ext.intersphinx",
    "sphinx.ext.linkcode",
     "sphinxcontrib.apidoc",
     "myst_parser",
+    "sphinx.ext.napoleon",
 ]
-
 myst_parser_config = {"myst_enable_extensions": ["rst_eval_roles"]}
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+    "polars": ("https://docs.pola.rs/py-polars/html/", None),
+    "sqlalchemy": ("https://docs.sqlalchemy.org/en/20/", None),
+}
+
 source_suffix = {
     ".rst": "restructuredtext",
     ".txt": "markdown",

From f30edbac7d2e9dae22b0f2e1e369ccae1017f028 Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Mon, 20 Oct 2025 14:11:11 +0200
Subject: [PATCH 2/5] fix

---
 docs/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index cd142b8..a41d7d5 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,6 +1,6 @@
 # Dataframely
 
-Dataframely is a Python package to validate the schema and content of [polars](https://pola.rs/)\_ data frames.
+Dataframely is a Python package to validate the schema and content of [polars](https://pola.rs/) data frames.
 Its purpose is to make data pipelines more robust by ensuring that data meet expectations and more readable by
 adding schema information to data frame type hints.

From fac1fe21de968628ed4083204e79bb2dc9081e17 Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Mon, 20 Oct 2025 14:14:22 +0200
Subject: [PATCH 3/5] fix

---
 docs/sites/faq.md                   |  5 +++--
 docs/sites/features/primary-keys.md | 21 +++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/docs/sites/faq.md b/docs/sites/faq.md
index 66a9365..ed58cf7 100644
--- a/docs/sites/faq.md
+++ b/docs/sites/faq.md
@@ -3,9 +3,10 @@
 Whenever you find out something that you were surprised by or needed some non-trivial thinking, please add it here.
 
-## How do I define additional unique keys in a `dy.Schema`?
+## How do I define additional unique keys in a :class:`~dataframely.Schema`?
 
-By default, `dataframely` only supports defining a single non-nullable (composite) primary key in `dy.Schema`.
+By default, `dataframely` only supports defining a single non-nullable (composite) primary key in a
+{class}`~dataframely.Schema`.
 However, in some scenarios it may be useful to define additional unique keys (which support nullable fields and/or
 which are additionally unique).

diff --git a/docs/sites/features/primary-keys.md b/docs/sites/features/primary-keys.md
index b0daa25..c80d3e4 100644
--- a/docs/sites/features/primary-keys.md
+++ b/docs/sites/features/primary-keys.md
@@ -1,12 +1,12 @@
 # Primary keys
 
-## Defining primary keys in `dy.Schema`
+## Defining primary keys in {class}`~dataframely.Schema`
 
 When working with tabular data, it is often useful to define a
 [primary key](https://en.wikipedia.org/wiki/Primary_key).
 A primary key is a set of one or multiple columns, the combined values of which form a unique identifier for
 every record in a table.
-Dataframely supports marking columns as part of the primary key when defining a `dy.Schema` by setting
+Dataframely supports marking columns as part of the primary key when defining a {class}`~dataframely.Schema` by setting
 `primary_key=True` on the respective column(s).
 
 ```{note}
 
 class LineItemSchema(dy.Schema):
 
 ```
 
 Validation will now ensure that all pairs of (`invoice_id`, `item_id`) are unique.
-## Primary keys in `dy.Collection`
-
-The central idea behind `dy.Collection` is to unify multiple tables relating to the same set of underlying entities.
-This is useful because it allows us to write `dy.filter`s that use information from multiple tables to identify whether
-the underlying entity is valid or not. If any `dy.filter`s are defined, dataframely requires the tables in a
-`dy.Collection` to have an overlapping primary key (i.e., there must be at least one column that is a primary key in all
+## Primary keys in {class}`~dataframely.Collection`
+
+The central idea behind {class}`~dataframely.Collection` is to unify multiple tables relating to the same set of
+underlying entities.
+This is useful because it allows us to write {func}`~dataframely.filter`s that use information from multiple tables
+to identify whether the underlying entity is valid or not.
+If any {func}`~dataframely.filter`s are defined, dataframely requires the tables in a {class}`~dataframely.Collection`
+to have an overlapping primary key (i.e., there must be at least one column that is a primary key in all
 tables).

From 772bf6c0e667b571667f51d388f02d22c503c958 Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Mon, 20 Oct 2025 16:04:46 +0200
Subject: [PATCH 4/5] fix

---
 docs/sites/faq.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sites/faq.md b/docs/sites/faq.md
index ed58cf7..2a02d06 100644
--- a/docs/sites/faq.md
+++ b/docs/sites/faq.md
@@ -3,7 +3,7 @@
 Whenever you find out something that you were surprised by or needed some non-trivial thinking, please add it here.
 
-## How do I define additional unique keys in a :class:`~dataframely.Schema`?
+## How do I define additional unique keys in a {class}`~dataframely.Schema`?
 
 By default, `dataframely` only supports defining a single non-nullable (composite) primary key in a
 {class}`~dataframely.Schema`.

From c82f0a3d91940f71a0c17f77f18dc00b9227be0f Mon Sep 17 00:00:00 2001
From: Andreas Albert
Date: Mon, 20 Oct 2025 16:05:53 +0200
Subject: [PATCH 5/5] fix

---
 dataframely/schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataframely/schema.py b/dataframely/schema.py
index 39ff042..4b9acd4 100644
--- a/dataframely/schema.py
+++ b/dataframely/schema.py
@@ -633,7 +633,7 @@ def cast(
         Note:
             If you only require a generic data frame for the type checker, consider
-            using :func:`python:typing.cast` instead of this method.
+            using :func:`typing.cast` instead of this method.
 
         Attention:
             For lazy frames, casting is not performed eagerly. This prevents collecting
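
To make the `Schema.filter` semantics documented above concrete, here is a minimal usage sketch. It is not part of the patch itself: the schema, its columns, and the input data are invented for illustration, and it assumes the column constructors and the `FailureInfo.counts()` helper behave as dataframely's documentation describes.

```python
# Illustrative sketch only -- exercises the filter() API whose docstring this
# patch rewrites. Schema and column names are hypothetical examples.
import polars as pl
import dataframely as dy


class UserSchema(dy.Schema):
    # primary_key=True marks the column as (part of) the primary key,
    # as described in docs/sites/features/primary-keys.md above.
    id = dy.Int64(primary_key=True)
    name = dy.String(nullable=False)


df = pl.DataFrame({"id": [1, 2, 3], "name": ["a", "b", None]})

# filter() returns the valid rows plus a FailureInfo object explaining
# why the remaining rows were removed (here: the null "name").
valid, failures = UserSchema.filter(df, cast=True)
print(valid)
print(failures.counts())  # assumed helper: per-rule failure counts
```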