diff --git a/src/structify/resources/polars.py b/src/structify/resources/polars.py index b9d5e7192..ddcfab5a1 100644 --- a/src/structify/resources/polars.py +++ b/src/structify/resources/polars.py @@ -1187,19 +1187,24 @@ def _upload_df( dataset_name: str, table_name: str, ) -> None: - parquet_bytes = io.BytesIO() - df.write_parquet(parquet_bytes) - parquet_bytes.seek(0) - - response = self._client._client.post( - "/entity/upload_parquet", - params={"dataset": dataset_name, "table_name": table_name}, - files={"file": ("data.parquet", parquet_bytes.getvalue(), "application/octet-stream")}, - headers=self._client.auth_headers, - ) - response.raise_for_status() - - pass + BATCH_UPLOAD_SIZE = 10_000 + + if df.is_empty(): + return + + for offset in range(0, df.height, BATCH_UPLOAD_SIZE): + chunk = df.slice(offset, BATCH_UPLOAD_SIZE) + parquet_bytes = io.BytesIO() + chunk.write_parquet(parquet_bytes) + parquet_bytes.seek(0) + + response = self._client._client.post( + "/entity/upload_parquet", + params={"dataset": dataset_name, "table_name": table_name}, + files={"file": ("data.parquet", parquet_bytes.getvalue(), "application/octet-stream")}, + headers=self._client.auth_headers, + ) + response.raise_for_status() class PolarsResourceWithRawResponse: