diff --git a/CHANGELOG.md b/CHANGELOG.md
index a45bce56b..46567f7b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
 ### Added
 * Added `get_by_name()` method to MetadataOntology object to access both custom and reserved metadata by name.
 
+### Changed
+* `Dataset.create_data_rows()` maximum number of DataRows per call increased to 150,000; the lower 30,000 limit for DataRows containing metadata has been removed
+
 # Version 3.33.1 (2022-12-14)
 ### Fixed
 * Fixed where batch creation limit was still limiting # of data rows. SDK should now support creating batches with up to 100k data rows
diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index bdfe02e35..bbabd6259 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -23,8 +23,7 @@
 
 logger = logging.getLogger(__name__)
 
-MAX_DATAROW_PER_API_OPERATION = 150000
-MAX_DATAROW_WITH_METADATA = 30000
+MAX_DATAROW_PER_API_OPERATION = 150_000
 
 
 class Dataset(DbObject, Updateable, Deletable):
@@ -426,15 +425,6 @@ def convert_item(item):
                     f"Cannot create more than {MAX_DATAROW_PER_API_OPERATION} DataRows per function call."
                 )
 
-            # TODO: If any datarows contain metadata, we're limiting max # of datarows
-            # until we address performance issues with datarow create with metadata
-            if len(items) > MAX_DATAROW_WITH_METADATA:
-                for row in items:
-                    if 'metadata_fields' in row:
-                        raise MalformedQueryException(
-                            f"Cannot create more than {MAX_DATAROW_WITH_METADATA} DataRows, if any DataRows contain metadata"
-                        )
-
         with ThreadPoolExecutor(file_upload_thread_count) as executor:
            futures = [executor.submit(convert_item, item) for item in items]
            items = [future.result() for future in as_completed(futures)]
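
For reviewers, a minimal usage sketch of the changed behavior. This is illustrative only and not part of the diff: the API key placeholder, dataset name, and example URLs are made up, and the payload shape follows the SDK's documented `create_data_rows()` dict format.

```python
# Sketch: after this change, a single create_data_rows() call accepts up to
# 150,000 items whether or not they carry metadata_fields; exceeding
# MAX_DATAROW_PER_API_OPERATION still raises MalformedQueryException.
import labelbox

client = labelbox.Client(api_key="<YOUR_API_KEY>")           # placeholder key
dataset = client.create_dataset(name="bulk-upload-example")  # hypothetical name

# Hypothetical payload: any list of row dicts up to the 150,000-item cap.
items = [{
    "row_data": f"https://example.com/images/{i}.jpg",
    "external_id": str(i),
} for i in range(1000)]

task = dataset.create_data_rows(items)
task.wait_till_done()  # create_data_rows() returns an asynchronous Task
```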