From 94f5d422aa7c6383af872b90354da49b06b30dc3 Mon Sep 17 00:00:00 2001 From: Richard Sun Date: Tue, 22 Aug 2023 14:36:35 -0700 Subject: [PATCH 1/2] [QQC-2355] Limit number of data rows to check for processing status at once --- labelbox/schema/project.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index addb8c10b..11b33ea66 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -1454,12 +1454,35 @@ def _wait_until_data_rows_are_processed( """ Wait until all the specified data rows are processed""" start_time = datetime.now() + max_data_rows_per_poll = 100_000 + if data_row_ids is not None: + for i in range(0, len(data_row_ids), max_data_rows_per_poll): + chunk = data_row_ids[i:i + max_data_rows_per_poll] + self._poll_data_row_processing_status( + chunk, [], start_time, wait_processing_max_seconds, + sleep_interval) + + if global_keys is not None: + for i in range(0, len(global_keys), max_data_rows_per_poll): + chunk = global_keys[i:i + max_data_rows_per_poll] + self._poll_data_row_processing_status( + [], chunk, start_time, wait_processing_max_seconds, + sleep_interval) + + def _poll_data_row_processing_status( + self, + data_row_ids: List[str], + global_keys: List[str], + start_time: datetime, + wait_processing_max_seconds: int = _wait_processing_max_seconds, + sleep_interval=30): + while True: if (datetime.now() - start_time).total_seconds() >= wait_processing_max_seconds: raise ProcessingWaitTimeout( - "Maximum wait time exceeded while waiting for data rows to be processed. Try creating a batch a bit later" - ) + "Maximum wait time exceeded while waiting for data rows to be processed. " + "Try creating a batch a bit later") all_good = self.__check_data_rows_have_been_processed( data_row_ids, global_keys) From a663ed7853908735e145095503e7a0c9103b0841 Mon Sep 17 00:00:00 2001 From: Richard Sun Date: Tue, 22 Aug 2023 15:56:42 -0700 Subject: [PATCH 2/2] [QQC-2355] Review feedback --- labelbox/schema/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 11b33ea66..bd912aa0b 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -1481,8 +1481,8 @@ def _poll_data_row_processing_status( if (datetime.now() - start_time).total_seconds() >= wait_processing_max_seconds: raise ProcessingWaitTimeout( - "Maximum wait time exceeded while waiting for data rows to be processed. " - "Try creating a batch a bit later") + """Maximum wait time exceeded while waiting for data rows to be processed. + Try creating a batch a bit later""") all_good = self.__check_data_rows_have_been_processed( data_row_ids, global_keys)