diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index cd45d489..148cbd15 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -265,6 +265,9 @@ def _mp_one_batch_per_process( executor.submit(self._mp_worker_func, batch)) except StopIteration: break + if not futures: + # NOTE: if there wasn't any data, we didn't process anything + return # Main process works on next batch while workers are busy main_batch: Optional[list[tuple[str, str, bool]]] try: @@ -282,6 +285,10 @@ def _mp_one_batch_per_process( # so we're going to wait for them to finish, yield their results, # and subsequently submit the next batch to keep them busy for _ in range(external_processes): + if not futures: + # NOTE: if there's no futures then there can't be + # anything to batch + break # Wait for any future to complete done_future = next(as_completed(futures)) futures.remove(done_future) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index f945903c..5c4de7cd 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -418,6 +418,9 @@ def test_can_get_multiple_entities(self): "The dog is sitting outside the house." ] ents = list(self.cat.get_entities_multi_texts(texts)) + self.assert_ents(ents, texts) + + def assert_ents(self, ents: list[tuple], texts: list[str]): self.assertEqual(len(ents), len(texts)) # NOTE: text IDs are integers starting from 0 exp_ids = set(str(i) for i in range(len(texts))) @@ -425,6 +428,19 @@ def test_can_get_multiple_entities(self): with self.subTest(f"Entity: {ent_id_str} [{ent}]"): self.assertIn(ent_id_str, exp_ids) + def test_can_multiprocess_empty(self): + texts = [] + ents = list(self.cat.get_entities_multi_texts(texts, n_process=3)) + self.assert_ents(ents, texts) + + def test_can_get_multiprocess(self): + texts = [ + "The fittest most fit of chronic kidney failure", + "The dog is sitting outside the house." + ] + ents = list(self.cat.get_entities_multi_texts(texts, n_process=3)) + self.assert_ents(ents, texts) + class CATWithDocAddonTests(CATIncludingTests): EXAMPLE_TEXT = "Example text to tokenize"