def deid_multi_text(self, texts: Iterable[str], redact: bool = False,
                    n_process: Optional[int] = None) -> list[str]:
    """De-identify multiple texts, optionally using several processes.

    Args:
        texts (Iterable[str]): The texts to de-identify. Items may also
            be tuples, in which case the element at index 1 is used as
            the text. Any iterable is accepted, including one-shot
            iterables such as generators.
        redact (bool): Forwarded to `replace_entities_in_text`.
            Defaults to False.
        n_process (Optional[int]): Forwarded to
            `get_entities_multi_texts`. `None` is treated as 1.

    Raises:
        ValueError: If an item is neither a `str` nor a `tuple`.

    Returns:
        list[str]: One processed text per (input, result) pair.
    """
    if n_process is None:
        n_process = 1

    # The input is walked twice: once by the model and once by the zip
    # below. A one-shot iterable (generator, file, map object) would be
    # shared between both consumers, yielding misaligned or missing
    # pairs, so it is materialised exactly once up front.
    texts = list(texts)

    entities = self.cat.get_entities_multi_texts(
        texts, n_process=n_process)
    out: list[str] = []
    # NOTE(review): pairing by position assumes results are yielded in
    # input order - confirm against `get_entities_multi_texts`.
    for raw_text, (_, _ents) in zip(texts, entities):
        ents = _ents['entities']
        text: str
        if isinstance(raw_text, tuple):
            # Tuple input: the text is the second element.
            text = raw_text[1]
        elif isinstance(raw_text, str):
            text = raw_text
        else:
            raise ValueError("Unknown raw text: "
                             f"{type(raw_text)}: {raw_text}")
        new_text = replace_entities_in_text(
            text, ents, get_cui_name=self.cat.cdb.get_name, redact=redact)
        out.append(new_text)
    return out