diff --git a/.github/workflows/medcat-service_run-tests.yml b/.github/workflows/medcat-service_run-tests.yml index f98802f9d..3be28ee82 100755 --- a/.github/workflows/medcat-service_run-tests.yml +++ b/.github/workflows/medcat-service_run-tests.yml @@ -29,7 +29,7 @@ jobs: - name: Install Python 3 uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: 3.11 cache: 'pip' # caching pip dependencies - name: Install dependencies diff --git a/medcat-service/Dockerfile b/medcat-service/Dockerfile index cbb779b30..ec0703caf 100644 --- a/medcat-service/Dockerfile +++ b/medcat-service/Dockerfile @@ -6,6 +6,9 @@ ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 WORKDIR /cat COPY ./requirements.txt /cat +# NOTE: need git for URL based installs +RUN apt-get update && apt-get install -y git + # Install Python dependencies ARG USE_CPU_TORCH=true # NOTE: Allow building without GPU so as to lower image size (GPU is disabled by default) diff --git a/medcat-service/README.md b/medcat-service/README.md index 67899a4bd..89694dfc5 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -1,6 +1,6 @@ # Introduction -This project implements the [MedCAT](https://github.com/CogStack/MedCAT/) NLP application as a service behind a REST API. The general idea is to be able send the text to MedCAT NLP service and receive back the annotations. The REST API is built using [Flask](https://flask.palletsprojects.com/). +This project implements the [MedCAT](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/) NLP application as a service behind a REST API. The general idea is to be able to send the text to MedCAT NLP service and receive back the annotations. The REST API is built using [Flask](https://flask.palletsprojects.com/). Git Branches: - devel: development branch, latest updates and features, might be unstable.
@@ -327,4 +327,4 @@ The main settings that can be used to improve the performance when querying larg ## MedCAT library MedCAT parameters are defined in selected `envs/env_medcat*` file. -For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/MedCAT/). +For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/). diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py index 73dadac42..b75f26b6b 100644 --- a/medcat-service/medcat_service/nlp_processor/medcat_processor.py +++ b/medcat-service/medcat_service/nlp_processor/medcat_processor.py @@ -10,8 +10,9 @@ from medcat.cat import CAT from medcat.cdb import CDB from medcat.config import Config -from medcat.meta_cat import MetaCAT -from medcat.utils.ner.deid import DeIdModel +from medcat.config.config_meta_cat import ConfigMetaCAT +from medcat.components.addons.meta_cat import MetaCATAddon +from medcat.components.ner.trf.deid import DeIdModel from medcat.vocab import Vocab @@ -188,7 +189,7 @@ def process_content_bulk(self, content): # use generators both to provide input documents and to provide resulting annotations # to avoid too many mem-copies invalid_doc_ids = [] - ann_res = [] + ann_res = {} start_time_ns = time.time_ns() @@ -197,11 +198,14 @@ def process_content_bulk(self, content): ann_res = self.cat.deid_multi_texts(MedCatProcessor._generate_input_doc(content, invalid_doc_ids), redact=self.DEID_REDACT) else: - ann_res = self.cat.multiprocessing_batch_char_size( - MedCatProcessor._generate_input_doc(content, invalid_doc_ids), nproc=self.bulk_nproc) - + text_input = MedCatProcessor._generate_input_doc(content, invalid_doc_ids) + ann_res = { + ann_id: res for ann_id, res in + self.cat.get_entities_multi_texts( + text_input, n_process=self.bulk_nproc) + } except Exception 
as e: - self.log.error(repr(e)) + self.log.error("Unable to process data", exc_info=e) additional_info = {"elapsed_time": str((time.time_ns() - start_time_ns) / 10e8)} @@ -239,11 +243,12 @@ def _populate_model_card_info(self, config: Config): Args: config (Config): MedCAT configuration object. """ - self.model_card_info["ontologies"] = config.version.ontology \ - if (isinstance(config.version.ontology, list)) else str(config.version.ontology) - self.model_card_info["meta_cat_model_names"] = [i["Category Name"] for i in config.version.meta_cats] \ - if (isinstance(config.version.meta_cats, list)) else str(config.version.meta_cats) - self.model_card_info["model_last_modified_on"] = str(config.version.last_modified) + self.model_card_info["ontologies"] = config.meta.ontology \ + if (isinstance(config.meta.ontology, list)) else str(config.meta.ontology) + self.model_card_info["meta_cat_model_names"] = [ + cnf.general.category_name for cnf in config.components.addons + if (isinstance(cnf, ConfigMetaCAT))] + self.model_card_info["model_last_modified_on"] = str(config.meta.last_saved) # helper MedCAT methods # @@ -281,7 +286,7 @@ def _create_cat(self): cat.cdb.filter_by_cui(cuis_to_keep) if self.app_model.lower() in ["", "unknown", "medmen"]: - self.app_model = cat.config.version.id + self.app_model = cat.config.meta.hash self._populate_model_card_info(cat.config) @@ -305,13 +310,13 @@ def _create_cat(self): spacy_model = os.getenv("SPACY_MODEL", "") if spacy_model != "": - cdb.config.general["spacy_model"] = spacy_model + cdb.config.general.nlp.modelname = spacy_model else: logging.warning("SPACY_MODEL environment var not set" + ", attempting to load the spacy model found within the CDB : " - + cdb.config.general["spacy_model"]) + + cdb.config.general.nlp.modelname) - if cdb.config.general["spacy_model"] == "": + if cdb.config.general.nlp.modelname == "": raise ValueError("No SPACY_MODEL env var declared, the CDB loaded does not have a\ spacy_model set in the config 
variable! \ To solve this declare the SPACY_MODEL in the env_medcat file.") @@ -330,18 +335,21 @@ def _create_cat(self): if os.getenv("APP_MODEL_META_PATH_LIST", None) is not None: self.log.debug("Loading META annotations ...") for model_path in os.getenv("APP_MODEL_META_PATH_LIST").split(":"): - m = MetaCAT.load(model_path) + m = MetaCATAddon.deserialise_from(model_path) meta_models.append(m) - if cat: - meta_models.extend(cat._meta_cats) + # if cat: + # meta_models.extend(cat._meta_cats) if self.app_model.lower() in [None, "unknown"]: - self.app_model = cdb.config.version.id + self.app_model = cdb.config.meta.hash - config.general["log_level"] = os.getenv("LOG_LEVEL", logging.INFO) + config.general.log_level = os.getenv("LOG_LEVEL", logging.INFO) - cat = CAT(cdb=cdb, config=config, vocab=vocab, meta_cats=meta_models) + cat = CAT(cdb=cdb, config=config, vocab=vocab) + # add MetaCATs + for mc in meta_models: + cat.add_addon(mc) self._populate_model_card_info(cat.config) diff --git a/medcat-service/models/examples/examples.md b/medcat-service/models/examples/examples.md index fca65eebe..89445110a 100644 --- a/medcat-service/models/examples/examples.md +++ b/medcat-service/models/examples/examples.md @@ -2,7 +2,7 @@ ## [example-medcat-v1-model-pack][(models/examples/example-medcat-v1-model-pack.zip) - This model pack is built by running the MedCAT V1 Tutorial Part 3.1. 
-- https://github.com/CogStack/MedCATtutorials/blob/5a07e4d77da404631cc16b47d3f1c6bd028de396/notebooks/introductory/Part_3_1_Building_a_Concept_Database_and_Vocabulary.ipynb +- https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v1-tutorials/notebooks/introductory/Part_3_1_Building_a_Concept_Database_and_Vocabulary.ipynb It isn't a trained model, but has the concepts "Kidney Failure" and "Failure of Kidneys" built in diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index 7ff331a61..6b31d46e7 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -6,7 +6,7 @@ setuptools==78.1.1 simplejson==3.19.3 werkzeug==3.1.3 setuptools-rust==1.11.0 -medcat==1.16.0 +medcat[meta-cat,spacy,deid] @ git+https://github.com/CogStack/cogstack-nlp.git@refs/tags/medcat/v0.13.5#subdirectory=medcat-v2 # pinned because of issues with de-id models and past models (it will not do any de-id) transformers>=4.34.0,<5.0.0 -requests==2.32.4 \ No newline at end of file +requests==2.32.4