CogStack · mart-r · Jul 10, 2025 · Jul 4, 2025 · Jul 4, 2025 · Jul 4, 2025
diff --git a/.github/workflows/medcat-service_run-tests.yml b/.github/workflows/medcat-service_run-tests.yml
@@ -29,7 +29,7 @@ jobs:
       - name: Install Python 3
         uses: actions/setup-python@v5
         with:
-          python-version: 3.9
+          python-version: 3.11
           cache: 'pip' # caching pip dependencies
 
       - name: Install dependencies

diff --git a/medcat-service/Dockerfile b/medcat-service/Dockerfile
@@ -6,6 +6,9 @@ ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
 WORKDIR /cat
 COPY ./requirements.txt /cat
 
+# NOTE: git is required so that pip can install requirements given as git URLs
+RUN apt-get update && apt-get install -y git
+
 # Install Python dependencies
 ARG USE_CPU_TORCH=true
 # NOTE: Allow building without GPU so as to lower image size (GPU is disabled by default)

diff --git a/medcat-service/README.md b/medcat-service/README.md
@@ -1,6 +1,6 @@
 # Introduction
 
-This project implements the [MedCAT](https://github.com/CogStack/MedCAT/) NLP application as a service behind a REST API. The general idea is to be able send the text to MedCAT NLP service and receive back the annotations. The REST API is built using [Flask](https://flask.palletsprojects.com/).
+This project implements the [MedCAT](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/) NLP application as a service behind a REST API. The general idea is to be able to send the text to the MedCAT NLP service and receive back the annotations. The REST API is built using [Flask](https://flask.palletsprojects.com/).
 
 Git Branches:
   - devel: development branch, latest updates and features, might be unstable.
@@ -327,4 +327,4 @@ The main settings that can be used to improve the performance when querying larg
 ## MedCAT library
 MedCAT parameters are defined in selected `envs/env_medcat*`  file. 
 
-For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/MedCAT/).
+For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/).
diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py
@@ -10,8 +10,9 @@
 from medcat.cat import CAT
 from medcat.cdb import CDB
 from medcat.config import Config
-from medcat.meta_cat import MetaCAT
-from medcat.utils.ner.deid import DeIdModel
+from medcat.config.config_meta_cat import ConfigMetaCAT
+from medcat.components.addons.meta_cat import MetaCATAddon
+from medcat.components.ner.trf.deid import DeIdModel
 from medcat.vocab import Vocab
 
 
@@ -188,7 +189,7 @@ def process_content_bulk(self, content):
         # use generators both to provide input documents and to provide resulting annotations
         # to avoid too many mem-copies
         invalid_doc_ids = []
-        ann_res = []
+        ann_res = {}
 
         start_time_ns = time.time_ns()
 
@@ -197,11 +198,14 @@ def process_content_bulk(self, content):
                 ann_res = self.cat.deid_multi_texts(MedCatProcessor._generate_input_doc(content, invalid_doc_ids),
                                                     redact=self.DEID_REDACT)
             else:
-                ann_res = self.cat.multiprocessing_batch_char_size(
-                    MedCatProcessor._generate_input_doc(content, invalid_doc_ids), nproc=self.bulk_nproc)
-
+                text_input = MedCatProcessor._generate_input_doc(content, invalid_doc_ids)
+                ann_res = {
+                    ann_id: res for ann_id, res in
+                    self.cat.get_entities_multi_texts(
+                        text_input, n_process=self.bulk_nproc)
+                }
         except Exception as e:
-            self.log.error(repr(e))
+            self.log.error("Unable to process data", exc_info=e)
 
         additional_info = {"elapsed_time": str((time.time_ns() - start_time_ns) / 10e8)}
 
@@ -239,11 +243,12 @@ def _populate_model_card_info(self, config: Config):
         Args:
             config (Config): MedCAT configuration object.
         """
-        self.model_card_info["ontologies"] = config.version.ontology \
-            if (isinstance(config.version.ontology, list)) else str(config.version.ontology)
-        self.model_card_info["meta_cat_model_names"] = [i["Category Name"] for i in config.version.meta_cats] \
-            if (isinstance(config.version.meta_cats, list)) else str(config.version.meta_cats)
-        self.model_card_info["model_last_modified_on"] = str(config.version.last_modified)
+        self.model_card_info["ontologies"] = config.meta.ontology \
+            if (isinstance(config.meta.ontology, list)) else str(config.meta.ontology)
+        self.model_card_info["meta_cat_model_names"] = [
+            cnf.general.category_name for cnf in config.components.addons
+            if (isinstance(cnf, ConfigMetaCAT))]
+        self.model_card_info["model_last_modified_on"] = str(config.meta.last_saved)
 
     # helper MedCAT methods
     #
@@ -281,7 +286,7 @@ def _create_cat(self):
                 cat.cdb.filter_by_cui(cuis_to_keep)
 
             if self.app_model.lower() in ["", "unknown", "medmen"]:
-                self.app_model = cat.config.version.id
+                self.app_model = cat.config.meta.hash
 
             self._populate_model_card_info(cat.config)
 
@@ -305,13 +310,13 @@ def _create_cat(self):
         spacy_model = os.getenv("SPACY_MODEL", "")
 
         if spacy_model != "":
-            cdb.config.general["spacy_model"] = spacy_model
+            cdb.config.general.nlp.modelname = spacy_model
         else:
             logging.warning("SPACY_MODEL environment var not set" +
                             ", attempting to load the spacy model found within the CDB : "
-                            + cdb.config.general["spacy_model"])
+                            + cdb.config.general.nlp.modelname)
 
-            if cdb.config.general["spacy_model"] == "":
+            if cdb.config.general.nlp.modelname == "":
                 raise ValueError("No SPACY_MODEL env var declared, the CDB loaded does not have a\
                      spacy_model set in the config variable! \
                  To solve this declare the SPACY_MODEL in the env_medcat file.")
@@ -330,18 +335,21 @@ def _create_cat(self):
         if os.getenv("APP_MODEL_META_PATH_LIST", None) is not None:
             self.log.debug("Loading META annotations ...")
             for model_path in os.getenv("APP_MODEL_META_PATH_LIST").split(":"):
-                m = MetaCAT.load(model_path)
+                m = MetaCATAddon.deserialise_from(model_path)
                 meta_models.append(m)
 
-        if cat:
-            meta_models.extend(cat._meta_cats)
+        # if cat:
+        #     meta_models.extend(cat._meta_cats)
 
         if self.app_model.lower() in [None, "unknown"]:
-            self.app_model = cdb.config.version.id
+            self.app_model = cdb.config.meta.hash
 
-        config.general["log_level"] = os.getenv("LOG_LEVEL", logging.INFO)
+        config.general.log_level = os.getenv("LOG_LEVEL", logging.INFO)
 
-        cat = CAT(cdb=cdb, config=config, vocab=vocab, meta_cats=meta_models)
+        cat = CAT(cdb=cdb, config=config, vocab=vocab)
+        # add MetaCATs
+        for mc in meta_models:
+            cat.add_addon(mc)
 
         self._populate_model_card_info(cat.config)
 

diff --git a/medcat-service/models/examples/examples.md b/medcat-service/models/examples/examples.md
@@ -2,7 +2,7 @@
 
 ## [example-medcat-v1-model-pack][(models/examples/example-medcat-v1-model-pack.zip)
 - This model pack is built by running the MedCAT V1 Tutorial Part 3.1.
-- https://github.com/CogStack/MedCATtutorials/blob/5a07e4d77da404631cc16b47d3f1c6bd028de396/notebooks/introductory/Part_3_1_Building_a_Concept_Database_and_Vocabulary.ipynb
+- https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v1-tutorials/notebooks/introductory/Part_3_1_Building_a_Concept_Database_and_Vocabulary.ipynb
 
 It isn't a trained model, but has the concepts "Kidney Failure" and "Failure of Kidneys" built in
 
diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt
@@ -6,7 +6,7 @@ setuptools==78.1.1
 simplejson==3.19.3
 werkzeug==3.1.3
 setuptools-rust==1.11.0
-medcat==1.16.0
+medcat[meta-cat,spacy,deid] @ git+https://github.com/CogStack/cogstack-nlp.git@refs/tags/medcat/v0.13.5#subdirectory=medcat-v2
 # pinned because of issues with de-id models and past models (it will not do any de-id)
 transformers>=4.34.0,<5.0.0
-requests==2.32.4
+requests==2.32.4