CogStack · alhendrickson · Aug 22, 2025 · Aug 20, 2025 · Aug 20, 2025 · Aug 20, 2025
diff --git a/medcat-service/medcat_service/config.py b/medcat-service/medcat_service/config.py
@@ -3,6 +3,15 @@
 
 
 class Settings(BaseSettings):
+    class Config:
+        frozen = True
+
     app_root_path: str = Field(
         default="/", description="The Root Path for the FastAPI App", examples=["/medcat-service"]
     )
+
+    deid_mode: bool = Field(default=False, description="Enable DEID mode")
+    deid_redact: bool = Field(
+        default=True,
+        description="Enable DEID redaction. Returns text like [***] instead of [ANNOTATION]",
+    )
diff --git a/medcat-service/medcat_service/dependencies.py b/medcat-service/medcat_service/dependencies.py
@@ -1,3 +1,4 @@
+import logging
 from functools import lru_cache
 from typing import Annotated
 
@@ -6,15 +7,20 @@
 from medcat_service.config import Settings
 from medcat_service.nlp_processor.medcat_processor import MedCatProcessor
 
+log = logging.getLogger(__name__)
+
 
 @lru_cache
-def get_medcat_processor() -> MedCatProcessor:
-    return MedCatProcessor()
+def get_settings() -> Settings:
+    settings = Settings()
+    log.debug("Using settings: %s", settings)
+    return settings
 
 
 @lru_cache
-def get_settings() -> Settings:
-    return Settings()
+def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor:
+    log.debug("Creating new Medcat Processsor using settings: %s", settings)
+    return MedCatProcessor(settings)
 
 
 MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)]
diff --git a/medcat-service/medcat_service/nlp_processor/medcat_processor.py b/medcat-service/medcat_service/nlp_processor/medcat_processor.py
@@ -15,16 +15,17 @@
 from medcat.config.config_meta_cat import ConfigMetaCAT
 from medcat.vocab import Vocab
 
+from medcat_service.config import Settings
 from medcat_service.types import HealthCheckResponse, ModelCardInfo, ProcessErrorsResult, ProcessResult, ServiceInfo
 
 
-class MedCatProcessor():
+class MedCatProcessor:
     """"
     MedCAT Processor class is wrapper over MedCAT that implements annotations extractions functionality
     (both single and bulk processing) that can be easily exposed for an API.
     """
 
-    def __init__(self):
+    def __init__(self, settings: Settings):
         app_log_level = os.getenv("APP_LOG_LEVEL", logging.INFO)
         medcat_log_level = os.getenv("LOG_LEVEL", logging.INFO)
 
@@ -46,8 +47,8 @@ def __init__(self):
 
         self.bulk_nproc = int(os.getenv("APP_BULK_NPROC", 8))
         self.torch_threads = int(os.getenv("APP_TORCH_THREADS", -1))
-        self.DEID_MODE = eval(os.getenv("DEID_MODE", "False"))
-        self.DEID_REDACT = eval(os.getenv("DEID_REDACT", "True"))
+        self.DEID_MODE = settings.deid_mode
+        self.DEID_REDACT = settings.deid_redact
         self.model_card_info = ModelCardInfo(
             ontologies=None, meta_cat_model_names=[], model_last_modified_on=None)
 
@@ -209,13 +210,13 @@ def process_content_bulk(self, content):
         start_time_ns = time.time_ns()
 
         try:
+            text_input = MedCatProcessor._generate_input_doc(content, invalid_doc_ids)
             if self.DEID_MODE:
-                # TODO 2025-07-21: deid_multi_texts doesnt exist in medcat 2?
-                ann_res = self.cat.deid_multi_texts(MedCatProcessor._generate_input_doc(content, invalid_doc_ids),
-                                                    redact=self.DEID_REDACT)
+                text_to_deid_from_tuple = (x[1] for x in text_input)
+
+                ann_res = self.cat.deid_multi_text(list(text_to_deid_from_tuple),
+                                                   redact=self.DEID_REDACT, n_process=self.bulk_nproc)
             else:
-                text_input = MedCatProcessor._generate_input_doc(
-                    content, invalid_doc_ids)
                 ann_res = {
                     ann_id: res for ann_id, res in
                     self.cat.get_entities_multi_texts(
@@ -426,9 +427,11 @@ def _generate_result(self, in_documents, annotations, elapsed_time):
                     footer=in_ct.get("footer"),
                 )
             elif self.DEID_MODE:
-
                 out_res = ProcessResult(
-                    text=str(in_ct["text"]),
+                    # TODO: DEID mode is passing the resulting text in the annotations field here but shouldn't.
+                    text=str(annotations[i]),
+                    # TODO: DEID bulk mode should also be able to return the list of annotations found,
+                    #  to match the features of the singular api. CU-869a6wc6z
                     annotations=[],
                     success=True,
                     timestamp=self._get_timestamp(),

diff --git a/medcat-service/medcat_service/test/common.py b/medcat-service/medcat_service/test/common.py
@@ -85,3 +85,6 @@ def setup_medcat_processor():
         os.environ["APP_BULK_NPROC"] = "8"
 
     os.environ["APP_TRAINING_MODE"] = "False"
+
+    os.environ["DEID_MODE"] = "False"
+    os.environ["DEID_REDACT"] = "False"
diff --git a/medcat-service/medcat_service/test/test_deid.py b/medcat-service/medcat_service/test/test_deid.py
@@ -1,43 +1,39 @@
+import os
 import unittest
 
 from fastapi.testclient import TestClient
 
 import medcat_service.test.common as common
+from medcat_service.config import Settings
+from medcat_service.dependencies import get_settings
+from medcat_service.main import app
 
 
-class TestMedcatServiceDeId(unittest.TestCase):
-    """
-    Implementation of test cases for MedCAT service
-    """
+def get_settings_override():
+    return Settings(deid_mode=True, deid_redact=True)
+
 
-    # Available endpoints
-    #
+class TestMedcatServiceDeId(unittest.TestCase):
     ENDPOINT_PROCESS_SINGLE = "/api/process"
     ENDPOINT_PROCESS_BULK = "/api/process_bulk"
     client: TestClient
 
-    # Static initialization methods
-    #
     @classmethod
     def setUpClass(cls):
-        pass
-        # Enable when test enabled. Complexity around env vars being shared accross tests,
-        # Should instead move to use pydantic settings for easy test overrides.
+        common.setup_medcat_processor()
 
-        # common.setup_medcat_processor()
-        # os.environ["DEID_MODE"] = "True"
-        # os.environ["DEID_REDACT"] = "True"
+        if "APP_MEDCAT_MODEL_PACK" not in os.environ:
+            os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/examples/example-deid-model-pack.zip"
 
-        # if "APP_MEDCAT_MODEL_PACK" not in os.environ:
-        #     os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/example-deid-model-pack.zip"
+        app.dependency_overrides[get_settings] = get_settings_override
+        cls.client = TestClient(app)
 
-        # cls.client = TestClient(app)
-
-    @unittest.skip("Disabled until deid model is committed")
-    def testDeidProcess(self):
+    def test_deid_process_api(self):
         payload = common.create_payload_content_from_doc_single(
             "John had been diagnosed with acute Kidney Failure the week before"
         )
+        app.dependency_overrides[get_settings] = get_settings_override
+
         response = self.client.post(self.ENDPOINT_PROCESS_SINGLE, json=payload)
         self.assertEqual(response.status_code, 200)
 
@@ -58,3 +54,37 @@ def testDeidProcess(self):
         self.assertEqual(ann["pretty_name"], expected["pretty_name"])
         self.assertEqual(ann["source_value"], expected["source_value"])
         self.assertEqual(ann["cui"], expected["cui"])
+        app.dependency_overrides = {}
+
+    def test_deid_process_bulk_api(self):
+        payload = common.create_payload_content_from_doc_bulk([
+            "John had been diagnosed with acute Kidney Failure the week before"
+        ])
+        app.dependency_overrides[get_settings] = get_settings_override
+
+        response = self.client.post(self.ENDPOINT_PROCESS_BULK, json=payload)
+        self.assertEqual(response.status_code, 200)
+
+        actual = response.json()
+
+        expected = {
+            "pretty_name": "PATIENT",
+            "source_value": "John",
+            "cui": "PATIENT",
+            "text": "[****] had been diagnosed with acute Kidney Failure the week before",
+        }
+        self.assertEqual(len(actual["result"]), 1)
+        self.assertEqual(actual["result"][0]["text"], expected["text"])
+
+        self.assertEqual(
+            len(actual["result"][0]["annotations"]),
+            0,
+            "CU-869a6wc6z No annotations are currently returned by the bulk API",
+        )
+
+        # Note: CU-869a6wc6z commented out these asserts until annotations are returned
+        # ann = actual["result"][0]["annotations"][0]["0"]
+        # self.assertEqual(ann["pretty_name"], expected["pretty_name"])
+        # self.assertEqual(ann["source_value"], expected["source_value"])
+        # self.assertEqual(ann["cui"], expected["cui"])
+        app.dependency_overrides = {}
diff --git a/medcat-service/medcat_service/test/test_medcat_processor.py b/medcat-service/medcat_service/test/test_medcat_processor.py
@@ -1,13 +1,14 @@
 import unittest
 
+from medcat_service.config import Settings
 from medcat_service.nlp_processor import MedCatProcessor
 from medcat_service.test.common import setup_medcat_processor
 
 
 class TestMedCatProcessorReadiness(unittest.TestCase):
     def setUp(self):
         setup_medcat_processor()
-        self.processor = MedCatProcessor()
+        self.processor = MedCatProcessor(Settings())
 
     def test_readiness_is_ok(self):
         result = self.processor._check_medcat_readiness()

diff --git a/medcat-service/scripts/integration_test_functions.sh b/medcat-service/scripts/integration_test_functions.sh
@@ -70,26 +70,37 @@ integration_test_medcat_service() {
 
   # Test /api/process_bulk
 
-  if [[ "$expected_annotation" == "PATIENT" ]]; then
-     echo "Skipping Process_bulk test for DeID Mode testing "
-     echo "Process_bulk in DeID mode appears to have a bug making it return the text without deid"
-     return 0
-  fi
-
   local api="http://${localhost_name}:${port}/api/process_bulk"
   local input_text="Patient J. Smith had been diagnosed with acute kidney failure the week before"
   local input_payload="{\"content\": [{\"text\":\"${input_text}\"}]}"
-  local expected_annotation="Kidney Failure"
-
+  local expected_annotation=${3:-Kidney Failure}
 
   echo "Calling POST $api with payload '$input_payload'"
   local actual
 
-  actual=$(curl -s -X POST $api \
+  # Capture both body and HTTP code
+  response=$(curl -s -w "\n%{http_code}" -X POST "$api" \
     -H 'Content-Type: application/json' \
     -d "$input_payload")
 
-  echo "Recieved result '$actual'"
+  # Split body and code
+  http_code=$(echo "$response" | tail -n1)
+  actual=$(echo "$response" | sed '$d')
+
+  echo "HTTP status: $http_code"
+  echo "Response body: '$actual'"
+
+  if [[ "$http_code" != "200" ]]; then
+    echo "ERROR: Expected HTTP 200, got $http_code"
+    echo -e "Actual response was:\n${actual}"
+    return 1
+  fi
+
+  if [[ "$expected_annotation" == "PATIENT" ]]; then
+     echo "CU-869a6wc6z Skipping Process_bulk annotation test for DeID Mode testing "
+     echo "Process_bulk in DeID mode has missing feature making it not return the annotations, just the deid text"
+     return 0
+  fi
 
   local actual_annotation
   actual_annotation=$(echo "$actual" | jq -r '.result[0].annotations[0]["0"].pretty_name')