Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions medcat-service/medcat_service/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@


class Settings(BaseSettings):
class Config:
frozen = True

app_root_path: str = Field(
default="/", description="The Root Path for the FastAPI App", examples=["/medcat-service"]
)

deid_mode: bool = Field(default=False, description="Enable DEID mode")
deid_redact: bool = Field(
default=True,
description="Enable DEID redaction. Returns text like [***] instead of [ANNOTATION]",
)
14 changes: 10 additions & 4 deletions medcat-service/medcat_service/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from functools import lru_cache
from typing import Annotated

Expand All @@ -6,15 +7,20 @@
from medcat_service.config import Settings
from medcat_service.nlp_processor.medcat_processor import MedCatProcessor

log = logging.getLogger(__name__)


@lru_cache
def get_medcat_processor() -> MedCatProcessor:
return MedCatProcessor()
def get_settings() -> Settings:
settings = Settings()
log.debug("Using settings: %s", settings)
return settings


@lru_cache
def get_settings() -> Settings:
return Settings()
def get_medcat_processor(settings: Annotated[Settings, Depends(get_settings)]) -> MedCatProcessor:
log.debug("Creating new Medcat Processsor using settings: %s", settings)
return MedCatProcessor(settings)


MedCatProcessorDep = Annotated[MedCatProcessor, Depends(get_medcat_processor)]
25 changes: 14 additions & 11 deletions medcat-service/medcat_service/nlp_processor/medcat_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,17 @@
from medcat.config.config_meta_cat import ConfigMetaCAT
from medcat.vocab import Vocab

from medcat_service.config import Settings
from medcat_service.types import HealthCheckResponse, ModelCardInfo, ProcessErrorsResult, ProcessResult, ServiceInfo


class MedCatProcessor():
class MedCatProcessor:
""""
MedCAT Processor class is wrapper over MedCAT that implements annotations extractions functionality
(both single and bulk processing) that can be easily exposed for an API.
"""

def __init__(self):
def __init__(self, settings: Settings):
app_log_level = os.getenv("APP_LOG_LEVEL", logging.INFO)
medcat_log_level = os.getenv("LOG_LEVEL", logging.INFO)

Expand All @@ -46,8 +47,8 @@ def __init__(self):

self.bulk_nproc = int(os.getenv("APP_BULK_NPROC", 8))
self.torch_threads = int(os.getenv("APP_TORCH_THREADS", -1))
self.DEID_MODE = eval(os.getenv("DEID_MODE", "False"))
self.DEID_REDACT = eval(os.getenv("DEID_REDACT", "True"))
self.DEID_MODE = settings.deid_mode
self.DEID_REDACT = settings.deid_redact
self.model_card_info = ModelCardInfo(
ontologies=None, meta_cat_model_names=[], model_last_modified_on=None)

Expand Down Expand Up @@ -209,13 +210,13 @@ def process_content_bulk(self, content):
start_time_ns = time.time_ns()

try:
text_input = MedCatProcessor._generate_input_doc(content, invalid_doc_ids)
if self.DEID_MODE:
# TODO 2025-07-21: deid_multi_texts doesnt exist in medcat 2?
ann_res = self.cat.deid_multi_texts(MedCatProcessor._generate_input_doc(content, invalid_doc_ids),
redact=self.DEID_REDACT)
text_to_deid_from_tuple = (x[1] for x in text_input)

ann_res = self.cat.deid_multi_text(list(text_to_deid_from_tuple),
redact=self.DEID_REDACT, n_process=self.bulk_nproc)
else:
text_input = MedCatProcessor._generate_input_doc(
content, invalid_doc_ids)
ann_res = {
ann_id: res for ann_id, res in
self.cat.get_entities_multi_texts(
Expand Down Expand Up @@ -426,9 +427,11 @@ def _generate_result(self, in_documents, annotations, elapsed_time):
footer=in_ct.get("footer"),
)
elif self.DEID_MODE:

out_res = ProcessResult(
text=str(in_ct["text"]),
# TODO: DEID mode is passing the resulting text in the annotations field here but shouldnt.
text=str(annotations[i]),
# TODO: DEID bulk mode should also be able to return the list of annotations found,
# to match the features of the singular api. CU-869a6wc6z
annotations=[],
success=True,
timestamp=self._get_timestamp(),
Expand Down
3 changes: 3 additions & 0 deletions medcat-service/medcat_service/test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,6 @@ def setup_medcat_processor():
os.environ["APP_BULK_NPROC"] = "8"

os.environ["APP_TRAINING_MODE"] = "False"

os.environ["DEID_MODE"] = "False"
os.environ["DEID_REDACT"] = "False"
70 changes: 50 additions & 20 deletions medcat-service/medcat_service/test/test_deid.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,39 @@
import os
import unittest

from fastapi.testclient import TestClient

import medcat_service.test.common as common
from medcat_service.config import Settings
from medcat_service.dependencies import get_settings
from medcat_service.main import app


class TestMedcatServiceDeId(unittest.TestCase):
"""
Implementation of test cases for MedCAT service
"""
def get_settings_override():
return Settings(deid_mode=True, deid_redact=True)


# Available endpoints
#
class TestMedcatServiceDeId(unittest.TestCase):
ENDPOINT_PROCESS_SINGLE = "/api/process"
ENDPOINT_PROCESS_BULK = "/api/process_bulk"
client: TestClient

# Static initialization methods
#
@classmethod
def setUpClass(cls):
pass
# Enable when test enabled. Complexity around env vars being shared across tests;
# should instead move to use pydantic settings for easy test overrides.
common.setup_medcat_processor()

# common.setup_medcat_processor()
# os.environ["DEID_MODE"] = "True"
# os.environ["DEID_REDACT"] = "True"
if "APP_MEDCAT_MODEL_PACK" not in os.environ:
os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/examples/example-deid-model-pack.zip"

# if "APP_MEDCAT_MODEL_PACK" not in os.environ:
# os.environ["APP_MEDCAT_MODEL_PACK"] = "./models/example-deid-model-pack.zip"
app.dependency_overrides[get_settings] = get_settings_override
cls.client = TestClient(app)

# cls.client = TestClient(app)

@unittest.skip("Disabled until deid model is committed")
def testDeidProcess(self):
def test_deid_process_api(self):
payload = common.create_payload_content_from_doc_single(
"John had been diagnosed with acute Kidney Failure the week before"
)
app.dependency_overrides[get_settings] = get_settings_override

response = self.client.post(self.ENDPOINT_PROCESS_SINGLE, json=payload)
self.assertEqual(response.status_code, 200)

Expand All @@ -58,3 +54,37 @@ def testDeidProcess(self):
self.assertEqual(ann["pretty_name"], expected["pretty_name"])
self.assertEqual(ann["source_value"], expected["source_value"])
self.assertEqual(ann["cui"], expected["cui"])
app.dependency_overrides = {}

def test_deid_process_bulk_api(self):
payload = common.create_payload_content_from_doc_bulk([
"John had been diagnosed with acute Kidney Failure the week before"
])
app.dependency_overrides[get_settings] = get_settings_override

response = self.client.post(self.ENDPOINT_PROCESS_BULK, json=payload)
self.assertEqual(response.status_code, 200)

actual = response.json()

expected = {
"pretty_name": "PATIENT",
"source_value": "John",
"cui": "PATIENT",
"text": "[****] had been diagnosed with acute Kidney Failure the week before",
}
self.assertEqual(len(actual["result"]), 1)
self.assertEqual(actual["result"][0]["text"], expected["text"])

self.assertEqual(
len(actual["result"][0]["annotations"]),
0,
"CU-869a6wc6z No annotations are currently returned by the bulk API",
)

# Note: CU-869a6wc6z commented out these asserts until annotations are returned
# ann = actual["result"][0]["annotations"][0]["0"]
# self.assertEqual(ann["pretty_name"], expected["pretty_name"])
# self.assertEqual(ann["source_value"], expected["source_value"])
# self.assertEqual(ann["cui"], expected["cui"])
app.dependency_overrides = {}
3 changes: 2 additions & 1 deletion medcat-service/medcat_service/test/test_medcat_processor.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import unittest

from medcat_service.config import Settings
from medcat_service.nlp_processor import MedCatProcessor
from medcat_service.test.common import setup_medcat_processor


class TestMedCatProcessorReadiness(unittest.TestCase):
def setUp(self):
setup_medcat_processor()
self.processor = MedCatProcessor()
self.processor = MedCatProcessor(Settings())

def test_readiness_is_ok(self):
result = self.processor._check_medcat_readiness()
Expand Down
31 changes: 21 additions & 10 deletions medcat-service/scripts/integration_test_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,26 +70,37 @@ integration_test_medcat_service() {

# Test /api/process_bulk

if [[ "$expected_annotation" == "PATIENT" ]]; then
echo "Skipping Process_bulk test for DeID Mode testing "
echo "Process_bulk in DeID mode appears to have a bug making it return the text without deid"
return 0
fi

local api="http://${localhost_name}:${port}/api/process_bulk"
local input_text="Patient J. Smith had been diagnosed with acute kidney failure the week before"
local input_payload="{\"content\": [{\"text\":\"${input_text}\"}]}"
local expected_annotation="Kidney Failure"

local expected_annotation=${3:-Kidney Failure}

echo "Calling POST $api with payload '$input_payload'"
local actual

actual=$(curl -s -X POST $api \
# Capture both body and HTTP code
response=$(curl -s -w "\n%{http_code}" -X POST "$api" \
-H 'Content-Type: application/json' \
-d "$input_payload")

echo "Recieved result '$actual'"
# Split body and code
http_code=$(echo "$response" | tail -n1)
actual=$(echo "$response" | sed '$d')

echo "HTTP status: $http_code"
echo "Response body: '$actual'"

if [[ "$http_code" != "200" ]]; then
echo "ERROR: Expected HTTP 200, got $http_code"
echo -e "Actual response was:\n${actual}"
return 1
fi

if [[ "$expected_annotation" == "PATIENT" ]]; then
echo "CU-869a6wc6z Skipping Process_bulk annotation test for DeID Mode testing "
echo "Process_bulk in DeID mode has missing feature making it not return the annotations, just the deid text"
return 0
fi

local actual_annotation
actual_annotation=$(echo "$actual" | jq -r '.result[0].annotations[0]["0"].pretty_name')
Expand Down
Loading
Loading