From eb2f938328cc4b3a3a60e896480ad29c2e5f8cb2 Mon Sep 17 00:00:00 2001 From: Phoevos Kalemkeris Date: Thu, 28 Aug 2025 18:26:33 +0100 Subject: [PATCH 1/2] feat: Add MedCAT OPCS-4 service Introduce a MedCAT OPCS-4 service to handle OPCS annotation codes, corresponding to interventions and procedures. The new service mirrors the structure and behaviour of the ICD-10 equivalent, exposing only the OPCS-4 labels among the annotations generated by the underlying MedCAT model, relying on the existence of a 'cui2opcs4' mapping in the latter's concept database. This commit introduces the new model type and service, adjusts the existing tests accordingly, extends the Docker Compose stack with a 'medcat-opcs4' Docker service, and updates the Grafana/Prometheus configuration to take it into account. Signed-off-by: Phoevos Kalemkeris --- .github/workflows/api-docs.yaml | 4 +- README.md | 2 + app/api/routers/unsupervised_training.py | 2 +- app/cli/README.md | 8 +- app/cli/cli.py | 8 +- app/domain.py | 1 + app/model_services/medcat_model.py | 4 +- app/model_services/medcat_model_opcs4.py | 121 + app/registry.py | 2 + app/utils.py | 1 + docker-compose-dev.yml | 38 + docker-compose.yml | 47 + docker/medcat-opcs4/.dockerignore | 2 + docker/medcat-opcs4/.env | 3 + docker/medcat-opcs4/Dockerfile | 42 + docker/medcat-opcs4/requirements.txt | 27 + .../dashboards/cms_opcs4_medcat.json | 2837 +++++++++++++++++ docker/monitoring/prometheus/prometheus.yml | 6 + .../etc/nginx/sites-enabled/medcat-opcs4 | 47 + tests/app/api/test_dependencies.py | 6 + tests/app/conftest.py | 9 + .../model_services/test_medcat_model_opcs4.py | 135 + tests/app/test_registry.py | 6 +- tests/app/test_utils.py | 3 +- tests/resources/fixture/medcat_entities.json | 25 + 25 files changed, 3371 insertions(+), 15 deletions(-) create mode 100644 app/model_services/medcat_model_opcs4.py create mode 100644 docker/medcat-opcs4/.dockerignore create mode 100644 docker/medcat-opcs4/.env create mode 100644 docker/medcat-opcs4/Dockerfile create mode 100644 docker/medcat-opcs4/requirements.txt create mode 100644 docker/monitoring/grafana/provisioning/dashboards/cms_opcs4_medcat.json create mode 100644 docker/nginx/etc/nginx/sites-enabled/medcat-opcs4 create mode 100644 tests/app/model_services/test_medcat_model_opcs4.py diff --git a/.github/workflows/api-docs.yaml b/.github/workflows/api-docs.yaml index 27d7133..933e792 100644 --- a/.github/workflows/api-docs.yaml +++ b/.github/workflows/api-docs.yaml @@ -33,6 +33,7 @@ jobs: run: | python app/cli/cli.py export-model-apis --model-type medcat_snomed --add-training-apis --no-exclude-unsupervised-training --no-exclude-metacat-training --add-evaluation-apis --add-previews-apis python app/cli/cli.py export-model-apis --model-type medcat_icd10 --add-training-apis --no-exclude-unsupervised-training --no-exclude-metacat-training --add-evaluation-apis --add-previews-apis + python app/cli/cli.py export-model-apis --model-type medcat_opcs4 --add-training-apis --no-exclude-unsupervised-training --no-exclude-metacat-training --add-evaluation-apis --add-previews-apis python app/cli/cli.py export-model-apis --model-type medcat_umls --add-training-apis --no-exclude-unsupervised-training --no-exclude-metacat-training --add-evaluation-apis --add-previews-apis python app/cli/cli.py export-model-apis --model-type anoncat --add-training-apis --add-evaluation-apis --add-previews-apis --exclude-metacat-training --exclude-unsupervised-training python app/cli/cli.py export-model-apis --model-type transformers_deid --add-training-apis --add-evaluation-apis --add-previews-apis --exclude-metacat-training --exclude-unsupervised-training @@ -43,6 +44,7 @@ jobs: git checkout gh-pages mv ./medcat_snomed_model_apis.json ./docs/medcat_snomed_model_apis.json mv ./medcat_icd10_model_apis.json ./docs/medcat_icd10_model_apis.json + mv ./medcat_opcs4_model_apis.json ./docs/medcat_opcs4_model_apis.json mv ./medcat_umls_model_apis.json ./docs/medcat_umls_model_apis.json mv ./anoncat_model_apis.json ./docs/anoncat_model_apis.json mv ./transformers_deid_model_apis.json ./docs/transformers_deid_model_apis.json @@ -51,7 +53,7 @@ jobs: mv ./cogstack_model_serve_apis.json ./docs/cogstack_model_serve_apis.json git config --global user.name "cogstack-model-serve" git config --global user.email "cogstack-model-serve@users.noreply.github.com" - git add ./docs/medcat_snomed_model_apis.json ./docs/medcat_icd10_model_apis.json ./docs/medcat_umls_model_apis.json ./docs/anoncat_model_apis.json ./docs/transformers_deid_model_apis.json ./docs/huggingface_ner_model_apis.json ./docs/huggingface_llm_model_apis.json ./docs/cogstack_model_serve_apis.json + git add ./docs/medcat_snomed_model_apis.json ./docs/medcat_icd10_model_apis.json ./docs/medcat_opcs4_model_apis.json ./docs/medcat_umls_model_apis.json ./docs/anoncat_model_apis.json ./docs/transformers_deid_model_apis.json ./docs/huggingface_ner_model_apis.json ./docs/huggingface_llm_model_apis.json ./docs/cogstack_model_serve_apis.json if [[ `git status --porcelain --untracked-files=no` ]]; then git commit -m "update api docs" else diff --git a/README.md b/README.md index 653c152..96795e8 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Currently, CMS offers both HTTP endpoints for running NLP-related jobs and a com [OpenAPI Docs](https://cogstack.github.io/CogStack-ModelServe/): - [SNOMED MedCAT Model](https://cogstack.github.io/CogStack-ModelServe/docs/medcat_snomed_model_apis.html) - [ICD-10 MedCAT Model](https://cogstack.github.io/CogStack-ModelServe/docs/medcat_icd10_model_apis.html) +- [OPCS-4 MedCAT Model](https://cogstack.github.io/CogStack-ModelServe/docs/medcat_opcs4_model_apis.html) - [UMLS MedCAT Model](https://cogstack.github.io/CogStack-ModelServe/docs/medcat_umls_model_apis.html) - [De-ID MedCAT Model (AnonCAT)](https://cogstack.github.io/CogStack-ModelServe/docs/anoncat_model_apis.html) - [HuggingFace NER Model](https://cogstack.github.io/CogStack-ModelServe/docs/huggingface_ner_model_apis.html) @@ -59,6 +60,7 @@ The following table summarises the servable model types with their respective ou |:---------------------:|:---------------:|:---------------------------------:| | medcat_snomed | medcat-snomed | labelled with SNOMED concepts | | medcat_icd10 | medcat-icd10 | labelled with ICD-10 concepts | +| medcat_opcs4 | medcat-opcs4 | labelled with OPCS-4 concepts | | medcat_umls | medcat-umls | labelled with UMLS concepts | | medcat_deid (anoncat) | medcat-deid | labelled with latest PII concepts | | huggingface_ner | huggingface_ner | customer managed labels | diff --git a/app/api/routers/unsupervised_training.py b/app/api/routers/unsupervised_training.py index d07f1ab..6e28167 100644 --- a/app/api/routers/unsupervised_training.py +++ b/app/api/routers/unsupervised_training.py @@ -162,7 +162,7 @@ async def train_unsupervised_with_hf_dataset( if hf_dataset_repo_id is None and hf_dataset_package is None: raise ClientException("Either 'hf_dataset_repo_id' or 'hf_dataset_package' must be provided") - if model_service.info().model_type not in [ModelType.HUGGINGFACE_NER, ModelType.MEDCAT_SNOMED, ModelType.MEDCAT_ICD10, ModelType.MEDCAT_UMLS]: + if model_service.info().model_type not in [ModelType.HUGGINGFACE_NER, ModelType.MEDCAT_SNOMED, ModelType.MEDCAT_ICD10, ModelType.MEDCAT_OPCS4, ModelType.MEDCAT_UMLS]: raise ConfigurationException(f"Currently this endpoint is not available for models of type: {model_service.info().model_type.value}") data_dir = tempfile.TemporaryDirectory() diff --git a/app/cli/README.md b/app/cli/README.md index e684f9d..ad2fbf2 100644 --- a/app/cli/README.md +++ b/app/cli/README.md @@ -37,7 +37,7 @@ $ cms serve [OPTIONS] **Options**: -* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to serve [required] +* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_opcs4|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to serve [required] * `--model-path TEXT`: The file path to the model package * `--mlflow-model-uri models:/MODEL_NAME/ENV`: The URI of the MLflow model to serve * `--host TEXT`: The hostname of the server [default: 127.0.0.1] @@ -60,7 +60,7 @@ $ cms train [OPTIONS] **Options**: -* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to train [required] +* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_opcs4|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to train [required] * `--base-model-path TEXT`: The file path to the base model package to be trained on * `--mlflow-model-uri models:/MODEL_NAME/ENV`: The URI of the MLflow model to train * `--training-type [supervised|unsupervised|meta_supervised]`: The type of training [required] @@ -86,7 +86,7 @@ $ cms register [OPTIONS] **Options**: -* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to register [required] +* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_opcs4|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to register [required] * `--model-path TEXT`: The file path to the model package [required] * `--model-name TEXT`: The string representation of the registered model [required] * `--training-type [supervised|unsupervised|meta_supervised]`: The type of training the model went through @@ -108,7 +108,7 @@ $ cms export-model-apis [OPTIONS] **Options**: -* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to serve [required] +* `--model-type [medcat_snomed|medcat_umls|medcat_icd10|medcat_opcs4|medcat_deid|anoncat|transformers_deid|huggingface_ner]`: The type of the model to serve [required] * `--add-training-apis / --no-add-training-apis`: Add training APIs to the doc [default: no-add-training-apis] * `--add-evaluation-apis / --no-add-evaluation-apis`: Add evaluation APIs to the doc [default: no-add-evaluation-apis] * `--add-previews-apis / --no-add-previews-apis`: Add preview APIs to the doc [default: no-add-previews-apis] diff --git a/app/cli/cli.py b/app/cli/cli.py index 8de119b..8a94647 100644 --- a/app/cli/cli.py +++ b/app/cli/cli.py @@ -65,7 +65,7 @@ def serve_model( port: str = typer.Option("8000", help="The port of the server"), model_name: Optional[str] = typer.Option(None, help="The string representation of the model name"), streamable: bool = typer.Option(False, help="Serve the streamable endpoints only"), - device: Device = typer.Option(Device.DEFAULT, help="The device to serve the model on"), + device: Device = typer.Option(Device.DEFAULT.value, help="The device to serve the model on"), llm_engine: Optional[LlmEngine] = typer.Option(LlmEngine.CMS.value, help="The engine to use for text generation"), debug: Optional[bool] = typer.Option(None, help="Run in the debug mode"), ) -> None: @@ -90,7 +90,7 @@ def serve_model( model_name = model_name or "CMS model" logger = _get_logger(debug, model_type, model_name) config = get_settings() - config.DEVICE = device.value + config.DEVICE = device if model_type in [ ModelType.HUGGINGFACE_NER, ModelType.MEDCAT_DEID, @@ -186,7 +186,7 @@ def train_model( hyperparameters: str = typer.Option("{}", help="The overriding hyperparameters serialised as JSON string"), description: Optional[str] = typer.Option(None, help="The description of the training or change logs"), model_name: Optional[str] = typer.Option(None, help="The string representation of the model name"), - device: Device = typer.Option(Device.DEFAULT, help="The device to train the model on"), + device: Device = typer.Option(Device.DEFAULT.value, help="The device to train the model on"), debug: Optional[bool] = typer.Option(None, help="Run in the debug mode"), ) -> None: """ @@ -212,7 +212,7 @@ def train_model( logger = _get_logger(debug, model_type, model_name) config = get_settings() - config.DEVICE = device.value + config.DEVICE = device model_service_dep = ModelServiceDep(model_type, config) cms_globals.model_service_dep = model_service_dep diff --git a/app/domain.py b/app/domain.py index 8cb7862..c9d38cf 100644 --- a/app/domain.py +++ b/app/domain.py @@ -10,6 +10,7 @@ class ModelType(str, Enum): MEDCAT_SNOMED = "medcat_snomed" MEDCAT_UMLS = "medcat_umls" MEDCAT_ICD10 = "medcat_icd10" + MEDCAT_OPCS4 = "medcat_opcs4" MEDCAT_DEID = "medcat_deid" ANONCAT = "anoncat" TRANSFORMERS_DEID = "transformers_deid" diff --git a/app/model_services/medcat_model.py b/app/model_services/medcat_model.py index d5265f1..a3a6f2c 100644 --- a/app/model_services/medcat_model.py +++ b/app/model_services/medcat_model.py @@ -165,7 +165,7 @@ def annotate(self, text: str) -> List[Annotation]: doc = self.model.get_entities( text, - addl_info=["cui2icd10", "cui2ontologies", "cui2snomed", "cui2athena_ids"], + addl_info=["cui2icd10", "cui2opcs4", "cui2ontologies", "cui2snomed", "cui2athena_ids"], ) return [load_pydantic_object_from_dict(Annotation, record) for record in self.get_records_from_doc(doc)] @@ -186,7 +186,7 @@ def batch_annotate(self, texts: List[str]) -> List[List[Annotation]]: self._data_iterator(texts), batch_size_chars=batch_size_chars, nproc=max(int(cpu_count() / 2), 1), - addl_info=["cui2icd10", "cui2ontologies", "cui2snomed", "cui2athena_ids"], + addl_info=["cui2icd10", "cui2opcs4", "cui2ontologies", "cui2snomed", "cui2athena_ids"], ) docs = dict(sorted(docs.items(), key=lambda x: x[0])) annotations_list = [] diff --git a/app/model_services/medcat_model_opcs4.py b/app/model_services/medcat_model_opcs4.py new file mode 100644 index 0000000..a8a8e9c --- /dev/null +++ b/app/model_services/medcat_model_opcs4.py @@ -0,0 +1,121 @@ +import logging +import pandas as pd +from typing import Dict, Optional, final, List + +from app import __version__ as app_version +from app.model_services.medcat_model import MedCATModel +from app.config import Settings +from app.domain import ModelCard, ModelType + +logger = logging.getLogger("cms") + + +@final +class MedCATModelOpcs4(MedCATModel): + """A model service for MedCAT OPCS-4 models.""" + + OPCS4_KEY = "opcs4" + + def __init__( + self, + config: Settings, + model_parent_dir: Optional[str] = None, + enable_trainer: Optional[bool] = None, + model_name: Optional[str] = None, + base_model_file: Optional[str] = None, + ) -> None: + """ + Initialises the MedCAT OPCS-4 model service with specified configurations. + + Args: + config (Settings): The configuration for the model service. + model_parent_dir (Optional[str]): The directory where the model package is stored. Defaults to None. + enable_trainer (Optional[bool]): The flag to enable or disable trainers. Defaults to None. + model_name (Optional[str]): The name of the model. Defaults to None. + base_model_file (Optional[str]): The model package file name. Defaults to None. + """ + super().__init__( + config, + model_parent_dir=model_parent_dir, + enable_trainer=enable_trainer, + model_name=model_name, + base_model_file=base_model_file, + ) + self.model_name = model_name or "OPCS-4 MedCAT model" + + @property + def api_version(self) -> str: + """Getter for the API version of the model service.""" + + # APP version is used although each model service could have its own API versioning + return app_version + + def info(self) -> ModelCard: + """ + Retrieves information about the MedCAT OPCS-4 model. + + Returns: + ModelCard: A card containing information about the MedCAT OPCS-4 model. + """ + + return ModelCard( + model_description=self.model_name, + model_type=ModelType.MEDCAT_OPCS4, + api_version=self.api_version, + model_card=self.model.get_model_card(as_dict=True), + ) + + def get_records_from_doc(self, doc: Dict) -> List[Dict]: + """ + Extracts and formats entity records from a document dictionary. + + Args: + doc (Dict): The document dictionary containing extracted named entities. + + Returns: + List[Dict]: A list of formatted entity records. + """ + + df = pd.DataFrame(doc["entities"].values()) + + if df.empty: + df = pd.DataFrame(columns=["label_name", "label_id", "start", "end", "accuracy"]) + else: + new_rows = [] + for _, row in df.iterrows(): + if self.OPCS4_KEY not in row or not row[self.OPCS4_KEY]: + logger.debug("No mapped OPCS-4 code associated with the entity: %s", row) + else: + for opcs4 in row[self.OPCS4_KEY]: + output_row = row.copy() + if isinstance(opcs4, str): + output_row[self.OPCS4_KEY] = opcs4 + elif isinstance(opcs4, dict): + output_row[self.OPCS4_KEY] = opcs4.get("code") + output_row["pretty_name"] = opcs4.get("name") + elif isinstance(opcs4, list) and opcs4: + output_row[self.OPCS4_KEY] = opcs4[-1] + else: + logger.error("Unknown format for the OPCS-4 code(s): %s", opcs4) + if "athena_ids" in output_row and output_row["athena_ids"]: + output_row["athena_ids"] = [ + athena_id["code"] for athena_id in output_row["athena_ids"] + ] + new_rows.append(output_row) + if new_rows: + df = pd.DataFrame(new_rows) + df.rename( + columns={ + "pretty_name": "label_name", + self.OPCS4_KEY: "label_id", + "types": "categories", + "acc": "accuracy", + "athena_ids": "athena_ids", + }, + inplace=True, + ) + df = self._retrieve_meta_annotations(df) + else: + df = pd.DataFrame(columns=["label_name", "label_id", "start", "end", "accuracy"]) + records = df.to_dict("records") + return records diff --git a/app/registry.py b/app/registry.py index a1cccc1..ec582f3 100644 --- a/app/registry.py +++ b/app/registry.py @@ -3,6 +3,7 @@ from app.model_services.medcat_model_snomed import MedCATModelSnomed from app.model_services.medcat_model_umls import MedCATModelUmls from app.model_services.medcat_model_icd10 import MedCATModelIcd10 +from app.model_services.medcat_model_opcs4 import MedCATModelOpcs4 from app.model_services.medcat_model_deid import MedCATModelDeIdentification from app.model_services.huggingface_ner_model import HuggingFaceNerModel from app.model_services.huggingface_llm_model import HuggingFaceLlmModel @@ -11,6 +12,7 @@ ModelType.MEDCAT_SNOMED: MedCATModelSnomed, ModelType.MEDCAT_UMLS: MedCATModelUmls, ModelType.MEDCAT_ICD10: MedCATModelIcd10, + ModelType.MEDCAT_OPCS4: MedCATModelOpcs4, ModelType.MEDCAT_DEID: MedCATModelDeIdentification, ModelType.ANONCAT: MedCATModelDeIdentification, ModelType.TRANSFORMERS_DEID: TransformersModelDeIdentification, diff --git a/app/utils.py b/app/utils.py index 630a558..df27d51 100644 --- a/app/utils.py +++ b/app/utils.py @@ -63,6 +63,7 @@ def get_code_base_uri(model_name: str) -> Optional[str]: code_base_uris = { CodeType.SNOMED.value: "http://snomed.info/id", CodeType.ICD10.value: "https://icdcodelookup.com/icd-10/codes", + CodeType.OPCS4.value: "https://nhsengland.kahootz.com/connect.ti/t_c_home/view?objectId=14270896#14270896", CodeType.UMLS.value: "https://uts.nlm.nih.gov/uts/umls/concept", } for code_name, base_uri in code_base_uris.items(): diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 30f7fcb..fb41b5b 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -81,6 +81,44 @@ services: - https_proxy=$HTTPS_PROXY - no_proxy=localhost + medcat-opcs4: + extends: + file: ./docker-compose.yml + service: medcat-opcs4 + labels: + - org.cogstack.model-serve.dev=true + build: + context: ./ + dockerfile: ./docker/medcat-opcs4/Dockerfile + args: + - CMS_MODEL_NAME=OPCS-4 MedCAT model + - CMS_UID=${CMS_UID:-1000} + - CMS_GID=${CMS_GID:-1000} + - HTTP_PROXY=$HTTP_PROXY + - HTTPS_PROXY=$HTTPS_PROXY + - NO_PROXY=$NO_PROXY + image: local-cms-medcat-opcs4:do-not-push + environment: + - BASE_MODEL_FULL_PATH=$MODEL_PACKAGE_FULL_PATH + - AWS_ACCESS_KEY_ID= + - AWS_SECRET_ACCESS_KEY= + - MLFLOW_S3_ENDPOINT_URL= + - MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI:-file:/tmp/mlruns} + - MLFLOW_TRACKING_USERNAME= + - MLFLOW_TRACKING_PASSWORD= + - MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING=${MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING:-false} + - GELF_INPUT_URI= + - AUTH_USER_ENABLED=${AUTH_USER_ENABLED:-false} + - AUTH_JWT_SECRET=$AUTH_JWT_SECRET + - AUTH_ACCESS_TOKEN_EXPIRE_SECONDS=${AUTH_ACCESS_TOKEN_EXPIRE_SECONDS:-3600} + - AUTH_DATABASE_URL=${AUTH_DATABASE_URL:-sqlite+aiosqlite:///./cms-users.db} + - HTTP_PROXY=$HTTP_PROXY + - HTTPS_PROXY=$HTTPS_PROXY + - NO_PROXY=localhost + - http_proxy=$HTTP_PROXY + - https_proxy=$HTTPS_PROXY + - no_proxy=localhost + medcat-deid: extends: file: ./docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml index adcda42..1fc02dc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -98,6 +98,53 @@ services: retries: 3 start_period: 60s + medcat-opcs4: + image: cogstacksystems/cogstack-modelserve:0.1.0 + labels: + - org.cogstack.model-serve=medcat_opcs4 + - org.cogstack.model-name=OPCS-4 MedCAT model + - org.cogstack.model-path=$MODEL_PACKAGE_FULL_PATH + restart: always + networks: + - cms + volumes: + - ${MODEL_PACKAGE_FULL_PATH}:/app/model/model.zip:ro + - retrained-models:/app/model/retrained:rw + - ./docker/medcat-opcs4/.env:/app/envs/.env:ro + environment: + - BASE_MODEL_FULL_PATH=$MODEL_PACKAGE_FULL_PATH + - CMS_MODEL_TYPE=medcat_opcs4 + - CMS_MODEL_NAME=OPCS-4 MedCAT model + - CMS_STREAMABLE=${CMS_STREAMABLE:-false} + - AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY + - MLFLOW_S3_ENDPOINT_URL=${MLFLOW_S3_ENDPOINT_URL:-http://minio:9000} + - MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI:-http://mlflow-ui:5000} + - MLFLOW_TRACKING_USERNAME=${MLFLOW_TRACKING_USERNAME:-admin} + - MLFLOW_TRACKING_PASSWORD=${MLFLOW_TRACKING_PASSWORD:-password} + - MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING=${MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING:-true} + - GELF_INPUT_URI=${GELF_INPUT_URI:-http://graylog:12201} + - AUTH_USER_ENABLED=${AUTH_USER_ENABLED:-false} + - AUTH_JWT_SECRET=$AUTH_JWT_SECRET + - AUTH_ACCESS_TOKEN_EXPIRE_SECONDS=${AUTH_ACCESS_TOKEN_EXPIRE_SECONDS:-3600} + - AUTH_DATABASE_URL=${AUTH_DATABASE_URL:-sqlite+aiosqlite:///./cms-users.db} + - HTTP_PROXY=$HTTP_PROXY + - HTTPS_PROXY=$HTTPS_PROXY + - NO_PROXY=mlflow-ui,minio,graylog,auth-db,localhost + - http_proxy=$HTTP_PROXY + - https_proxy=$HTTPS_PROXY + - no_proxy=mlflow-ui,minio,graylog,auth-db,localhost + expose: + - 8000 + ports: + - 8182:8000 + healthcheck: # readiness check + test: ["CMD", "curl", "-f", "http://localhost:8000/info"] + interval: 1m30s + timeout: 10s + retries: 3 + start_period: 60s + medcat-deid: image: cogstacksystems/cogstack-modelserve:0.1.0 labels: diff --git a/docker/medcat-opcs4/.dockerignore b/docker/medcat-opcs4/.dockerignore new file mode 100644 index 0000000..90e2e80 --- /dev/null +++ b/docker/medcat-opcs4/.dockerignore @@ -0,0 +1,2 @@ +app/model/* +app/mlruns/* \ No newline at end of file diff --git a/docker/medcat-opcs4/.env b/docker/medcat-opcs4/.env new file mode 100644 index 0000000..e95279d --- /dev/null +++ b/docker/medcat-opcs4/.env @@ -0,0 +1,3 @@ +ENABLE_TRAINING_APIS=true +ENABLE_EVALUATION_APIS=true +ENABLE_PREVIEWS_APIS=true diff --git a/docker/medcat-opcs4/Dockerfile b/docker/medcat-opcs4/Dockerfile new file mode 100644 index 0000000..fac1dfa --- /dev/null +++ b/docker/medcat-opcs4/Dockerfile @@ -0,0 +1,42 @@ +FROM python:3.10 +LABEL "org.cogstack.model-serve"="medcat_opcs4" + +ARG CMS_MODEL_NAME +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG NO_PROXY +ARG CMS_UID=1000 +ARG CMS_GID=1000 + +ENV CMS_MODEL_NAME=$CMS_MODEL_NAME +ENV CMS_MODEL_TYPE=medcat_opcs4 +ENV HTTP_PROXY=$HTTP_PROXY +ENV HTTPS_PROXY=$HTTPS_PROXY +ENV NO_PROXY=$NO_PROXY +ENV http_proxy=$HTTP_PROXY +ENV https_proxy=$HTTPS_PROXY +ENV no_proxy=$NO_PROXY +ENV PYTHONUNBUFFERED=1 +ENV PATH="/home/cms/.local/bin:${PATH}" + +RUN apt-get update && apt-get install -y --no-install-recommends nano telnet && \ + rm -rf /var/lib/apt/lists/* +RUN addgroup --gid $CMS_GID cms || true && \ + adduser --uid $CMS_UID --gid $CMS_GID --disabled-password --gecos "" cms || true && \ + echo "cms ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +COPY app /app +COPY docker/medcat-opcs4/requirements.txt /app/requirements.txt +COPY docker/entrypoint/serve.sh /app/entrypoint.sh +RUN mkdir -p /app/model/model && \ + mkdir -p /app/model/retrained && \ + chown -R $CMS_UID:$CMS_GID /app +RUN pip install --no-cache-dir -U pip &&\ + pip install --no-cache-dir -r /app/requirements.txt && \ + python -m spacy download en_core_web_md +RUN chmod +x /app/entrypoint.sh + +WORKDIR /app +EXPOSE 8000 +USER cms:cms +CMD ["/app/entrypoint.sh"] diff --git a/docker/medcat-opcs4/requirements.txt b/docker/medcat-opcs4/requirements.txt new file mode 100644 index 0000000..6420bb6 --- /dev/null +++ b/docker/medcat-opcs4/requirements.txt @@ -0,0 +1,27 @@ +medcat~=1.14.1 +transformers~=4.43.0 +blis<1.0.0 +fastapi~=0.102.0 +uvicorn~=0.29.0 +python-multipart~=0.0.5 +ijson~=3.1.4 +python-dotenv~=0.20.0 +mlflow~=2.16.2 +psycopg2-binary~=2.9.4 +boto3~=1.28.84 +typer~=0.7.0 +prometheus-fastapi-instrumentator~=5.11.2 +sentencepiece~=0.2.0 +slowapi~=0.1.7 +graypy~=2.1.0 +fastapi-users~=11.0.0 +fastapi-users-db-sqlalchemy~=5.0.0 +asyncpg~=0.27.0 +aiosqlite~=0.19.0 +evaluate~=0.4.1 +websockets~=12.0 +pynvml~=11.5.3 +toml~=0.10.2 +peft<0.14.0 +setuptools +wheel \ No newline at end of file diff --git a/docker/monitoring/grafana/provisioning/dashboards/cms_opcs4_medcat.json b/docker/monitoring/grafana/provisioning/dashboards/cms_opcs4_medcat.json new file mode 100644 index 0000000..d35c2e7 --- /dev/null +++ b/docker/monitoring/grafana/provisioning/dashboards/cms_opcs4_medcat.json @@ -0,0 +1,2837 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "round(increase(http_requests_total{handler=\"/process\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", method=\"POST\", status=\"2xx\"}[10y]))", + "instant": false, + "legendFormat": "/process", + "range": true, + "refId": "A" + } + ], + "title": "Number of Processed Docs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "round(increase(http_requests_total{handler=\"/process_bulk\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", method=\"POST\", status=\"2xx\"}[10y]))", + "legendFormat": "/process_bulk", + "range": true, + "refId": "A" + } + ], + "title": "Number of Processed Batches", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 4, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:766", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", status=\"4xx\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "errors", + "range": true, + "refId": "A" + } + ], + "title": "4xx Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "errors" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 18, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:766", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", status=\"5xx\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "errors", + "range": true, + "refId": "A" + } + ], + "title": "5xx Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "round(increase(cms_doc_annotations_sum{handler=\"/process\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[10y]))", + "instant": false, + "legendFormat": "/process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "round(increase(cms_doc_annotations_sum{handler=\"/process_bulk\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[10y]))", + "hide": false, + "legendFormat": "/process_bulk", + "range": true, + "refId": "B" + } + ], + "title": "Number of Recognised Entities ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 33, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "increase(http_request_size_bytes_sum{handler=\"/process\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[10y])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "/process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "increase(http_request_size_bytes_sum{handler=\"/process_bulk\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[10y])", + "hide": false, + "legendFormat": "/process_bulk", + "range": true, + "refId": "B" + } + ], + "title": "Processed Doc Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "ave_size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "avg_entities" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 41, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ave_size", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "hide": false, + "legendFormat": "avg_entities", + "range": true, + "refId": "B" + } + ], + "title": "Doc Size vs Entities (/process)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "ave_size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "avg_entities" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 43, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ave_size", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "hide": false, + "legendFormat": "avg_entities", + "range": true, + "refId": "B" + } + ], + "title": "Doc Size vs Entities (/process_bulk)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HTTP 500" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#bf1b00", + "mode": "fixed" + } + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "2xx" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 13, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:140", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "sum by (status) (rate(http_requests_total{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ status }}", + "range": true, + "refId": "A" + } + ], + "title": "Requests per Second", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 6, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Min", + "sortDesc": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!=\"/favicon.ico\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!=\"/favicon.ico\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "range": true, + "refId": "A" + } + ], + "title": "Response Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "none" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 11, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:1079", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "increase(http_request_duration_seconds_bucket{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!=\"/favicon.ico\", le=\"0.5\"}[$__rate_interval]) \n/ ignoring (le) increase(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!=\"/favicon.ico\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "refId": "A" + } + ], + "title": "Requests under 500ms", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "cms_avg_anno_acc_per_doc{handler=\"/process\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "instant": false, + "legendFormat": "/process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "cms_avg_anno_acc_per_doc{handler=\"/process_bulk\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "hide": false, + "legendFormat": "/process_bulk", + "range": true, + "refId": "B" + } + ], + "title": "Average Accuracy of Recognised Entities per Document", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "cms_avg_meta_anno_conf_per_doc{handler=\"/process\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "instant": false, + "legendFormat": "/process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "cms_avg_meta_anno_conf_per_doc{handler=\"/process_bulk\", job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "hide": false, + "legendFormat": "/process_bulk", + "range": true, + "refId": "B" + } + ], + "title": "Average Confidence of Meta Annotations per Document", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "hidden", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.axisPlacement", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ratio" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "binBps" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 24, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "duration", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "size", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "(rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])) / (rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "ratio", + "range": true, + "refId": "C" + } + ], + "title": "Request Size vs Duration (/process)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "hidden", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.axisPlacement", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "entities" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ratio" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cps" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 35, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "duration", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "entities", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "(rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval])) / (rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "ratio", + "range": true, + "refId": "C" + } + ], + "title": "Recognised Entities vs Duration (/process)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "hidden", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.axisPlacement", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ratio" + }, + "properties": [ + { + "id": "unit", + "value": "binBps" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "ratio" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 25, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "duration", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "size", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "(rate(http_request_size_bytes_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_size_bytes_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])) / (rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "ratio", + "range": true, + "refId": "C" + } + ], + "title": "Request Size vs Duration (/process_bulk)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "hidden", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.axisPlacement", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "entities" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ratio" + }, + "properties": [ + { + "id": "unit", + "value": "cps" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "ratio" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 37, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:146", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "duration", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "entities", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "(rate(cms_doc_annotations_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(cms_doc_annotations_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval])) / (rate(http_request_duration_seconds_sum{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]) / rate(http_request_duration_seconds_count{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler=\"/process_bulk\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "ratio", + "range": true, + "refId": "C" + } + ], + "title": "Recognised Entities vs Duration (/process_bulk)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 15, + "links": [], + "options": { + "legend": { + "calcs": [ + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:426", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, rate(http_request_duration_seconds_bucket{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!~\"none|/favicon.ico\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration (P50)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 16, + "links": [], + "options": { + "legend": { + "calcs": [ + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:426", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, rate(http_request_duration_seconds_bucket{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\", handler!~\"none|/favicon.ico\"}[$__rate_interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "range": true, + "refId": "A" + } + ], + "title": "Request Duration (P90)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 64 + }, + "id": 8, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:638", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "process_resident_memory_bytes{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "mem", + "range": true, + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 64 + }, + "id": 9, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.3.2", + "targets": [ + { + "$$hashKey": "object:638", + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "builder", + "expr": "rate(process_cpu_seconds_total{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "cpu", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 64 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A53D3B2E146C92D" + }, + "editorMode": "code", + "expr": "process_open_fds{job=\"cms_medcat-opcs4\", instance=\"medcat-opcs4:8000\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Open FDS", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OPCS-4 MedCAT Model", + "uid": "34LdJuuNz", + "version": 1, + "weekStart": "" +} diff --git a/docker/monitoring/prometheus/prometheus.yml b/docker/monitoring/prometheus/prometheus.yml index f61fea3..762153b 100644 --- a/docker/monitoring/prometheus/prometheus.yml +++ b/docker/monitoring/prometheus/prometheus.yml @@ -31,6 +31,12 @@ scrape_configs: - targets: - "medcat-icd10:8000" + - job_name: "cms_medcat-opcs4" + scrape_interval: 15s + static_configs: + - targets: + - "medcat-opcs4:8000" + - job_name: "cms_medcat-deid" scrape_interval: 15s static_configs: diff --git a/docker/nginx/etc/nginx/sites-enabled/medcat-opcs4 b/docker/nginx/etc/nginx/sites-enabled/medcat-opcs4 new file mode 100644 index 0000000..db0b425 --- /dev/null +++ b/docker/nginx/etc/nginx/sites-enabled/medcat-opcs4 @@ -0,0 +1,47 @@ +server { + listen 28181 ssl http2 default_server; + listen [::]:28181 ssl http2 default_server; + server_name localhost; + + add_header Strict-Transport-Security "max-age=31536000" always; + + ssl_session_cache shared:SSL:20m; + ssl_session_timeout 10m; + + ssl_protocols TLSv1.2; + ssl_prefer_server_ciphers on; + ssl_ciphers "ECDH+AESGCM:ECDH+AES256:ECDH+AES128:!ADH:!AECDH:!MD5;"; + + ssl_stapling on; + ssl_stapling_verify on; + resolver 8.8.8.8 8.8.4.4; + + ssl_certificate /etc/nginx/root-ca.pem; + ssl_certificate_key /etc/nginx/root-ca.key; + + access_log /var/log/nginx/access_medcat-opcs4.log; + error_log /var/log/nginx/error_medcat-opcs4.log; + + location / { + include cors.conf; + resolver 127.0.0.11 valid=30s; + set $backend "medcat-opcs4:8000"; + proxy_pass http://$backend; + proxy_set_header Host $host; + error_page 502 503 504 = @fallback; + } + + location /stream/ws { + include cors.conf; + resolver 127.0.0.11 valid=30s; + set $backend "medcat-opcs4:8000"; + proxy_pass http://$backend; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + } + + location @fallback { + return 503 "Service is temporarily unavailable. Please try again later."; + } +} diff --git a/tests/app/api/test_dependencies.py b/tests/app/api/test_dependencies.py index e9c21b2..33541f0 100644 --- a/tests/app/api/test_dependencies.py +++ b/tests/app/api/test_dependencies.py @@ -5,6 +5,7 @@ from app.config import Settings from app.model_services.medcat_model import MedCATModel from app.model_services.medcat_model_icd10 import MedCATModelIcd10 +from app.model_services.medcat_model_opcs4 import MedCATModelOpcs4 from app.model_services.medcat_model_umls import MedCATModelUmls from app.model_services.medcat_model_deid import MedCATModelDeIdentification from app.model_services.trf_model_deid import TransformersModelDeIdentification @@ -21,6 +22,11 @@ def test_medcat_icd10_dep(): assert isinstance(model_service_dep(), MedCATModelIcd10) +def test_medcat_opcs4_dep(): + model_service_dep = ModelServiceDep("medcat_opcs4", Settings()) + assert isinstance(model_service_dep(), MedCATModelOpcs4) + + def test_medcat_umls_dep(): model_service_dep = ModelServiceDep("medcat_umls", Settings()) assert isinstance(model_service_dep(), MedCATModelUmls) diff --git a/tests/app/conftest.py b/tests/app/conftest.py index 0f012f0..ee9c3c5 100644 --- a/tests/app/conftest.py +++ b/tests/app/conftest.py @@ -4,6 +4,7 @@ from app.config import Settings from app.model_services.medcat_model_snomed import MedCATModelSnomed from app.model_services.medcat_model_icd10 import MedCATModelIcd10 +from app.model_services.medcat_model_opcs4 import MedCATModelOpcs4 from app.model_services.medcat_model_umls import MedCATModelUmls from app.model_services.medcat_model_deid import MedCATModelDeIdentification from app.model_services.trf_model_deid import TransformersModelDeIdentification @@ -60,6 +61,14 @@ def medcat_icd10_model(): return MedCATModelIcd10(config, MODEL_PARENT_DIR, True) +@pytest.fixture(scope="function") +def medcat_opcs4_model(): + config = Settings() + config.BASE_MODEL_FILE = "opcs4_model.zip" + config.TYPE_UNIQUE_ID_WHITELIST = "T-9,T-11,T-18,T-39,T-40,T-45" + return MedCATModelOpcs4(config, MODEL_PARENT_DIR, True) + + @pytest.fixture(scope="function") def medcat_umls_model(): config = Settings() diff --git a/tests/app/model_services/test_medcat_model_opcs4.py b/tests/app/model_services/test_medcat_model_opcs4.py new file mode 100644 index 0000000..12b9d0d --- /dev/null +++ b/tests/app/model_services/test_medcat_model_opcs4.py @@ -0,0 +1,135 @@ +import os +import tempfile +import pytest +from unittest.mock import Mock +from tests.app.conftest import MODEL_PARENT_DIR +from medcat.cat import CAT +from app import __version__ +from app.domain import ModelType +from app.model_services.medcat_model_opcs4 import MedCATModelOpcs4 + + +def test_model_name(medcat_opcs4_model): + assert medcat_opcs4_model.model_name == "OPCS-4 MedCAT model" + + +def test_api_version(medcat_opcs4_model): + assert medcat_opcs4_model.api_version == __version__ + + +def test_from_model(medcat_opcs4_model): + new_model_service = medcat_opcs4_model.from_model(medcat_opcs4_model.model) + assert isinstance(new_model_service, MedCATModelOpcs4) + assert new_model_service.model == medcat_opcs4_model.model + + +def test_get_records_from_doc(medcat_opcs4_model): + records = medcat_opcs4_model.get_records_from_doc({ + "entities": + { + "0": { + "pretty_name": "pretty_name", + "cui": "cui", + "types": ["type"], + "opcs4": [{"code": "code", "name": "name"}], + "athena_ids": [{"name": "name_1", "code": "code_1"}, {"name": "name_2", "code": "code_2"}], + "acc": 1.0, + "meta_anns": {} + } + } + }) + assert len(records) == 1 + assert records[0]["label_name"] == "name" + assert records[0]["cui"] == "cui" + assert records[0]["label_id"] == "code" + assert records[0]["categories"] == ["type"] + assert records[0]["athena_ids"] == ["code_1", "code_2"] + assert records[0]["accuracy"] == 1.0 + assert records[0]["meta_anns"] == {} + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_init_model_with_no_tui_filter(medcat_opcs4_model): + original = MedCATModelOpcs4.load_model(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")) + medcat_opcs4_model._whitelisted_tuis = set([""]) + medcat_opcs4_model.init_model() + assert medcat_opcs4_model.model is not None + assert medcat_opcs4_model.model.cdb.config.linking.filters.get("cuis") == original.cdb.config.linking.filters.get("cuis") + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_init_model(medcat_opcs4_model): + medcat_opcs4_model.init_model() + target_tuis = medcat_opcs4_model._config.TYPE_UNIQUE_ID_WHITELIST.split(",") + target_cuis = {cui for tui in target_tuis for cui in medcat_opcs4_model.model.cdb.addl_info.get("type_id2cuis").get(tui, {})} + assert medcat_opcs4_model.model is not None + assert medcat_opcs4_model.model.cdb.config.linking.filters.get("cuis") == target_cuis + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_load_model(medcat_opcs4_model): + cat = MedCATModelOpcs4.load_model(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")) + assert type(cat) is CAT + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_info(medcat_opcs4_model): + medcat_opcs4_model.init_model() + model_card = medcat_opcs4_model.info() + assert type(model_card.api_version) is str + assert type(model_card.model_description) is str + assert model_card.model_type == ModelType.MEDCAT_OPCS4 + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_annotate(medcat_opcs4_model): + medcat_opcs4_model.init_model() + annotations = medcat_opcs4_model.annotate("Spinal tap") + assert len(annotations) == 1 + assert type(annotations[0]["label_name"]) is str + assert annotations[0].start == 0 + assert annotations[0].end == 10 + assert annotations[0].accuracy > 0 + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_train_supervised(medcat_opcs4_model): + medcat_opcs4_model.init_model() + medcat_opcs4_model._config.REDEPLOY_TRAINED_MODEL = "false" + medcat_opcs4_model._config.SKIP_SAVE_MODEL = "true" + medcat_opcs4_model._supervised_trainer = Mock() + with tempfile.TemporaryFile("r+") as f: + medcat_opcs4_model.train_supervised(f, 1, 1, "training_id", "input_file_name") + medcat_opcs4_model._supervised_trainer.train.assert_called() + + +@pytest.mark.skipif( + not os.path.exists(os.path.join(MODEL_PARENT_DIR, "opcs4_model.zip")), + reason="requires the model file to be present in the resources folder", +) +def test_train_unsupervised(medcat_opcs4_model): + medcat_opcs4_model.init_model() + medcat_opcs4_model._config.REDEPLOY_TRAINED_MODEL = "false" + medcat_opcs4_model._config.SKIP_SAVE_MODEL = "true" + medcat_opcs4_model._unsupervised_trainer = Mock() + with tempfile.TemporaryFile("r+") as f: + medcat_opcs4_model.train_unsupervised(f, 1, 1, "training_id", "input_file_name") + medcat_opcs4_model._unsupervised_trainer.train.assert_called() diff --git a/tests/app/test_registry.py b/tests/app/test_registry.py index 69d8c75..60fc5fe 100644 --- a/tests/app/test_registry.py +++ b/tests/app/test_registry.py @@ -4,6 +4,7 @@ from app.model_services.medcat_model_snomed import MedCATModelSnomed from app.model_services.medcat_model_umls import MedCATModelUmls from app.model_services.medcat_model_icd10 import MedCATModelIcd10 +from app.model_services.medcat_model_opcs4 import MedCATModelOpcs4 from app.model_services.medcat_model_deid import MedCATModelDeIdentification from app.model_services.huggingface_ner_model import HuggingFaceNerModel from app.model_services.huggingface_llm_model import HuggingFaceLlmModel @@ -13,8 +14,9 @@ def test_model_registry(): assert model_service_registry[ModelType.MEDCAT_SNOMED.value] == MedCATModelSnomed assert model_service_registry[ModelType.MEDCAT_UMLS.value] == MedCATModelUmls assert model_service_registry[ModelType.MEDCAT_ICD10.value] == MedCATModelIcd10 + assert model_service_registry[ModelType.MEDCAT_OPCS4.value] == MedCATModelOpcs4 assert model_service_registry[ModelType.MEDCAT_DEID.value] == MedCATModelDeIdentification assert model_service_registry[ModelType.ANONCAT.value] == MedCATModelDeIdentification assert model_service_registry[ModelType.TRANSFORMERS_DEID.value] == TransformersModelDeIdentification - assert model_service_registry[ModelType.HUGGINGFACE_NER] == HuggingFaceNerModel - assert model_service_registry[ModelType.HUGGINGFACE_LLM] == HuggingFaceLlmModel + assert model_service_registry[ModelType.HUGGINGFACE_NER.value] == HuggingFaceNerModel + assert model_service_registry[ModelType.HUGGINGFACE_LLM.value] == HuggingFaceLlmModel diff --git a/tests/app/test_utils.py b/tests/app/test_utils.py index c644aca..f3d82ce 100644 --- a/tests/app/test_utils.py +++ b/tests/app/test_utils.py @@ -40,6 +40,7 @@ def test_get_code_base_uri(): assert get_code_base_uri("SNOMED model") == "http://snomed.info/id" assert get_code_base_uri("ICD-10 model") == "https://icdcodelookup.com/icd-10/codes" + assert get_code_base_uri("OPCS-4 model") == "https://nhsengland.kahootz.com/connect.ti/t_c_home/view?objectId=14270896#14270896" assert get_code_base_uri("UMLS model") == "https://uts.nlm.nih.gov/uts/umls/concept" @@ -95,7 +96,7 @@ def test_json_normalize_medcat_entities(): medcat_entities = json.load(f) df = json_normalize_medcat_entities(medcat_entities) assert len(df) == 25 - assert df.columns.tolist() == ["pretty_name", "cui", "type_ids", "types", "source_value", "detected_name", "acc", "context_similarity", "start", "end", "icd10", "ontologies", "snomed", "id", "meta_anns.Presence.value", "meta_anns.Presence.confidence", "meta_anns.Presence.name", "meta_anns.Subject.value", "meta_anns.Subject.confidence", "meta_anns.Subject.name", "meta_anns.Time.value", "meta_anns.Time.confidence", "meta_anns.Time.name"] + assert df.columns.tolist() == ["pretty_name", "cui", "type_ids", "types", "source_value", "detected_name", "acc", "context_similarity", "start", "end", "icd10", "opcs4", "ontologies", "snomed", "id", "meta_anns.Presence.value", "meta_anns.Presence.confidence", "meta_anns.Presence.name", "meta_anns.Subject.value", "meta_anns.Subject.confidence", "meta_anns.Subject.name", "meta_anns.Time.value", "meta_anns.Time.confidence", "meta_anns.Time.name"] def test_json_normalize_trainer_export(): diff --git a/tests/resources/fixture/medcat_entities.json b/tests/resources/fixture/medcat_entities.json index 977e6c4..dd9d3f5 100644 --- a/tests/resources/fixture/medcat_entities.json +++ b/tests/resources/fixture/medcat_entities.json @@ -15,6 +15,7 @@ "name": "Intracerebral haemorrhage, unspecified", "code": "I61.9" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 1, @@ -51,6 +52,7 @@ "name": "Other and unspecified disturbances of skin sensation", "code": "R20.8" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 8, @@ -87,6 +89,7 @@ "name": "Malaise and fatigue", "code": "R53" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 11, @@ -123,6 +126,7 @@ "name": "Malaise and fatigue", "code": "R53" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 18, @@ -159,6 +163,7 @@ "name": "Dizziness and giddiness", "code": "R42" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 20, @@ -195,6 +200,7 @@ "name": "Other and unspecified disturbances of skin sensation", "code": "R20.8" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 27, @@ -231,6 +237,7 @@ "name": "Malaise and fatigue", "code": "R53" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 35, @@ -267,6 +274,7 @@ "name": "Dizziness and giddiness", "code": "R42" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 37, @@ -303,6 +311,7 @@ "name": "Dizziness and giddiness", "code": "R42" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 39, @@ -339,6 +348,7 @@ "name": "Coma, unspecified", "code": "R40.2" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 42, @@ -372,6 +382,7 @@ "start": 656, "end": 663, "icd10": [], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 50, @@ -408,6 +419,7 @@ "name": "Syncope and collapse", "code": "R55" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 52, @@ -441,6 +453,7 @@ "start": 721, "end": 735, "icd10": [], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 53, @@ -477,6 +490,7 @@ "name": "Disorders of calcium metabolism", "code": "E83.5" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 54, @@ -510,6 +524,7 @@ "start": 783, "end": 796, "icd10": [], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 56, @@ -546,6 +561,7 @@ "name": "Disorders of calcium metabolism", "code": "E83.5" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 59, @@ -582,6 +598,7 @@ "name": "Endocarditis, valve unspecified", "code": "I38" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 87, @@ -618,6 +635,7 @@ "name": "Essential (primary) hypertension", "code": "I10" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 91, @@ -654,6 +672,7 @@ "name": "Rheumatic fever without mention of heart involvement", "code": "I00" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 93, @@ -690,6 +709,7 @@ "name": "Heart disease, unspecified", "code": "I51.9" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 96, @@ -726,6 +746,7 @@ "name": "Chronic obstructive pulmonary disease, unspecified", "code": "J44.9" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 99, @@ -762,6 +783,7 @@ "name": "Mental and behavioural disorders due to use of alcohol", "code": "F10.1" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 100, @@ -798,6 +820,7 @@ "name": "Polyp of colon", "code": "K63.5" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 102, @@ -834,6 +857,7 @@ "name": "Chronic ischaemic heart disease, unspecified", "code": "I25.9" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 104, @@ -870,6 +894,7 @@ "name": "Heart failure, unspecified", "code": "I50.9" }], + "opcs4": [], "ontologies": ["SNOMED"], "snomed": [], "id": 105, From b4ae76b7f831a3994e094d03f5d28dd5216db1c3 Mon Sep 17 00:00:00 2001 From: Phoevos Kalemkeris Date: Fri, 29 Aug 2025 12:17:54 +0100 Subject: [PATCH 2/2] fix: Update OPCS-4 URL Signed-off-by: Phoevos Kalemkeris --- app/utils.py | 2 +- tests/app/test_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/utils.py b/app/utils.py index df27d51..245da97 100644 --- a/app/utils.py +++ b/app/utils.py @@ -63,7 +63,7 @@ def get_code_base_uri(model_name: str) -> Optional[str]: code_base_uris = { CodeType.SNOMED.value: "http://snomed.info/id", CodeType.ICD10.value: "https://icdcodelookup.com/icd-10/codes", - CodeType.OPCS4.value: "https://nhsengland.kahootz.com/connect.ti/t_c_home/view?objectId=14270896#14270896", + CodeType.OPCS4.value: "https://nhsengland.kahootz.com/t_c_home/view?objectID=14270896", CodeType.UMLS.value: "https://uts.nlm.nih.gov/uts/umls/concept", } for code_name, base_uri in code_base_uris.items(): diff --git a/tests/app/test_utils.py b/tests/app/test_utils.py index f3d82ce..4519350 100644 --- a/tests/app/test_utils.py +++ b/tests/app/test_utils.py @@ -40,7 +40,7 @@ def test_get_code_base_uri(): assert get_code_base_uri("SNOMED model") == "http://snomed.info/id" assert get_code_base_uri("ICD-10 model") == "https://icdcodelookup.com/icd-10/codes" - assert get_code_base_uri("OPCS-4 model") == "https://nhsengland.kahootz.com/connect.ti/t_c_home/view?objectId=14270896#14270896" + assert get_code_base_uri("OPCS-4 model") == "https://nhsengland.kahootz.com/t_c_home/view?objectID=14270896" assert get_code_base_uri("UMLS model") == "https://uts.nlm.nih.gov/uts/umls/concept"