From 13b6199d72739f5411e86edebccbedca0718d419 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:05:20 +0100 Subject: [PATCH 01/13] CU-869apb8ju: Do a first-time dynamic mapping of additional ontologies. --- medcat-v2/medcat/cat.py | 12 +++++++++++- medcat-v2/medcat/config/config.py | 8 ++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index 8b723648e..c6ae17c33 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -532,7 +532,17 @@ def _get_entity(self, ent: MutableEntity, # addons: out_dict.update(self.get_addon_output(ent)) # type: ignore # other ontologies - if self.config.general.map_to_other_ontologies: + other_onts = self.config.general.map_to_other_ontologies + if other_onts: + if other_onts == "auto": + self.config.general.map_to_other_ontologies = other_onts = [ + key.removeprefix("cui2") + for key in self.cdb.addl_info + if key.startswith("cui2") + ] + logger.info( + "Automatically finding ontologies to map to: %s", + other_onts) for ont in self.config.general.map_to_other_ontologies: if ont in out_dict: logger.warning( diff --git a/medcat-v2/medcat/config/config.py b/medcat-v2/medcat/config/config.py index 5c8c4261d..cd9685757 100644 --- a/medcat-v2/medcat/config/config.py +++ b/medcat-v2/medcat/config/config.py @@ -1,6 +1,6 @@ import os from typing import (Optional, Iterator, Iterable, TypeVar, cast, Type, Any, - Literal) + Literal, Union) from typing import Protocol, runtime_checkable from typing_extensions import Self import logging @@ -252,13 +252,17 @@ class General(SerialisableBaseModel): map_cui_to_group: bool = False """If the cdb.addl_info['cui2group'] is provided and this option enabled, each CUI will be mapped to the group""" - map_to_other_ontologies: list[str] = ["opcs4", "icd10"] + map_to_other_ontologies: Union[Literal["auto"], list[str]] = "auto" """Which other ontologies to map to if possible. 
This will force medcat to include mapping for other ontologies in its outputs. It will use the mappings in `cdb.addl_info["cui2"]` are present. + If set to "auto" (or missign), the value will be inferred from available + data at first init time. That is to say, it'll map to all ontologies + available. + NB! This will only work if the `cdb.addl_info["cui2"]` exists. Otherwise, no mapping will be done. From 716b89694d90a168c48ac6b7cef59acb1b3ccd02 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:10:49 +0100 Subject: [PATCH 02/13] CU-869apb8ju: Move the other ontology getting to a separate method --- medcat-v2/medcat/cat.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index c6ae17c33..4aade5e04 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -535,15 +535,8 @@ def _get_entity(self, ent: MutableEntity, other_onts = self.config.general.map_to_other_ontologies if other_onts: if other_onts == "auto": - self.config.general.map_to_other_ontologies = other_onts = [ - key.removeprefix("cui2") - for key in self.cdb.addl_info - if key.startswith("cui2") - ] - logger.info( - "Automatically finding ontologies to map to: %s", - other_onts) - for ont in self.config.general.map_to_other_ontologies: + other_onts = self._set_and_get_mapped_ontologies() + for ont in other_onts: if ont in out_dict: logger.warning( "Trying to map to ontology '%s', but it already " @@ -563,6 +556,18 @@ def _get_entity(self, ent: MutableEntity, out_dict[ont] = ont_values # type: ignore return out_dict + def _set_and_get_mapped_ontologies(self) -> list[str]: + other_onts = self.config.general.map_to_other_ontologies + if other_onts == "auto": + self.config.general.map_to_other_ontologies = other_onts = [ + key.removeprefix("cui2") + for key in self.cdb.addl_info + if key.startswith("cui2") + ] + logger.info( + "Automatically finding ontologies to map to: %s", other_onts) + 
return other_onts + def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]: """Get the addon output for the entity. From 267efe5d82fa62d8edcc678a355e39df48f9074e Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:13:20 +0100 Subject: [PATCH 03/13] CU-869apb8ju: Do ontology mappings at model pack load time if needed --- medcat-v2/medcat/cat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index 4aade5e04..8415e5f3e 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -824,6 +824,8 @@ def load_model_pack(cls, model_pack_path: str, # will be dealt with upon pipeline creation automatically if not isinstance(cat, CAT): raise ValueError(f"Unable to load CAT. Got: {cat}") + # reset mapped ontologies at load time but after CDB load + cat._set_and_get_mapped_ontologies() return cat @classmethod From 1e3876e3c7e33d475381f5c9f3f238209972ecbe Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:20:19 +0100 Subject: [PATCH 04/13] CU-869apb8ju: Make inferred ontologies only use non-empty ontology mappings --- medcat-v2/medcat/cat.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index 8415e5f3e..f97de5347 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -556,13 +556,23 @@ def _get_entity(self, ent: MutableEntity, out_dict[ont] = ont_values # type: ignore return out_dict - def _set_and_get_mapped_ontologies(self) -> list[str]: + def _set_and_get_mapped_ontologies( + self, + ignore_list: list[str] = ["ontologies", "original_names", + "description", "group"], + ignore_empty: bool = True) -> list[str]: other_onts = self.config.general.map_to_other_ontologies if other_onts == "auto": self.config.general.map_to_other_ontologies = other_onts = [ - key.removeprefix("cui2") - for key in self.cdb.addl_info - if key.startswith("cui2") + npkey + for key, val in 
self.cdb.addl_info.items() + if key.startswith("cui2") and + # ignore empty if required / expected + (not ignore_empty or val) and + # these are things that get auto-populated in addl_info + # but don't generally contain ontology mapping information + # directly + (npkey := key.removeprefix("cui2")) not in ignore_list ] logger.info( "Automatically finding ontologies to map to: %s", other_onts) From 1c78a719de4e3bce54e0dfde0db2fd068cfa806e Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:20:35 +0100 Subject: [PATCH 05/13] CU-869apb8ju: Add a few simple tests for ontology mappings --- medcat-v2/tests/test_cat.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 9ada8deae..250d23ea3 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -133,6 +133,16 @@ def test_can_merge_config(self): model.config.general.nlp.modelname, self.spacy_model_name) +class OntologiesMapTests(TrainedModelTests): + + def test_does_not_have_auto(self): + self.assertNotEqual(self.model.config.general.map_to_other_ontologies, + "auto") + + def test_is_empty(self): + self.assertFalse(self.model.config.general.map_to_other_ontologies) + + class InferenceFromLoadedTests(TrainedModelTests): def test_can_load_model(self): From 49808e8332beea731295135e9cc7e127d92c1076 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 1 Oct 2025 16:25:20 +0100 Subject: [PATCH 06/13] CU-869apb8ju: Add a few more tests for automatic ontology mapping process --- medcat-v2/tests/test_cat.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 250d23ea3..e721fd035 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -143,6 +143,31 @@ def test_is_empty(self): self.assertFalse(self.model.config.general.map_to_other_ontologies) +class OntologiesMapWithOntologiesTests(TrainedModelTests): + MY_ONT_NAME = 
"My_Ontology" + MY_ONT_MAPPING = { + # mapping doens't matter here, really + "ABC": "BBC" + } + + @classmethod + def setUpClass(cls): + super().setUpClass() + # add "mapping" + cls.model.cdb.addl_info[f"cui2{cls.MY_ONT_NAME}"] = cls.MY_ONT_MAPPING + # set to auto + cls.model.config.general.map_to_other_ontologies = "auto" + # redo process + cls.model._set_and_get_mapped_ontologies() + + def test_has_my_ontology(self): + self.assertEqual( + len(self.model.config.general.map_to_other_ontologies), 1) + self.assertEqual( + self.model.config.general.map_to_other_ontologies, + [self.MY_ONT_NAME]) + + class InferenceFromLoadedTests(TrainedModelTests): def test_can_load_model(self): From 47f13b059a6761427b22c71ddee51d4a40e7023f Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 11:45:01 +0100 Subject: [PATCH 07/13] CU-869apb8ju: Fix typo --- medcat-v2/medcat/config/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/medcat/config/config.py b/medcat-v2/medcat/config/config.py index cd9685757..8b75a1caa 100644 --- a/medcat-v2/medcat/config/config.py +++ b/medcat-v2/medcat/config/config.py @@ -259,7 +259,7 @@ class General(SerialisableBaseModel): its outputs. It will use the mappings in `cdb.addl_info["cui2"]` are present. - If set to "auto" (or missign), the value will be inferred from available + If set to "auto" (or missing), the value will be inferred from available data at first init time. That is to say, it'll map to all ontologies available. 
From 8f9d13edba5d817b79bfc94523aee868efe7bee0 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 11:45:57 +0100 Subject: [PATCH 08/13] CU-869apb8ju: Simplify logic a little bit --- medcat-v2/medcat/cat.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index f97de5347..955b60d95 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -532,10 +532,8 @@ def _get_entity(self, ent: MutableEntity, # addons: out_dict.update(self.get_addon_output(ent)) # type: ignore # other ontologies - other_onts = self.config.general.map_to_other_ontologies + other_onts = self._set_and_get_mapped_ontologies() if other_onts: - if other_onts == "auto": - other_onts = self._set_and_get_mapped_ontologies() for ont in other_onts: if ont in out_dict: logger.warning( From b124fb27193b2fe11f5a0e115dceb5c6a6142d14 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 11:54:18 +0100 Subject: [PATCH 09/13] CU-869apb8ju: List to set for ignore options --- medcat-v2/medcat/cat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index 955b60d95..1b48afa77 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -556,8 +556,8 @@ def _get_entity(self, ent: MutableEntity, def _set_and_get_mapped_ontologies( self, - ignore_list: list[str] = ["ontologies", "original_names", - "description", "group"], + ignore_set: set[str] = {"ontologies", "original_names", + "description", "group"}, ignore_empty: bool = True) -> list[str]: other_onts = self.config.general.map_to_other_ontologies if other_onts == "auto": @@ -570,7 +570,7 @@ def _set_and_get_mapped_ontologies( # these are things that get auto-populated in addl_info # but don't generally contain ontology mapping information # directly - (npkey := key.removeprefix("cui2")) not in ignore_list + (npkey := key.removeprefix("cui2")) not in ignore_set ] logger.info( 
"Automatically finding ontologies to map to: %s", other_onts) From ff0002a59613f2123a5594615be2bfc51e95dd0b Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 11:56:36 +0100 Subject: [PATCH 10/13] CU-869apb8ju: Slight refactor in tests --- medcat-v2/tests/test_cat.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index e721fd035..bc5a378ac 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -151,15 +151,19 @@ class OntologiesMapWithOntologiesTests(TrainedModelTests): } @classmethod - def setUpClass(cls): - super().setUpClass() - # add "mapping" - cls.model.cdb.addl_info[f"cui2{cls.MY_ONT_NAME}"] = cls.MY_ONT_MAPPING + def reset_mappings(cls): # set to auto cls.model.config.general.map_to_other_ontologies = "auto" # redo process cls.model._set_and_get_mapped_ontologies() + @classmethod + def setUpClass(cls): + super().setUpClass() + # add "mapping" + cls.model.cdb.addl_info[f"cui2{cls.MY_ONT_NAME}"] = cls.MY_ONT_MAPPING + cls.reset_mappings() + def test_has_my_ontology(self): self.assertEqual( len(self.model.config.general.map_to_other_ontologies), 1) From 777999f3904568de4ac13f38c53756dcce2ebfd9 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 12:10:07 +0100 Subject: [PATCH 11/13] CU-869apb8ju: Slight further refactor in tests --- medcat-v2/tests/test_cat.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index bc5a378ac..253d57c37 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -145,6 +145,7 @@ def test_is_empty(self): class OntologiesMapWithOntologiesTests(TrainedModelTests): MY_ONT_NAME = "My_Ontology" + EXP_GET = [MY_ONT_NAME] MY_ONT_MAPPING = { # mapping doens't matter here, really "ABC": "BBC" @@ -164,12 +165,13 @@ def setUpClass(cls): cls.model.cdb.addl_info[f"cui2{cls.MY_ONT_NAME}"] = cls.MY_ONT_MAPPING 
cls.reset_mappings() - def test_has_my_ontology(self): + def test_has_correct_results(self): self.assertEqual( - len(self.model.config.general.map_to_other_ontologies), 1) + len(self.model.config.general.map_to_other_ontologies), + len(self.EXP_GET)) self.assertEqual( self.model.config.general.map_to_other_ontologies, - [self.MY_ONT_NAME]) + self.EXP_GET) class InferenceFromLoadedTests(TrainedModelTests): From 339f1a8a655660d0ea114780a34fe11547095a9f Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 12:18:54 +0100 Subject: [PATCH 12/13] CU-869apb8ju: Slight further refactor in tests (again) --- medcat-v2/tests/test_cat.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 253d57c37..513b24dcd 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -166,12 +166,9 @@ def setUpClass(cls): cls.reset_mappings() def test_has_correct_results(self): - self.assertEqual( - len(self.model.config.general.map_to_other_ontologies), - len(self.EXP_GET)) - self.assertEqual( - self.model.config.general.map_to_other_ontologies, - self.EXP_GET) + got = sorted(self.model.config.general.map_to_other_ontologies) + self.assertEqual(len(got), len(self.EXP_GET)) + self.assertEqual(got, self.EXP_GET) class InferenceFromLoadedTests(TrainedModelTests): From fee860fda9a4e4c67b587ec9e57ccdda09a03409 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 2 Oct 2025 12:19:18 +0100 Subject: [PATCH 13/13] CU-869apb8ju: Add a few more tests for other use cases of ontology mapping --- medcat-v2/tests/test_cat.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 513b24dcd..ff518f21a 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -171,6 +171,40 @@ def test_has_correct_results(self): self.assertEqual(got, self.EXP_GET) +class 
OntologiesMapWithOntologiesAndNoIgnoresTests( + OntologiesMapWithOntologiesTests): + EXTRA_ONTS = ["original_names"] + + @classmethod + def reset_mappings(cls): + # set to auto + cls.model.config.general.map_to_other_ontologies = "auto" + # redo process + cls.model._set_and_get_mapped_ontologies(ignore_set=set()) + + @classmethod + def setUpClass(cls): + super().setUpClass() + # I need to redefine for specific class + # instead of changing instance in base class + cls.EXP_GET = OntologiesMapWithOntologiesTests.EXP_GET.copy() + cls.EXP_GET.extend(cls.EXTRA_ONTS) + cls.EXP_GET.sort() + cls.reset_mappings() + + +class OntologiesMapWithOntologiesAndAllowEmpty( + OntologiesMapWithOntologiesAndNoIgnoresTests): + EXTRA_ONTS = ["icd10", "opcs4"] + + @classmethod + def reset_mappings(cls): + # set to auto + cls.model.config.general.map_to_other_ontologies = "auto" + # redo process + cls.model._set_and_get_mapped_ontologies(ignore_empty=False) + + class InferenceFromLoadedTests(TrainedModelTests): def test_can_load_model(self):