From cff9d6114dc080dda31096a1a6b14ec9dcaab120 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:04:39 +0100 Subject: [PATCH 1/8] CU-869aknekf: Remove simple_hash config option. It's an unused relic from v1 --- medcat-v2/medcat/config/config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/medcat-v2/medcat/config/config.py b/medcat-v2/medcat/config/config.py index 6f21bad0c..150c8c352 100644 --- a/medcat-v2/medcat/config/config.py +++ b/medcat-v2/medcat/config/config.py @@ -252,11 +252,6 @@ class General(SerialisableBaseModel): map_cui_to_group: bool = False """If the cdb.addl_info['cui2group'] is provided and this option enabled, each CUI will be mapped to the group""" - simple_hash: bool = False - """Whether to use a simple hash. - - NOTE: While using a simple hash is faster at save time, it is less - reliable due to not taking into account all the details of the changes.""" class Config: extra = 'allow' From d2fa14053b65ae7181ca95427e95381972af800a Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:10:12 +0100 Subject: [PATCH 2/8] CU-869aknekf: Add config option for mapping to other ontologies --- medcat-v2/medcat/config/config.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/medcat-v2/medcat/config/config.py b/medcat-v2/medcat/config/config.py index 150c8c352..5c8c4261d 100644 --- a/medcat-v2/medcat/config/config.py +++ b/medcat-v2/medcat/config/config.py @@ -252,6 +252,17 @@ class General(SerialisableBaseModel): map_cui_to_group: bool = False """If the cdb.addl_info['cui2group'] is provided and this option enabled, each CUI will be mapped to the group""" + map_to_other_ontologies: list[str] = ["opcs4", "icd10"] + """Which other ontologies to map to if possible. + + This will force medcat to include mapping for other ontologies in + its outputs. It will use the mappings in `cdb.addl_info["cui2{ont}"]` + if they are present. + + NB! + This will only work if the `cdb.addl_info["cui2{ont}"]` exists. 
+ Otherwise, no mapping will be done. + """ class Config: extra = 'allow' From e2c2ee8c39f40de2999d849ee1ab7a4c47131e23 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:23:16 +0100 Subject: [PATCH 3/8] CU-869aknekf: Implement mapping to other ontologies --- medcat-v2/medcat/cat.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index d89eafcac..ce4faf07d 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -539,6 +539,26 @@ def _get_entity(self, ent: MutableEntity, } # addons: out_dict.update(self.get_addon_output(ent)) # type: ignore + # other ontologies + if self.config.general.map_to_other_ontologies: + for ont in self.config.general.map_to_other_ontologies: + if ont in out_dict: + logger.warning( + "Trying to map to ontology '%s', but it already " + "exists in the out dict, so unable to add it. " + "If this is for an actual ontology that shares a " + "name with something else, cosider renaming the " + "mapping in `cdb.addl_info`") + continue + addl_info_name = f"cui2{ont}" + if addl_info_name not in self.cdb.addl_info: + logger.debug( + "Trying to map to ontology '%s' but it is not set in " + "addl_info so unable to do so", ont) + continue + ont_map = self.cdb.addl_info[addl_info_name] + ont_values = ont_map.get(cui, []) + out_dict[ont] = ont_values return out_dict def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]: From 2e6929b3c4aee6432327912b882e07768040ecc2 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:23:50 +0100 Subject: [PATCH 4/8] CU-869aknekf: Fix typing issue with ontology mappings --- medcat-v2/medcat/cat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index ce4faf07d..cc02ce341 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -558,7 +558,7 @@ def _get_entity(self, ent: MutableEntity, continue ont_map = 
self.cdb.addl_info[addl_info_name] ont_values = ont_map.get(cui, []) - out_dict[ont] = ont_values + out_dict[ont] = ont_values # type: ignore return out_dict def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]: From 4c9acce0bc3019d9dcc6832c70ed3f1128ff6bb4 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:24:13 +0100 Subject: [PATCH 5/8] CU-869aknekf: Add a few tests to check mappings are appropriately added --- medcat-v2/tests/test_cat.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 7a44e42b9..8250a8dc7 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -159,6 +159,41 @@ def test_entities_in_correct_order(self): cur_start = ent.base.start_char_index +class InferenceIntoOntologyTests(TrainedModelTests): + ont_name = "FAKE_ONT" + + @classmethod + def setUpClass(cls): + super().setUpClass() + # create mapping + cls.ont_map = { + cui: [f"{cls.ont_name}:{cui}"] + for cui in cls.model.cdb.cui2info + } + # add to addl_info + cls.model.cdb.addl_info[f"cui2{cls.ont_name}"] = cls.ont_map + # ask to be mapped + cls.model.config.general.map_to_other_ontologies.append(cls.ont_name) + + def assert_has_mapping(self, ent: dict): + # has value + self.assertIn(self.ont_name, ent) + val = ent[self.ont_name] + # 1 value + self.assertEqual(len(val), 1) + # value in our map + self.assertIn(val, self.ont_map.values()) + + def test_gets_mappings(self): + ents = self.model.get_entities( + ConvertedFunctionalityTests.TEXT)['entities'] + for nr, ent in enumerate(ents.values()): + with self.subTest(f"{nr}"): + self.assert_has_mapping(ent) + + + + class CATIncludingTests(unittest.TestCase): TOKENIZING_PROVIDER = 'regex' EXPECT_TRAIN = {} From afa28707bf79c0588fb096dceb2fb48f1fd8e2d5 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:24:57 +0100 Subject: [PATCH 6/8] CU-869aknekf: Remove extra whitespace --- 
medcat-v2/tests/test_cat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 8250a8dc7..9ada8deae 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -192,8 +192,6 @@ def test_gets_mappings(self): self.assert_has_mapping(ent) - - class CATIncludingTests(unittest.TestCase): TOKENIZING_PROVIDER = 'regex' EXPECT_TRAIN = {} From caf52b95359133f8d48c0374f059b4da81ec2cbc Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 25 Sep 2025 16:27:06 +0100 Subject: [PATCH 7/8] CU-869aknekf: Removed TODO that has been addressed regarding output --- medcat-v2/medcat/cat.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index cc02ce341..d341c6259 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -523,15 +523,7 @@ def _get_entity(self, ent: MutableEntity, 'context_similarity': ent.context_similarity, 'start': ent.base.start_char_index, 'end': ent.base.end_char_index, - # TODO: add additional info (i.e mappings) - # for addl in addl_info: - # tmp = self.cdb.addl_info.get(addl, {}).get(cui, []) - # out_ent[addl.split("2")[-1]] = list(tmp) if type(tmp) is - # set else tmp 'id': ent.id, - # TODO: add met annotations - # if hasattr(ent._, 'meta_anns') and ent._.meta_anns: - # out_ent['meta_anns'] = ent._.meta_anns 'meta_anns': {}, 'context_left': left_context, 'context_center': center_context, From c2ac753f586a40fbcecc11cb009565595a9f421e Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 29 Sep 2025 09:17:23 +0100 Subject: [PATCH 8/8] CU-869aknekf: Move logged message from debug to warning --- medcat-v2/medcat/cat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index d341c6259..8b723648e 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -544,7 +544,7 @@ def _get_entity(self, ent: MutableEntity, continue addl_info_name = f"cui2{ont}" 
if addl_info_name not in self.cdb.addl_info: - logger.debug( + logger.warning( "Trying to map to ontology '%s' but it is not set in " "addl_info so unable to do so", ont) continue