diff --git a/medcat-v2/medcat/cat.py b/medcat-v2/medcat/cat.py index d89eafcac..8b723648e 100644 --- a/medcat-v2/medcat/cat.py +++ b/medcat-v2/medcat/cat.py @@ -523,15 +523,7 @@ def _get_entity(self, ent: MutableEntity, 'context_similarity': ent.context_similarity, 'start': ent.base.start_char_index, 'end': ent.base.end_char_index, - # TODO: add additional info (i.e mappings) - # for addl in addl_info: - # tmp = self.cdb.addl_info.get(addl, {}).get(cui, []) - # out_ent[addl.split("2")[-1]] = list(tmp) if type(tmp) is - # set else tmp 'id': ent.id, - # TODO: add met annotations - # if hasattr(ent._, 'meta_anns') and ent._.meta_anns: - # out_ent['meta_anns'] = ent._.meta_anns 'meta_anns': {}, 'context_left': left_context, 'context_center': center_context, @@ -539,6 +531,26 @@ def _get_entity(self, ent: MutableEntity, } # addons: out_dict.update(self.get_addon_output(ent)) # type: ignore + # other ontologies + if self.config.general.map_to_other_ontologies: + for ont in self.config.general.map_to_other_ontologies: + if ont in out_dict: + logger.warning( + "Trying to map to ontology '%s', but it already " + "exists in the out dict, so unable to add it. 
" + "If this is for an actual ontology that shares a " + "name with something else, consider renaming the " + "mapping in `cdb.addl_info`", ont) + continue + addl_info_name = f"cui2{ont}" + if addl_info_name not in self.cdb.addl_info: + logger.warning( + "Trying to map to ontology '%s' but it is not set in " + "addl_info so unable to do so", ont) + continue + ont_map = self.cdb.addl_info[addl_info_name] + ont_values = ont_map.get(cui, []) + out_dict[ont] = ont_values # type: ignore return out_dict def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]: diff --git a/medcat-v2/medcat/config/config.py b/medcat-v2/medcat/config/config.py index 6f21bad0c..5c8c4261d 100644 --- a/medcat-v2/medcat/config/config.py +++ b/medcat-v2/medcat/config/config.py @@ -252,11 +252,17 @@ class General(SerialisableBaseModel): map_cui_to_group: bool = False """If the cdb.addl_info['cui2group'] is provided and this option enabled, each CUI will be mapped to the group""" - simple_hash: bool = False - """Whether to use a simple hash. + map_to_other_ontologies: list[str] = ["opcs4", "icd10"] + """Which other ontologies to map to if possible. - NOTE: While using a simple hash is faster at save time, it is less - reliable due to not taking into account all the details of the changes.""" + This will force medcat to include mapping for other ontologies in + its outputs. It will use the mappings in `cdb.addl_info["cui2{ont}"]` + if they are present. + + NB! + This will only work if `cdb.addl_info["cui2{ont}"]` exists. + Otherwise, no mapping will be done. 
+ """ class Config: extra = 'allow' diff --git a/medcat-v2/tests/test_cat.py b/medcat-v2/tests/test_cat.py index 7a44e42b9..9ada8deae 100644 --- a/medcat-v2/tests/test_cat.py +++ b/medcat-v2/tests/test_cat.py @@ -159,6 +159,39 @@ def test_entities_in_correct_order(self): cur_start = ent.base.start_char_index +class InferenceIntoOntologyTests(TrainedModelTests): + ont_name = "FAKE_ONT" + + @classmethod + def setUpClass(cls): + super().setUpClass() + # create mapping + cls.ont_map = { + cui: [f"{cls.ont_name}:{cui}"] + for cui in cls.model.cdb.cui2info + } + # add to addl_info + cls.model.cdb.addl_info[f"cui2{cls.ont_name}"] = cls.ont_map + # ask to be mapped + cls.model.config.general.map_to_other_ontologies.append(cls.ont_name) + + def assert_has_mapping(self, ent: dict): + # has value + self.assertIn(self.ont_name, ent) + val = ent[self.ont_name] + # 1 value + self.assertEqual(len(val), 1) + # value in our map + self.assertIn(val, self.ont_map.values()) + + def test_gets_mappings(self): + ents = self.model.get_entities( + ConvertedFunctionalityTests.TEXT)['entities'] + for nr, ent in enumerate(ents.values()): + with self.subTest(f"{nr}"): + self.assert_has_mapping(ent) + + class CATIncludingTests(unittest.TestCase): TOKENIZING_PROVIDER = 'regex' EXPECT_TRAIN = {}