Skip to content
28 changes: 20 additions & 8 deletions medcat-v2/medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,22 +523,34 @@ def _get_entity(self, ent: MutableEntity,
'context_similarity': ent.context_similarity,
'start': ent.base.start_char_index,
'end': ent.base.end_char_index,
# TODO: add additional info (i.e mappings)
# for addl in addl_info:
# tmp = self.cdb.addl_info.get(addl, {}).get(cui, [])
# out_ent[addl.split("2")[-1]] = list(tmp) if type(tmp) is
# set else tmp
'id': ent.id,
# TODO: add meta annotations
# if hasattr(ent._, 'meta_anns') and ent._.meta_anns:
# out_ent['meta_anns'] = ent._.meta_anns
'meta_anns': {},
'context_left': left_context,
'context_center': center_context,
'context_right': right_context,
}
# addons:
out_dict.update(self.get_addon_output(ent)) # type: ignore
# other ontologies
if self.config.general.map_to_other_ontologies:
for ont in self.config.general.map_to_other_ontologies:
if ont in out_dict:
logger.warning(
"Trying to map to ontology '%s', but it already "
"exists in the out dict, so unable to add it. "
"If this is for an actual ontology that shares a "
"name with something else, cosider renaming the "
"mapping in `cdb.addl_info`")
continue
addl_info_name = f"cui2{ont}"
if addl_info_name not in self.cdb.addl_info:
logger.warning(
"Trying to map to ontology '%s' but it is not set in "
"addl_info so unable to do so", ont)
continue
ont_map = self.cdb.addl_info[addl_info_name]
ont_values = ont_map.get(cui, [])
out_dict[ont] = ont_values # type: ignore
return out_dict

def get_addon_output(self, ent: MutableEntity) -> dict[str, dict]:
Expand Down
14 changes: 10 additions & 4 deletions medcat-v2/medcat/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,17 @@ class General(SerialisableBaseModel):
map_cui_to_group: bool = False
"""If the cdb.addl_info['cui2group'] is provided and this option enabled,
each CUI will be mapped to the group"""
simple_hash: bool = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this config prop is now removed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a relic from v1. It was never used in v2. v2 always used a faster hash than v1.

"""Whether to use a simple hash.
map_to_other_ontologies: list[str] = ["opcs4", "icd10"]
"""Which other ontologies to map to if possible.

NOTE: While using a simple hash is faster at save time, it is less
reliable due to not taking into account all the details of the changes."""
This will force medcat to include mappings for other ontologies in
its outputs. It will use the mappings in `cdb.addl_info["cui2<ont>"]`
if they are present.

NB!
This will only work if the `cdb.addl_info["cui2<ont>"]` exists.
Otherwise, no mapping will be done.
"""

class Config:
extra = 'allow'
Expand Down
33 changes: 33 additions & 0 deletions medcat-v2/tests/test_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,39 @@ def test_entities_in_correct_order(self):
cur_start = ent.base.start_char_index


class InferenceIntoOntologyTests(TrainedModelTests):
    """Check that inference output carries a mapping into another ontology.

    During class setup a fake ontology mapping is registered in
    ``cdb.addl_info`` for every CUI in ``cdb.cui2info`` and the ontology
    name is appended to ``config.general.map_to_other_ontologies``.
    """
    ont_name = "FAKE_ONT"

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cdb = cls.model.cdb
        # build a single-element mapping for each known CUI
        fake_map = {}
        for cui in cdb.cui2info:
            fake_map[cui] = [f"{cls.ont_name}:{cui}"]
        cls.ont_map = fake_map
        # register the mapping under the "cui2<ont>" key
        cdb.addl_info[f"cui2{cls.ont_name}"] = cls.ont_map
        # request that the model maps into this ontology
        requested = cls.model.config.general.map_to_other_ontologies
        requested.append(cls.ont_name)

    def assert_has_mapping(self, ent: dict):
        """Assert that `ent` has exactly one mapped value from our map."""
        # the ontology key must be present in the entity
        self.assertIn(self.ont_name, ent)
        mapped = ent[self.ont_name]
        # exactly one mapped value is expected
        self.assertEqual(len(mapped), 1)
        # and it has to be one of the values we registered
        known_values = self.ont_map.values()
        self.assertIn(mapped, known_values)

    def test_gets_mappings(self):
        output = self.model.get_entities(ConvertedFunctionalityTests.TEXT)
        entities = output['entities']
        for nr, ent in enumerate(entities.values()):
            with self.subTest(f"{nr}"):
                self.assert_has_mapping(ent)


class CATIncludingTests(unittest.TestCase):
TOKENIZING_PROVIDER = 'regex'
EXPECT_TRAIN = {}
Expand Down
Loading