## Constraint-aware semantic resolution over OMOP vocabularies

* Pydantic classes (generated via LinkML) produce structured LLM queries
* the LLM proposes candidates
* LinkML defines valid target spaces
* the KG enforces ontological correctness
* scoring / ranking resolves ambiguity

### Reasoners

1. Hierarchy reasoner: Strict check that candidate ⊑ allowed_parent
2. Domain reasoner: Broader check that simply confirms the concept is in the correct OMOP domain
3. Vocabulary preferences: Ordered list of preferred target vocabularies
4. Standardness and validity
5. Specificity / depth: Out of concepts matching selected restrictions, which is at the most appropriate level of specificity?

In [1]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv

from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path
from omop_graph.graph.traverse import traverse
from omop_graph.graph.paths import find_shortest_paths
from omop_graph.graph.kg import KnowledgeGraph
from omop_graph.graph.edges import PredicateKind
from omop_graph.render import (
    render_subgraph,
    render_trace,
    render_path,
    render_explained_path,
    bind_default_renderers,
)

from omop_alchemy import configure_logging, get_engine_name, TEST_PATH, ROOT_PATH
import sqlalchemy as sa
from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym
import pandas as pd

In [2]:
# Set up project logging and load variables from a local .env file into the
# process environment before the engine configuration is read.
configure_logging()
load_dotenv()

# Build the SQLAlchemy engine from the project-supplied engine string.
# presumably get_engine_name() depends on the environment loaded above — TODO confirm.
# future=True opts into SQLAlchemy 2.0-style behaviour; echo=False keeps the
# emitted SQL out of the log output.
engine_string = get_engine_name()
engine = sa.create_engine(engine_string, future=True, echo=False)

2026-01-05 22:52:17,245 | INFO     | omop_alchemy.omop_alchemy.config | Database engine configured


In [3]:
# Session factory bound to the notebook's engine.
Session = sessionmaker(bind=engine)

# A single session is opened and handed to the knowledge graph.
# NOTE(review): the session is never closed in the visible cells.
session = Session()
kg = KnowledgeGraph(session)
# presumably registers the default notebook renderers against this kg — TODO confirm.
bind_default_renderers(kg)

In [4]:
# synonyms = pd.DataFrame(
#     session.query(
#         Concept_Synonym.concept_synonym_name, Concept_Synonym.concept_id, Concept_Synonym.language_concept_id
#         )
# )

In [5]:
# Look up concepts by label. In the recorded output the input is reported in
# lower case and three DIRECT matches come back, one of them non-standard.
kg.label_lookup("Heart attack")

(LabelMatch(input_label='heart attack', matched_label='Heart attack', concept_id=35819523, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='heart attack', matched_label='Heart attack', concept_id=36210764, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='heart attack', matched_label='Heart attack', concept_id=45466858, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False, is_active=True))

In [6]:
# Label lookup with fuzzy matching switched off, sorted for display.
# In the recorded output the only standard, active concept (4134723) is listed
# first and the inactive concepts last.
sorted(
    kg.label_lookup("Coronary artery thrombosis", fuzzy=False)
)

[LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary artery thrombosis', concept_id=4134723, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary artery thrombosis', concept_id=3309601, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary Artery Thrombosis', concept_id=45921297, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary artery thrombosis', concept_id=3161942, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False, is_active=False),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary artery thrombosis', concept_id=40623902, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False, is_active=False)]

In [7]:
# Same lookup with fuzzy matching switched on: the recorded output now also
# contains longer labels that include the search phrase.
# NOTE(review): those partial-phrase hits (e.g. 'Left anterior descending
# coronary artery thrombosis') are still reported with match_kind DIRECT —
# confirm this is intended.
sorted(
    kg.label_lookup("Coronary artery thrombosis", fuzzy=True)
)

[LabelMatch(input_label='coronary artery thrombosis', matched_label='Coronary artery thrombosis', concept_id=4134723, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Left anterior descending coronary artery thrombosis', concept_id=4153091, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Left main coronary artery thrombosis', concept_id=4209308, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Right main coronary artery thrombosis', concept_id=4304192, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=True, is_active=True),
 LabelMatch(input_label='coronary artery thrombosis', matched_label='Right main coronary artery thrombosis', concept_id=3169041, match_kind=<LabelMatchKind.DIRECT: 1>, is_standard=False,

In [8]:
from omop_graph.reasoning.term_grounding import GroundingConstraints, ground_term
from omop_graph.reasoning.resolvers import ResolverPipeline, ExactLabelResolver, PartialLabelResolver, ExactSynonymResolver
from omop_graph.reasoning.term_grounding import _passes_constraints


In [9]:
# Constraints passed to ground_term() and _passes_constraints() in the cells
# that follow.
constraints = GroundingConstraints(
    # NOTE(review): an earlier value was (37153816,), and the old trailing
    # comment read "malignant neoplasm". That label does not appear to describe
    # 4027255: the recorded ground_term output reaches it from
    # 'Myocardial infarction' via two 'Is a' edges. Confirm the concept name.
    parent_ids=(4027255,),
    # Redundant with parent_ids today; kept in case combinations across domains
    # are supported later for more abstract grounding.
    allowed_domains=("Condition",),
    allowed_vocabularies=("SNOMED", "ICD10CM"),
    require_standard=True,
    # presumably the maximum hierarchy distance searched towards parent_ids — TODO confirm.
    max_depth=4,
)

In [10]:
# Resolvers are listed in the order exact label, exact synonym, partial label.
# The stop threshold is taken from PartialLabelResolver's own confidence value;
# presumably the pipeline stops once hits at or above it are found — TODO confirm.
resolver_pipeline = ResolverPipeline(
    resolvers=(ExactLabelResolver(),ExactSynonymResolver(),PartialLabelResolver()),
    stop_after_confidence=PartialLabelResolver.confidence
)

In [11]:
# Ground the free-text term under the constraints and resolver pipeline.
# The recorded output holds a single candidate, 4329847 'Myocardial infarction',
# with a two-hop 'Is a' path ending at the constrained parent 4027255.
ground_term(kg, "heart attack", constraints=constraints, resolver_pipeline=resolver_pipeline)

[GroundingCandidate(concept_id=4329847, label='Myocardial infarction', best_path_profile=PathProfile(hops=2, invalid_concepts=0, non_standard_concepts=0, vocab_switches=0, ontological_edges=2, mapping_edges=0, metadata_edges=0), reasons=(), paths=(GraphPath(steps=(PathStep(subject=4329847, predicate='Is a', object=4185932), PathStep(subject=4185932, predicate='Is a', object=4027255))),))]

In [12]:
# Stand-alone resolver instances so each one's raw hits can be inspected
# outside the pipeline: e = exact label, p = partial label, s = exact synonym.
e = ExactLabelResolver()
p = PartialLabelResolver()
s = ExactSynonymResolver()

In [13]:
# Raw hits from the exact-label resolver alone, before any constraint filtering.
hits = e.resolve(kg, "heart attack")

In [14]:
# Run each exact-label hit through the constraint check and print the verdict
# with its rejection reasons. In the recorded output all three hits fail
# (wrong domain or disallowed vocabulary).
# NOTE(review): _passes_constraints is a private helper of term_grounding.
for h in hits:
    ok, reasons = _passes_constraints(kg, h.concept_id, constraints)
    print(f"Concept ID {h.concept_id} passes: {ok}, reasons: {reasons}")

Concept ID 35819523 passes: False, reasons: ["domain Observation not in ('Condition',)"]
Concept ID 36210764 passes: False, reasons: ["domain Meas Value not in ('Condition',)"]
Concept ID 45466858 passes: False, reasons: ['vocabulary Read not allowed']


In [15]:
# Raw hits from the exact-synonym resolver alone; rebinds `hits` from the
# exact-label run.
hits = s.resolve(kg, "heart attack")

In [16]:
# Run each synonym hit through the constraint check and print the verdict with
# its rejection reasons. In the recorded output only 4329847 passes; the rest
# fail on vocabulary, standardness or domain.
for h in hits:
    ok, reasons = _passes_constraints(kg, h.concept_id, constraints)
    print(f"Concept ID {h.concept_id} passes: {ok}, reasons: {reasons}")

Concept ID 45616764 passes: False, reasons: ['vocabulary MeSH not allowed']
Concept ID 40398435 passes: False, reasons: ['concept is non-standard']
Concept ID 4329847 passes: True, reasons: []
Concept ID 40623902 passes: False, reasons: ['concept is non-standard']
Concept ID 4347016 passes: False, reasons: ["domain Drug not in ('Condition',)"]
Concept ID 40345176 passes: False, reasons: ['concept is non-standard']
Concept ID 40323863 passes: False, reasons: ['concept is non-standard']


In [None]:
# 4329847 is the only concept that passes constraints and matches the selected grounding returned above