In [1]:
import pandas as pd
import sqlalchemy.orm as so
import sqlalchemy as sa

from dataclasses import asdict
from dotenv import load_dotenv
from orm_loader.helpers import configure_logging
from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH
from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym

from omop_graph.graph.kg import KnowledgeGraph
from omop_graph.render import (
    render_subgraph,
    render_trace,
    render_path,
    render_explained_path,
    bind_default_renderers,
)
from omop_graph.reasoning.phenotypes import (
    find_common_parents,
)

At the moment, this demo serves as input to a manual process of hierarchy resolution, because there are certainly arguments for allowing impurity (e.g. SNOMED cancer conditions do not meet the original ICDO3-specific filter if there is no corresponding ICDO3 code), as well as for supporting less than full coverage (e.g. valid but unusual/atypical classifications that introduce a lot of noise). The ideal level of final automation here is still TBD, but regardless, it is already relatively useful for initial orientation steps...

In [2]:
# Logging is configured first, then variables from a local .env file are loaded.
configure_logging()
load_dotenv()

# The connection string comes from omop_alchemy; SQL echo stays off to keep output quiet.
engine_string = get_engine_name()
engine = sa.create_engine(
    engine_string,
    echo=False,
    future=True,
)

2026-01-12 10:18:45,944 | INFO     | sql_loader.omop_alchemy.config | Database engine configured


In [3]:
# Session factory bound to the engine; the single session below is reused by later cells.
Session = so.sessionmaker(bind=engine)
session = Session()

# Knowledge graph built on that session, then the default renderers are bound to it.
kg = KnowledgeGraph(session)
bind_default_renderers(kg)

In [4]:
# ECOG 0 thru 5
seeds = [
    4173456, 4174251, 4174241,
    4172043, 4173614, 4175026,
]
parent_stats = find_common_parents(kg=kg, seeds=seeds)

# One row per candidate parent: its concept_id plus every field of its stats dataclass.
candidate_rows = [
    {'concept_id': concept_id, **asdict(stats)}
    for concept_id, stats in parent_stats.items()
]
candidate_df = pd.DataFrame(candidate_rows)

In [5]:
# Show the candidate parents for the current seeds (one row per candidate concept).
# NOTE(review): the displayed values are consistent with completeness = coverage / len(seeds)
# and purity = coverage / (coverage + pollution) — confirm against find_common_parents.
candidate_df

Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
0,4308794,"{4174241, 4172043, 4174251, 4173614, 4173456, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,0,1.0,1.0,1
1,432453,"{3655685, 4294662, 4032527, 4032528, 4032529, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,986,1.0,0.006048,2
2,4089214,"{46284803, 4153350, 4120591, 3661842, 3661843,...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,4195,1.0,0.001428,5
3,4041284,"{4259853, 4161577, 4161578, 4161579, 4128815, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,7135,1.0,0.00084,5
4,4094294,"{4259853, 4161577, 4161578, 4161579, 4227116, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,14450,1.0,0.000415,4


In [6]:
seeds = [
    4230556, 4085075, 434489,                              # alive, dead, at risk
    4173456, 4174251, 4174241, 4172043, 4173614, 4175026,  # plus the same ECOG 0 thru 5 seeds used earlier
]
parent_stats = find_common_parents(kg=kg, seeds=seeds)

# One row per candidate parent: its concept_id plus every field of its stats dataclass.
candidate_rows = [
    {'concept_id': concept_id, **asdict(stats)}
    for concept_id, stats in parent_stats.items()
]
candidate_df = pd.DataFrame(candidate_rows)

In [7]:
# Show the candidate parents for the enlarged seed list (one row per candidate concept).
candidate_df

Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
0,432453,"{3655685, 4294662, 46270485, 35610650, 4202524...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",9,538,1.0,0.016453,2
1,4308794,"{4174241, 4172043, 4174251, 4173614, 4173456, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,0,0.666667,1.0,1
2,4089214,"{46284803, 4153350, 4120591, 3661842, 3661843,...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,4195,0.666667,0.001428,5
3,4041284,"{4259853, 4161577, 4161578, 4161579, 4128815, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,7135,0.666667,0.00084,5
4,4094294,"{4259853, 4161577, 4161578, 4161579, 4227116, ...","{4174241, 4172043, 4174251, 4173614, 4173456, ...",6,14450,0.666667,0.000415,4


In [8]:
# Two-digit codes 'C00' .. 'C14', shared by both filter lists below.
hn_codes = [f'C{k:02d}' for k in range(15)]

# Case-insensitive substring match: any concept_code that contains one of the codes.
head_neck_conditions = [
    Concept.concept_code.ilike(f'%{code}%')
    for code in hn_codes
]

# Exact match: concept_code equal to one of the codes.
head_neck_topog = [
    Concept.concept_code == code
    for code in hn_codes
]

In [9]:
# All Concept columns for ICDO3 rows whose code equals one of the head_neck_topog codes.
topog_query = (
    session.query(*Concept.__table__.columns)
    .filter(Concept.vocabulary_id == 'ICDO3')
    .filter(sa.or_(*head_neck_topog))
)
topog_concepts = pd.DataFrame(topog_query)

In [10]:
# All Concept columns for ICDO3 'ICDO Condition' rows whose code contains one of the
# head_neck_conditions codes and also contains '/3-'.
# NOTE(review): '/3-' presumably selects the malignant behaviour code — confirm.
hn_query = (
    session.query(*Concept.__table__.columns)
    .filter(Concept.vocabulary_id == 'ICDO3')
    .filter(Concept.concept_class_id == 'ICDO Condition')
    .filter(sa.or_(*head_neck_conditions))
    .filter(Concept.concept_code.like('%/3-%'))
)
hn_concepts = pd.DataFrame(hn_query)

In [11]:
# Search for common parent concepts separately for every topography code.
# Both dicts are keyed by the topography concept_code.
all_parents = {}     # code -> raw result of find_common_parents
all_candidates = {}  # code -> the same result flattened into a DataFrame

# Loop-invariant mask: condition rows that have no invalid_reason set.
is_valid = hn_concepts.invalid_reason.isna()

for topography in topog_concepts.itertuples():
    code = topography.concept_code
    # The topography code is a literal, not a pattern, so match it as a plain substring.
    matches_code = hn_concepts.concept_code.str.contains(code, regex=False)
    seeds = set(hn_concepts[matches_code & is_valid].concept_id)
    print(f"{code}: {topography.concept_name} ({len(seeds)} candidates)")
    all_parents[code] = find_common_parents(seeds=seeds, kg=kg)
    all_candidates[code] = pd.DataFrame([{'concept_id': c} | asdict(v) for c, v in all_parents[code].items()])
    if all_candidates[code].empty:
        # With no candidates the frame has no 'coverage'/'purity' columns, so sorting would raise KeyError.
        print(f"{code}: no common parents found")
        continue
    # Ascending sort: the highest-coverage, then highest-purity, candidates are the last five rows.
    best_result = all_candidates[code].sort_values(['coverage', 'purity']).tail(5)
    display(best_result)

C00: Lip (734 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
159,135858,"{36528135, 36519943, 36536330, 4145173, 365609...","{36562945, 36532227, 36546565, 36517893, 36519...",718,775,0.978202,0.480911,7
136,4116808,"{36528135, 36519943, 36536330, 36560921, 41779...","{36562945, 36532227, 36546565, 36517893, 36519...",718,694,0.978202,0.508499,6
135,37161350,"{36519943, 36528135, 36536330, 36560921, 36544...","{36562945, 36532227, 36546565, 36517893, 36519...",718,520,0.978202,0.579968,6
101,4054605,"{602113, 4313090, 602115, 36532227, 602114, 36...","{36562945, 36532227, 36546565, 36517893, 36519...",718,509,0.978202,0.585167,5
54,4180779,"{602113, 4313090, 602115, 36532227, 602114, 36...","{36562945, 36532227, 36546565, 36517893, 36519...",718,240,0.978202,0.749478,4


C01: Base of tongue (144 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
69,4178962,"{36560896, 36528130, 36552707, 36524036, 36536...","{36566017, 36562435, 36561419, 36555283, 36536...",142,1033,0.986111,0.120851,4
33,437498,"{36560896, 36528130, 36552707, 36536324, 36524...","{36566017, 36562435, 36561419, 36555283, 36536...",142,985,0.986111,0.125998,3
87,4054607,"{36566017, 4248065, 36562435, 4158473, 3656141...","{36566017, 36562435, 36561419, 36555283, 36536...",142,40,0.986111,0.78022,4
34,4181332,"{36566017, 4248065, 36562435, 4158473, 3656141...","{36566017, 36562435, 36561419, 36555283, 36536...",142,13,0.986111,0.916129,3
0,256633,"{36566017, 36562435, 36561419, 36555283, 36536...","{36566017, 36562435, 36561419, 36555283, 36536...",142,4,0.986111,0.972603,2


C02: Other and unspecified parts of tongue (972 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
204,432941,"{36560896, 36528130, 36552707, 36536324, 36519...","{36560896, 36528130, 36552707, 36536324, 36524...",962,555,0.989712,0.634146,8
176,4103982,"{36560896, 36528130, 36552707, 36536324, 36519...","{36560896, 36528130, 36552707, 36536324, 36524...",962,468,0.989712,0.672727,7
177,40481371,"{36560896, 36528130, 36552707, 36536324, 36519...","{36560896, 36528130, 36552707, 36536324, 36524...",962,414,0.989712,0.699128,7
139,4130375,"{36560896, 36528130, 36552707, 36536324, 36519...","{36560896, 36528130, 36552707, 36536324, 36524...",962,405,0.989712,0.703731,6
85,4178962,"{36560896, 36528130, 36552707, 36536324, 36524...","{36560896, 36528130, 36552707, 36536324, 36524...",962,213,0.989712,0.818723,5


C03: Gum (456 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
84,4246124,"{36546563, 36540420, 36548612, 602118, 602126,...","{36540420, 36548612, 36558862, 36544539, 44503...",450,165,0.986842,0.731707,3
154,37163090,"{36548612, 36540420, 36548614, 602118, 602126,...","{36548612, 36540420, 36558862, 36544539, 44503...",450,130,0.986842,0.775862,5
129,4054612,"{36548612, 36540420, 36548614, 602118, 3655886...","{36548612, 36540420, 36558862, 36544539, 44503...",450,112,0.986842,0.800712,4
82,4178963,"{36548612, 36540420, 602118, 602126, 36558862,...","{36548612, 36540420, 36558862, 36544539, 44503...",450,26,0.986842,0.945378,3
6,140950,"{36540420, 36548612, 602118, 36558862, 440335,...","{36540420, 36548612, 36558862, 36544539, 44503...",450,10,0.986842,0.978261,2


C04: Floor of mouth (608 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
74,4247357,"{36552704, 36560896, 36528130, 36552707, 36536...","{36556800, 36521986, 44498948, 36536325, 36556...",601,3795,0.988487,0.136715,3
75,439404,"{36552704, 36560896, 36528130, 36552707, 36536...","{36556800, 36521986, 44498948, 36536325, 36556...",601,3626,0.988487,0.142181,3
121,4130381,"{36556800, 36521986, 44498948, 36536325, 36556...","{36556800, 36521986, 44498948, 36536325, 36556...",601,321,0.988487,0.651844,4
76,4177101,"{36556800, 36521986, 44498948, 36536325, 36556...","{36556800, 36521986, 44498948, 36536325, 36556...",601,201,0.988487,0.749377,3
0,135750,"{36556800, 36521986, 44498948, 36536325, 36556...","{36556800, 36521986, 44498948, 36536325, 36556...",601,179,0.988487,0.770513,2


C05: Palate (762 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
166,4091365,"{602112, 36562944, 42512387, 45768712, 3653018...","{42512387, 36530184, 44501004, 36550670, 36524...",751,448,0.985564,0.626355,7
150,4028251,"{602112, 36562944, 42512387, 45768712, 3653018...","{42512387, 36530184, 44501004, 36550670, 36524...",751,424,0.985564,0.639149,6
151,37162644,"{602112, 36562944, 42512387, 36530184, 602123,...","{42512387, 36530184, 44501004, 36550670, 36524...",751,180,0.985564,0.80666,6
126,4131604,"{602112, 36562944, 42512387, 36530184, 602123,...","{42512387, 36530184, 44501004, 36550670, 36524...",751,166,0.985564,0.818975,5
85,4178964,"{602112, 42512387, 36530184, 602123, 44501004,...","{42512387, 36530184, 44501004, 36550670, 36524...",751,40,0.985564,0.949431,4


C06: Other and unspecified parts of mouth (761 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
177,4307122,"{36536324, 44498948, 36536325, 44498950, 36536...","{36552704, 36538370, 44500996, 36540422, 36560...",754,8475,0.990802,0.081699,8
150,4042502,"{36536324, 44498948, 36536325, 44498950, 36536...","{36552704, 36538370, 44500996, 36540422, 36560...",754,7978,0.990802,0.086349,7
93,43530620,"{36536324, 44498948, 36536325, 44498950, 36536...","{36552704, 36538370, 44500996, 36540422, 36560...",754,6399,0.990802,0.10541,6
192,4184252,"{44498945, 4096016, 4194323, 36536341, 4449896...","{36552704, 36538370, 44500996, 36540422, 36560...",754,6023,0.990802,0.111259,7
6,4130986,"{36536324, 44498948, 36536325, 44498950, 36536...","{36552704, 36538370, 44500996, 36540422, 36560...",754,5376,0.990802,0.123002,5


C07: Parotid glanid (191 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
108,4322566,"{44498950, 42512393, 36521996, 4096013, 365445...","{42512896, 42511876, 44498950, 36544007, 44500...",185,928,0.968586,0.166217,7
90,4122227,"{44498950, 42512393, 36521996, 36544530, 44498...","{42512896, 42511876, 44498950, 36544007, 44500...",185,902,0.968586,0.170193,6
91,4054610,"{44498950, 42512393, 36521996, 36544530, 44498...","{42512896, 42511876, 44498950, 36544007, 44500...",185,862,0.968586,0.176695,5
92,4202556,"{42512896, 4025347, 42511876, 44498950, 365440...","{42512896, 42511876, 44498950, 36544007, 44500...",185,58,0.968586,0.761317,5
68,4130984,"{42512896, 42511876, 44498950, 36544007, 44500...","{42512896, 42511876, 44498950, 36544007, 44500...",185,48,0.968586,0.793991,4


C08: Other and unspecified major salivary glands (699 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
196,25572,"{44498950, 42512393, 36521996, 4096013, 409601...","{36544530, 36562967, 36562968, 36528152, 36567...",688,526,0.984263,0.566722,7
177,4114483,"{44498950, 42512393, 36521996, 4096013, 409601...","{36544530, 36562967, 36562968, 36528152, 36567...",688,441,0.984263,0.609389,6
178,4322566,"{44498950, 42512393, 36521996, 4096013, 365445...","{36544530, 36562967, 36562968, 36528152, 36567...",688,425,0.984263,0.618149,6
155,4122227,"{44498950, 42512393, 36521996, 36544530, 44498...","{36544530, 36562967, 36562968, 36528152, 36567...",688,399,0.984263,0.632935,5
130,4054610,"{44498950, 42512393, 36521996, 36544530, 44498...","{36544530, 36562967, 36562968, 36528152, 36567...",688,359,0.984263,0.657116,4


C09: Tonsil (673 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
204,4177243,"{44498945, 36560900, 36519941, 36528141, 36560...","{36519941, 36562956, 36524045, 36530193, 36532...",666,3389,0.989599,0.164242,7
273,4054510,"{36519941, 36560915, 36536342, 36560929, 41779...","{36519941, 36562956, 36524045, 36530193, 36532...",666,1463,0.989599,0.312823,7
241,46270500,"{36519941, 36560915, 36536342, 36560929, 36519...","{36519941, 36562956, 36524045, 36530193, 36532...",666,1406,0.989599,0.321429,7
205,4054617,"{36519941, 36560915, 36536342, 36560929, 36519...","{36519941, 36562956, 36524045, 36530193, 36532...",666,1351,0.989599,0.330193,6
117,4181338,"{36519941, 36560915, 36536342, 36560929, 36519...","{36519941, 36562956, 36524045, 36530193, 36532...",666,1103,0.989599,0.376484,6


C10: Oropharynx (1174 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
123,4274025,"{4194320, 4194323, 4194328, 4194355, 4194365, ...","{36556803, 36554756, 36562951, 36558856, 36540...",1145,165228,0.975298,0.006882,12
124,4102111,"{36522894, 36403062, 36403063, 42598455, 42598...","{36556803, 36554756, 36562951, 36558856, 36540...",1145,68764,0.975298,0.016378,11
223,4266186,"{36403062, 36403063, 42598455, 42598456, 42598...","{36556803, 36554756, 36562951, 36558856, 36540...",1145,68495,0.975298,0.016442,9
1,438112,"{36403062, 36403063, 42598456, 42598461, 36403...","{36556803, 36554756, 36562951, 36558856, 36540...",1145,68127,0.975298,0.016529,10
122,443392,"{36524322, 42598456, 36559370, 36403071, 42598...","{36556803, 36554756, 36562951, 36558856, 36540...",1145,55071,0.975298,0.020368,9


C11: Nasopharynx (976 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
187,435836,"{44503040, 36526083, 36560900, 36524041, 60212...","{44503040, 36526083, 36560900, 36524041, 36542...",961,245,0.984631,0.796849,8
165,44783160,"{44503040, 36526083, 36560900, 36524041, 60212...","{44503040, 36526083, 36560900, 36524041, 36542...",961,211,0.984631,0.819966,7
164,4181960,"{44503040, 36526083, 36560900, 36524041, 60212...","{44503040, 36526083, 36560900, 36524041, 36542...",961,195,0.984631,0.831315,7
136,4054514,"{44503040, 36526083, 36560900, 36524041, 60212...","{44503040, 36526083, 36560900, 36524041, 36542...",961,185,0.984631,0.838569,6
96,4095312,"{44503040, 36526083, 36560900, 36524041, 60212...","{44503040, 36526083, 36560900, 36524041, 36542...",961,70,0.984631,0.932105,5


C12: Pyriform sinus (134 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
46,4181342,"{44498945, 36534274, 36558851, 36517896, 36556...","{44498945, 36567557, 36567567, 36539926, 36534...",133,710,0.992537,0.15777,5
31,439746,"{44498945, 36534274, 36558851, 36517896, 36556...","{44498945, 36567557, 36567567, 36539926, 36534...",133,676,0.992537,0.1644,4
38,4129252,"{44498945, 36560901, 36567557, 36567567, 43156...","{44498945, 36567557, 36567567, 36539926, 36534...",133,31,0.992537,0.810976,4
32,4180789,"{44498945, 36567557, 36567567, 4315668, 365399...","{44498945, 36567557, 36567567, 36539926, 36534...",133,5,0.992537,0.963768,4
1,435190,"{44498945, 36567557, 36567567, 36539926, 36534...","{44498945, 36567557, 36567567, 36539926, 36534...",133,1,0.992537,0.992537,2


C13: Hypopharynx (669 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
249,4129247,"{44498945, 36534274, 36558851, 36560901, 36517...","{36534274, 36558851, 36517896, 36556809, 36548...",651,378,0.973094,0.632653,8
212,44783159,"{44498945, 36534274, 36558851, 36560901, 36517...","{36534274, 36558851, 36517896, 36556809, 36548...",651,366,0.973094,0.640118,8
154,4129250,"{44498945, 36534274, 36558851, 36560901, 36517...","{36534274, 36558851, 36517896, 36556809, 36548...",651,359,0.973094,0.644554,6
99,4181342,"{44498945, 36534274, 36558851, 36517896, 36556...","{36534274, 36558851, 36517896, 36556809, 36548...",651,192,0.973094,0.772242,5
4,439746,"{44498945, 36534274, 36558851, 36517896, 36556...","{36534274, 36558851, 36517896, 36556809, 36548...",651,158,0.973094,0.804697,4


C14: Other and ill-defined sites in lip, oral cavity and pharynx (384 candidates)


Unnamed: 0,concept_id,descendants,found,coverage,pollution,completeness,purity,max_depth
132,4115106,"{42598582, 42598955, 42599092, 42599192, 43266...","{36564997, 36526091, 36528139, 36556815, 36522...",379,22528,0.986979,0.016545,7
17,4111017,"{4194462, 42598582, 4194827, 132258, 42599689,...","{36564997, 36526091, 36528139, 36556815, 36522...",379,22419,0.986979,0.016624,7
92,4028253,"{42598582, 42599844, 132565, 132573, 42599972,...","{36564997, 36526091, 36528139, 36556815, 36522...",379,22101,0.986979,0.016859,6
18,196931,"{44498945, 36536321, 44498948, 36536324, 44498...","{36564997, 36526091, 36528139, 36556815, 36522...",379,18788,0.986979,0.019774,5
128,4184252,"{44498945, 4096016, 4194323, 36536341, 4449896...","{36564997, 36526091, 36528139, 36556815, 36522...",379,6049,0.986979,0.058961,9
