This notebook is a simple demo to introduce some of the fundamental design patterns from the OMOP_Alchemy library 

In [1]:
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from omop_alchemy.cdm.model.vocabulary import Concept, ConceptView, Domain, Vocabulary, Concept_Class
from orm_loader.helpers import configure_logging, bootstrap, bulk_load_context
from omop_alchemy import get_engine_name, load_environment, TEST_PATH, ROOT_PATH
from omop_alchemy.cdm.model.clinical import Condition_Occurrence, Condition_OccurrenceView
from omop_alchemy.cdm.model.structural import EpisodeView, Episode_EventView

In [2]:
# this demo assumes that you have created a .env file in the ROOT_PATH with your database connection string - see .example_dotenv for details

configure_logging()
load_environment()
engine_string = get_engine_name()

engine = sa.create_engine(engine_string, future=True, echo=False)
bootstrap(engine, create=True)

2026-01-12 10:07:16,760 | INFO     | sql_loader.omop_alchemy.config | Environment variables loaded from .env file
2026-01-12 10:07:16,761 | INFO     | sql_loader.omop_alchemy.config | Database engine configured


In [3]:
Session = sessionmaker(bind=engine, future=True)
session = Session()

In [4]:
c = session.query(Concept).first()
c

<omop_alchemy.cdm.model.vocabulary.concept.Concept at 0x110211010>

In [5]:
c.to_dict()

{'concept_id': 1,
 'concept_name': 'Domain',
 'domain_id': 'Metadata',
 'vocabulary_id': 'Domain',
 'concept_class_id': 'Domain',
 'concept_code': 'OMOP generated',
 'valid_start_date': datetime.date(1970, 1, 1),
 'valid_end_date': datetime.date(2099, 12, 31)}

In [6]:
c.to_json()

'{"concept_class_id": "Domain", "concept_code": "OMOP generated", "concept_id": 1, "concept_name": "Domain", "domain_id": "Metadata", "valid_end_date": "2099-12-31", "valid_start_date": "1970-01-01", "vocabulary_id": "Domain"}'

In [7]:
standard_conditions = (
    session.query(Concept)
    .filter(
        Concept.domain_id == "Condition",
        Concept.standard_concept == "S",
    )
    .limit(5)
    .all()
)

[(c.concept_id, c.concept_name, c.standard_concept) for c in standard_conditions]


[(22274, 'Neoplasm of uncertain behavior of larynx', 'S'),
 (22281, 'Sickle cell-hemoglobin SS disease', 'S'),
 (22288, 'Hereditary elliptocytosis', 'S'),
 (22340, 'Esophageal varices without bleeding', 'S'),
 (22350, 'Edema of larynx', 'S')]

`Concept` is the basic class that you should be using for most ETL steps, but for introspection of relationships (including the triggering of lazy loads), `ConceptView` offers much richer expressions.

This is separated to ensure speed of base class is maintained, while optimising the potential benefits of fully-described object relationships

In [8]:
cv = session.query(ConceptView).first()
cv

<omop_alchemy.cdm.model.vocabulary.concept.ConceptView at 0x110211fd0>

`domain_id` is the actual string content of the column that was returned from the query already performed, where `cv.domain` returns a related Domain object

In [9]:
cv.domain_id, type(cv.domain_id), cv.domain, type(cv.domain), cv.vocabulary, type(cv.vocabulary)

('Metadata',
 str,
 <Domain Metadata - Metadata>,
 omop_alchemy.cdm.model.vocabulary.domain.Domain,
 <Vocabulary Domain>,
 omop_alchemy.cdm.model.vocabulary.vocabulary.Vocabulary)

In [10]:
# because concept ancestor and concept relationship are very large tables, ConceptView relationships have 
# been set to lazy='select', these relationships will not load until accessed

concepts = (
    session.query(ConceptView)
    .filter(ConceptView.vocabulary_id == 'SNOMED')
    .filter(ConceptView.standard_concept == 'S')
    .limit(30)
)

concepts[0].concept_name

'Hospital admission'

In [11]:
# get details about concept dynamically - ancestors, descendants, relationships

# because of the deferred loading strategy, these relationships will now be querying 
# those tables once for every print statement in the below loop - very efficient for
# single concepts, not for sets of concepts

for concept in concepts[:2]:
    print(
        concept.concept_id,
        concept.concept_name,
        len(concept.ancestors),
        len(concept.descendants),
        len(concept.incoming_relationships),
        len(concept.outgoing_relationships),
    )

8715 Hospital admission 5 219 361 361
9173 Inactive 5 1 7 7


In [12]:
# when known in advance that these relationships will be needed, use joined loading to
# load them in the original query and only hit the big table once

from sqlalchemy.orm import selectinload

def concept_hierarchy_bundle():
    return (
        selectinload(ConceptView.ancestors),
        selectinload(ConceptView.descendants),
    )

def concept_relationship_bundle():
    return (
        selectinload(ConceptView.incoming_relationships),
        selectinload(ConceptView.outgoing_relationships),
    )

concepts = (
    session.query(ConceptView)
    .filter(ConceptView.vocabulary_id == 'SNOMED')
    .filter(ConceptView.standard_concept == 'S')
    .options(
        *concept_hierarchy_bundle(),
        *concept_relationship_bundle()
    )
    .limit(30)
    .all()
)


In [13]:
for concept in concepts:
    print(
        concept.concept_id,
        concept.concept_name,
        len(concept.ancestors),
        len(concept.descendants),
        len(concept.incoming_relationships),
        len(concept.outgoing_relationships),
    )

8715 Hospital admission 5 219 361 361
9173 Inactive 5 1 7 7
9174 Obsolete 5 1 7 7
9176 Patient status determination, deceased 4 7 12 12
9177 Other 5 1 9 9
9181 Active 5 1 7 7
9189 Negative 4 1 184 184
9190 Not detected 4 3 213 213
9191 Positive 7 6 231 231
9192 Trace 6 1 20 20
22274 Neoplasm of uncertain behavior of larynx 36 45 49 49
22281 Sickle cell-hemoglobin SS disease 35 12 74 74
22288 Hereditary elliptocytosis 44 10 49 49
22340 Esophageal varices without bleeding 29 1 30 30
22350 Edema of larynx 16 9 39 39
22426 Congenital macrostomia 30 5 35 35
22492 Foreign body in pharynx 26 13 60 60
22557 Malignant tumor of submandibular gland 49 182 18 18
22665 Chronic peptic ulcer with hemorrhage AND with perforation but without obstruction 33 1 17 17
22666 Vomiting after gastrointestinal tract surgery 18 3 21 21
22722 Accessory salivary gland 33 2 17 17
22820 Tuberculosis of esophagus 36 1 26 26
22839 Overlapping malignant neoplasm of larynx 38 1 23 23
22856 Polyglandular dysfunction 6 21

In [14]:
row = (
    session.query(Condition_Occurrence, Concept)
    .join(Concept, Condition_Occurrence.condition_concept_id == Concept.concept_id)
    .first()
)

row[0].condition_concept_id, row[1].concept_name

(36402497, 'Round cell liposarcoma of unknown primary site')

we don't want to be needing to define joins every time, but equally we don't want to force the loading of relationships that are not required for simple queries.
this is why they are separated out into View classes, but they can be very useful for exploration, as well as for serialisation to downstream apis

In [15]:
row = (
    session.query(Condition_OccurrenceView)
    .first()
)

row.condition_concept_id, row.condition_concept.concept_name

(36402497, 'Round cell liposarcoma of unknown primary site')

In [16]:
from omop_alchemy.cdm.model.clinical import Person, PersonView
from omop_alchemy.cdm.model.health_system import Location, Provider, Care_Site

In [17]:
p = session.query(Person).first()
p

<Person 1>

In [18]:
# simple person class that just has the raw column data - flat, predictable, and cheap to load - no joins and no lazy relationships
p.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x110e47110>,
 'day_of_birth': None,
 'ethnicity_source_concept_id': None,
 'visit_occurrence_id': None,
 'birth_datetime': None,
 'location_id': None,
 'visit_detail_id': None,
 'gender_concept_id': 45518388,
 'provider_id': None,
 'race_concept_id': 45456238,
 'care_site_id': None,
 'ethnicity_concept_id': 38003564,
 'person_source_value': None,
 'year_of_birth': 1976,
 'gender_source_concept_id': None,
 'gender_source_value': None,
 'person_id': 1,
 'race_source_concept_id': None,
 'race_source_value': None,
 'month_of_birth': 12,
 'ethnicity_source_value': None}

In [19]:
# subtle in this example, but personview has actually loaded the gender concept relationship to print the label instead of the raw concept_id
pv = session.query(PersonView).first()
pv

<Person 1: G(50)>

In [20]:
pv.gender.concept_name, pv.race.concept_name, pv.ethnicity.concept_name

('Gender unknown', 'Ethnic category - 2001 census', 'Not Hispanic or Latino')

In [21]:
PersonView.__expected_domains__

{'gender_concept_id': <omop_alchemy.cdm.base.domain_checking.ExpectedDomain at 0x10fd05fd0>,
 'race_concept_id': <omop_alchemy.cdm.base.domain_checking.ExpectedDomain at 0x10ff8d310>,
 'ethnicity_concept_id': <omop_alchemy.cdm.base.domain_checking.ExpectedDomain at 0x10ff8d450>}

In [22]:
p = session.query(PersonView).first()
p

<Person 1: G(50)>

In [23]:
p.domain_violations

[]

In [24]:
wrong_concept = (
    session.query(Concept)
    .filter(Concept.domain_id == "Condition")
    .first()
)
wrong_concept

<omop_alchemy.cdm.model.vocabulary.concept.Concept at 0x110e86b30>

In [25]:
PersonView.collect_domain_rules()

[DomainRule(table='person', field='gender_concept_id', allowed_domains={'Gender'}, allowed_classes=None),
 DomainRule(table='person', field='race_concept_id', allowed_domains={'Race'}, allowed_classes=None),
 DomainRule(table='person', field='ethnicity_concept_id', allowed_domains={'Ethnicity'}, allowed_classes=None)]

In [26]:
p.gender_concept_id = wrong_concept.concept_id

In [27]:
p.is_domain_valid

False

In [28]:
# we can do application-side validation of domain rules 
# tbc if this can be made more efficient at scale to truly support ETL 
# so that we can move it to the base class?
p.domain_violations

["gender_concept_id not in domain(s): ['Gender']"]

In [29]:
# age as a hybrid property
from datetime import date
pv.age

50

In [30]:
pv.age_at(date(2020, 1, 1))

44

In [31]:
# because we are using a hybrid property, we can filter on it in queries - same logic but two execution modes
(
    session.query(PersonView)
    .filter(PersonView.age_at(date(2020, 1, 1)) >= 65)
    .limit(5)
    .all()
)

[<Person 2: A(75)>,
 <Person 38: O(75)>,
 <Person 49: S(71)>,
 <Person 52: S(71)>,
 <Person 72: S(72)>]

In [32]:
# if using the base Person class, we would need to do the age calculation in the query itself
from sqlalchemy import func
on = date(2020, 1, 1)
q = (
    session.query(Person)
    .filter((sa.func.extract("year", sa.literal(on)) - Person.year_of_birth) >= 65)
    .limit(5)
    .all()
)

In [33]:
# this is a trivial example in this case but in the instance of joined elements it can make a big difference in expressiveness / formalism of complex definitions
q

[<Person 2>, <Person 38>, <Person 49>, <Person 52>, <Person 72>]

In [34]:
session.query(PersonView).filter(PersonView.under_observation_on(date(2020, 6, 1))).all()[:5]

[<Person 1: G(50)>,
 <Person 4: F(68)>,
 <Person 10: G(41)>,
 <Person 11: G(14)>,
 <Person 13: S(58)>]

In [35]:
cohort = (
    session.query(PersonView)
    .filter(
        PersonView.age_at(date(2020, 1, 1)) >= 18,
        PersonView.is_deceased == True,
    )
    .limit(10)
    .all()
)

cohort

[<Person 1: G(50)>,
 <Person 69: G(46)>,
 <Person 92: T(68)>,
 <Person 106: G(29)>,
 <Person 129: G(26)>,
 <Person 131: T(27)>,
 <Person 154: F(36)>,
 <Person 160: U(24)>,
 <Person 197: S(69)>,
 <Person 221: F(47)>]

In [36]:
cohort[0].to_dict()

{'person_id': 1,
 'year_of_birth': 1976,
 'month_of_birth': 12,
 'gender_concept_id': 8689,
 'race_concept_id': 45456238,
 'ethnicity_concept_id': 38003564}

In [37]:
cohort[0].death

<omop_alchemy.cdm.model.clinical.death.Death at 0x110213770>

In [38]:
pv.observation_periods

[<omop_alchemy.cdm.model.derived.observation_period.Observation_Period at 0x110213620>]

In [39]:
q = (
    session.query(PersonView)
    .filter(PersonView.first_observation_date >= date(2020, 10, 1))
    .filter(PersonView.last_observation_date <= date(2021, 10, 31))
).all()


In [40]:
len(q)

96

In [41]:
ep = session.query(EpisodeView).first()
ep

<Episode 1: 32533 (2020-03-11)>

In [42]:
ep.episode_concept.concept_name, ep.episode_object_concept.concept_name

('Disease Episode', 'Round cell liposarcoma of unknown primary site')

In [43]:
ep.events

[<omop_alchemy.cdm.model.clinical.condition_occurrence.Condition_Occurrence at 0x110f38050>,
 <omop_alchemy.cdm.model.clinical.measurement.Measurement at 0x110ea4ad0>,
 <omop_alchemy.cdm.model.clinical.measurement.Measurement at 0x110f382d0>,
 <omop_alchemy.cdm.model.clinical.measurement.Measurement at 0x110f38410>]

In [44]:
events = (
    session.query(Episode_EventView)
    .filter(Episode_EventView.episode_id == ep.episode_id)
    .all()
)

# polymorphic relationship to clinical fact tables can be context aware and resolved dynamically
events

[<EpisodeEvent ep=1 Condition_Occurrence#1>,
 <EpisodeEvent ep=1 Measurement#1>,
 <EpisodeEvent ep=1 Measurement#2>,
 <EpisodeEvent ep=1 Measurement#3>]

In [45]:
events[0].event_table

'condition_occurrence'