This notebook is a short demo that introduces some of the fundamental design patterns of the OMOP_Alchemy library.

In [1]:
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from omop_alchemy.model.vocabulary import Concept, ConceptView, Domain, Vocabulary, Concept_Class
from omop_alchemy import configure_logging, get_engine_name, load_environment, TEST_PATH, ROOT_PATH
from omop_alchemy.cdm.base import bootstrap
from omop_alchemy.model.clinical import Condition_Occurrence, Condition_OccurrenceView
from omop_alchemy.model.structural import EpisodeView, Episode_EventView

In [2]:
# This demo assumes that you have created a .env file in the ROOT_PATH containing
# your database connection string - see .example_dotenv for details.

# Set up logging first so that the environment loading and bootstrap steps below are reported.
configure_logging()
load_environment()
# Presumably the connection string read from the environment - it is passed straight to create_engine.
engine_string = get_engine_name()

# echo=False keeps the emitted SQL out of the notebook output.
engine = sa.create_engine(engine_string, future=True, echo=False)
# NOTE: create=True enables schema creation (the log output reports "Schema creation enabled").
bootstrap(engine, create=True)

2026-01-03 21:29:24,910 | INFO     | omop_alchemy.omop_alchemy.config | Environment variables loaded from .env file
2026-01-03 21:29:24,910 | INFO     | omop_alchemy.omop_alchemy.config | Database engine configured
2026-01-03 21:29:24,920 | INFO     | omop_alchemy.omop_alchemy.cdm.base.declarative | Bootstrapping OMOP schema (create=True)
2026-01-03 21:29:24,920 | INFO     | omop_alchemy.omop_alchemy.cdm.base.declarative | Schema creation enabled


In [3]:
# Session factory bound to the engine, plus the single session that the queries in this notebook share.
Session = sessionmaker(bind=engine, future=True)
session = Session()

In [4]:
# Fetch the first Concept row returned by the query; the bare `c` displays its default object repr.
c = session.query(Concept).first()
c

<omop_alchemy.model.vocabulary.concept.Concept at 0x14d442120>

In [5]:
# Plain-dict form of the concept (column name -> value, as shown in the output).
c.to_dict()

{'concept_id': 1,
 'concept_name': 'Domain',
 'domain_id': 'Metadata',
 'vocabulary_id': 'Domain',
 'concept_class_id': 'Domain',
 'concept_code': 'OMOP generated',
 'valid_start_date': datetime.date(1970, 1, 1),
 'valid_end_date': datetime.date(2099, 12, 31)}

In [6]:
# JSON-string form of the same concept; in the output the dates appear as ISO-formatted strings.
c.to_json()

'{"concept_class_id": "Domain", "concept_code": "OMOP generated", "concept_id": 1, "concept_name": "Domain", "domain_id": "Metadata", "valid_end_date": "2099-12-31", "valid_start_date": "1970-01-01", "vocabulary_id": "Domain"}'

In [7]:
# Five standard concepts from the Condition domain.
# Ordering by the primary key makes the LIMIT deterministic, so re-running the
# notebook shows the same rows instead of whatever the database returns first.
standard_conditions = (
    session.query(Concept)
    .filter(
        Concept.domain_id == "Condition",
        Concept.standard_concept == "S",
    )
    .order_by(Concept.concept_id)
    .limit(5)
    .all()
)

# Show only the identifying fields rather than the default object reprs.
[
    (concept.concept_id, concept.concept_name, concept.standard_concept)
    for concept in standard_conditions
]


[(604729, 'Agenesis of calcaneus', 'S'),
 (619070, 'Iris roseola caused by Treponema pallidum', 'S'),
 (751724, 'Ganglioneuroma of nervous system, NOS', 'S'),
 (1076329, 'Neurological complication following vaccination', 'S'),
 (1553072, 'Ewing sarcoma of unknown primary site', 'S')]

`Concept` is the basic class that you should use for most ETL steps. For introspection of relationships (including the triggering of lazy loads), `ConceptView` offers much richer expressions.

The two classes are kept separate so that the base class stays fast, while the view class can offer the full benefits of fully described object relationships.

In [8]:
# Same kind of lookup as for Concept, but through the relationship-aware ConceptView class.
cv = session.query(ConceptView).first()
cv

<omop_alchemy.model.vocabulary.concept.ConceptView at 0x14d4430e0>

`cv.domain_id` is the raw string value of the column, already returned by the query above, whereas `cv.domain` returns the related `Domain` object.

In [9]:
# Compare the raw column value (domain_id) with the related objects reached via cv.domain and cv.vocabulary.
cv.domain_id, type(cv.domain_id), cv.domain, type(cv.domain), cv.vocabulary, type(cv.vocabulary)

('Metadata',
 str,
 <Domain Metadata - Metadata>,
 omop_alchemy.model.vocabulary.domain.Domain,
 <Vocabulary Domain>,
 omop_alchemy.model.vocabulary.vocabulary.Vocabulary)

In [10]:
# Ten standard SNOMED concepts, fetched through the relationship-aware ConceptView.
# .all() materialises the rows once: without it `concepts` is an unexecuted Query,
# and both the indexing below and any later iteration would re-run the SQL.
# Ordering by the primary key keeps the LIMIT deterministic between runs.
concepts = (
    session.query(ConceptView)
    .filter(ConceptView.vocabulary_id == 'SNOMED')
    .filter(ConceptView.standard_concept == 'S')
    .order_by(ConceptView.concept_id)
    .limit(10)
    .all()
)

concepts[0].concept_name

'Negative'

In [11]:
# Get details about each concept dynamically: for every concept, print its id and
# name followed by the sizes of its ancestors, descendants, incoming_relationships
# and outgoing_relationships collections.
for concept in concepts:
    print(
        concept.concept_id,
        concept.concept_name,
        len(concept.ancestors),
        len(concept.descendants),
        len(concept.incoming_relationships),
        len(concept.outgoing_relationships),
    )

9189 Negative 1 1 4 4
9191 Positive 1 1 4 4
507263 Primary lower subciliary and transconjunctival blepharoplasty with skin, muscle and fat excision and canthal sling 2 1 4 4
600817 Sublabial 2 1 4 4
604729 Agenesis of calcaneus 1 1 4 4
607590 Body height 1 1 4 4
619070 Iris roseola caused by Treponema pallidum 1 1 4 4
762840 Arteriovenous graft 2 1 4 4
1073110 Structure of cartilaginous portion of left pharyngotympanic tube 2 1 4 4
1074685 Bone plate submitted as specimen 1 1 4 4


In [12]:

# Explicit join between the two flat base classes: the result row carries both the
# Condition_Occurrence and the Concept matched on its condition_concept_id.
row = session.query(Condition_Occurrence, Concept).join(
    Concept,
    Condition_Occurrence.condition_concept_id == Concept.concept_id,
).first()

# Index 0 is the Condition_Occurrence, index 1 the joined Concept.
row[0].condition_concept_id, row[1].concept_name


(44501475, 'Squamous cell carcinoma, NOS, of branchial cleft')

We don't always want to use these kinds of implicit joins, which is why they are separated out into View classes. They can, however, be very useful for exploration, as well as for serialisation to downstream APIs.

In [13]:
# With the View class no join is written out: the concept is reached through the
# condition_concept attribute of the row itself.
row = session.query(Condition_OccurrenceView).first()

row.condition_concept_id, row.condition_concept.concept_name

(44501475, 'Squamous cell carcinoma, NOS, of branchial cleft')

In [14]:
from omop_alchemy.model.clinical import Person, PersonView
from omop_alchemy.model.health_system import Location, Provider, Care_Site

In [15]:
# First Person row, via the flat base class.
p = session.query(Person).first()
p

<Person 1>

In [16]:
# Simple Person class that just has the raw column data - flat, predictable, and
# cheap to load - no joins and no lazy relationships.
# Note that __dict__ also exposes SQLAlchemy's internal _sa_instance_state entry.
p.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x14d9b2f30>,
 'ethnicity_concept_id': 38003564,
 'gender_source_value': None,
 'year_of_birth': 1974,
 'gender_source_concept_id': None,
 'race_source_value': None,
 'person_id': 1,
 'race_source_concept_id': None,
 'ethnicity_source_value': None,
 'month_of_birth': 1,
 'ethnicity_source_concept_id': None,
 'visit_occurrence_id': None,
 'day_of_birth': None,
 'location_id': None,
 'visit_detail_id': None,
 'birth_datetime': None,
 'provider_id': None,
 'gender_concept_id': 8507,
 'care_site_id': None,
 'race_concept_id': 38003611,
 'person_source_value': None}

In [17]:
# Subtle in this example, but PersonView has actually loaded the gender concept
# relationship so that its repr can print a label instead of the raw concept_id.
pv = session.query(PersonView).first()
pv

<Person 1: M(52)>

In [18]:
# Concept names reached through the gender, race and ethnicity attributes of the view.
pv.gender.concept_name, pv.race.concept_name, pv.ethnicity.concept_name

('MALE', 'Micronesian', 'Not Hispanic or Latino')

In [19]:
# Class-level mapping of concept_id field name -> ExpectedDomain (see output).
PersonView.__expected_domains__

{'gender_concept_id': <omop_alchemy.cdm.base.mixins.ExpectedDomain at 0x127d530e0>,
 'race_concept_id': <omop_alchemy.cdm.base.mixins.ExpectedDomain at 0x127fdda90>,
 'ethnicity_concept_id': <omop_alchemy.cdm.base.mixins.ExpectedDomain at 0x127fddbd0>}

In [20]:
# NOTE: this rebinds `p` - previously a flat Person - to a PersonView instance;
# the domain-validation cells that follow use attributes accessed on this PersonView.
p = session.query(PersonView).first()
p

<Person 1: M(52)>

In [21]:
# Domain-rule violations for this person; the output shows an empty list at this point.
p.domain_violations

[]

In [22]:
# Pick a concept from the Condition domain; it is assigned to a gender field in a
# later cell, where it does not belong.
wrong_concept = session.query(Concept).filter(Concept.domain_id == "Condition").first()
wrong_concept

<omop_alchemy.model.vocabulary.concept.Concept at 0x14d9fa970>

In [23]:
# Domain rules declared for the class: one DomainRule per checked field (see output).
PersonView.collect_domain_rules()

[DomainRule(table='person', field='gender_concept_id', allowed_domains={'Gender'}, allowed_classes=None),
 DomainRule(table='person', field='race_concept_id', allowed_domains={'Race'}, allowed_classes=None),
 DomainRule(table='person', field='ethnicity_concept_id', allowed_domains={'Ethnicity'}, allowed_classes=None)]

In [24]:
# Deliberately assign a Condition-domain concept to the gender field.
# NOTE(review): `p` is attached to `session`, so this is a pending change that the
# session (autoflush is left at its default) may send to the database on a later query.
p.gender_concept_id = wrong_concept.concept_id

In [25]:
# Validity flag for the domain rules; the output shows False after the assignment above.
p.is_domain_valid

False

In [26]:
# we can do application-side validation of domain rules
# tbc if this can be made more efficient at scale to truly support ETL
# so that we can move it to the base class?
violations = p.domain_violations

# The invalid gender_concept_id assigned earlier is still pending on a
# session-attached object; with autoflush left at its default, the next query
# would write it to the database. Roll back so the demo leaves the data untouched
# (the session's objects are simply reloaded on next access).
session.rollback()

violations

["gender_concept_id not in domain(s): ['Gender']"]

In [27]:
# age as a hybrid property
from datetime import date  # used by the date-based examples in the cells that follow
pv.age

52

In [28]:
# Age as of a specific reference date, evaluated on the already-loaded instance.
pv.age_at(date(2020, 1, 1))

46

In [29]:
# Because age_at is a hybrid attribute, the same logic can also be used as a query
# filter - one definition, two execution modes (Python instance and SQL expression).
session.query(PersonView).filter(PersonView.age_at(date(2020, 1, 1)) >= 65).limit(5).all()

[<Person 2: F(73)>,
 <Person 4: M(73)>,
 <Person 11: F(75)>,
 <Person 21: F(76)>,
 <Person 38: F(73)>]

In [30]:
# If using the base Person class, we need to spell out the age calculation in the query itself.
from sqlalchemy import func  # kept for any later cells that rely on it; not used in this cell

on = date(2020, 1, 1)
# The reference date is a Python constant, so take its year in Python rather than
# through func.extract(...): the generic func form renders a plain two-argument
# function call instead of SQL's EXTRACT(field FROM expr) and is not portable.
# Ordering by the primary key keeps the LIMIT deterministic between runs.
q = (
    session.query(Person)
    .filter((on.year - Person.year_of_birth) >= 65)
    .order_by(Person.person_id)
    .limit(5)
    .all()
)

In [31]:
# This is a trivial example in this case, but where joined elements are involved it can
# make a big difference in the expressiveness / formalism of complex definitions.
# `q` holds the list of Person rows returned by .all() in the previous cell.
q

[<Person 2>, <Person 4>, <Person 11>, <Person 21>, <Person 38>]

In [32]:
# First five people matching the under_observation_on(2020-06-01) criterion.
# The limit is applied in SQL rather than by slicing the result of .all(), so only
# five rows are loaded instead of every matching person; ordering by the primary
# key keeps the selection deterministic.
(
    session.query(PersonView)
    .filter(PersonView.under_observation_on(date(2020, 6, 1)))
    .order_by(PersonView.person_id)
    .limit(5)
    .all()
)

[<Person 11: F(75)>,
 <Person 13: F(31)>,
 <Person 15: M(23)>,
 <Person 21: F(76)>,
 <Person 23: F(12)>]

In [33]:
# Up to ten people who were at least 18 on 1 Jan 2020 and for whom is_deceased is true.
# Successive .filter() calls are combined with AND.
cohort = (
    session.query(PersonView)
    .filter(PersonView.age_at(date(2020, 1, 1)) >= 18)
    .filter(PersonView.is_deceased == True)  # noqa: E712 - builds a SQL comparison, not a Python one
    .limit(10)
    .all()
)

cohort

[<Person 84: F(56)>,
 <Person 192: F(29)>,
 <Person 200: F(49)>,
 <Person 241: M(57)>,
 <Person 244: M(60)>,
 <Person 270: F(43)>,
 <Person 276: M(71)>,
 <Person 334: M(39)>,
 <Person 343: F(25)>,
 <Person 369: F(30)>]

In [34]:
# Plain-dict form of the first cohort member.
cohort[0].to_dict()

{'person_id': 84,
 'year_of_birth': 1970,
 'month_of_birth': 6,
 'gender_concept_id': 8532,
 'race_concept_id': 38003579,
 'ethnicity_concept_id': 38003563}

In [35]:
# Related Death object reached through the `death` attribute of the view.
cohort[0].death

<omop_alchemy.model.clinical.death.Death at 0x14dba4050>

In [36]:
# Observation_Period objects related to this person (a list, as shown in the output).
pv.observation_periods

[<omop_alchemy.model.derived.observation_period.Observation_Period at 0x14d9886e0>]

In [37]:
# Everyone whose first_observation_date is on or after 2020-10-01 and whose
# last_observation_date is on or before 2021-10-31 (both criteria are ANDed).
q = (
    session.query(PersonView)
    .filter(
        PersonView.first_observation_date >= date(2020, 10, 1),
        PersonView.last_observation_date <= date(2021, 10, 31),
    )
    .all()
)


In [38]:
# Number of people returned by the observation-window query above.
len(q)

106

In [39]:
# First episode, via the relationship-aware EpisodeView class.
ep = session.query(EpisodeView).first()
ep

<Episode 1: 32533 (2020-10-06)>

In [40]:
# Names of the concepts reached through episode_concept and episode_object_concept.
ep.episode_concept.concept_name, ep.episode_object_concept.concept_name

('Disease Episode', 'Squamous cell carcinoma, NOS, of branchial cleft')

In [41]:
# All Episode_EventView rows that belong to the episode loaded above.
events = session.query(Episode_EventView).filter(
    Episode_EventView.episode_id == ep.episode_id
).all()

# The polymorphic relationship to the clinical fact tables can be context aware and
# resolved dynamically.
events

[<EpisodeEvent ep=1 Condition_Occurrence#1>,
 <EpisodeEvent ep=1 Measurement#1>,
 <EpisodeEvent ep=1 Measurement#2>,
 <EpisodeEvent ep=1 Measurement#3>]

In [42]:
# event_table of the first event; the output shows the table name 'condition_occurrence'.
events[0].event_table

'condition_occurrence'