In [5]:
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
# Load environment variables via python-dotenv.
load_dotenv()
# Directory the configuration CSVs below are read from (relative path).
config_path = Path('../dash_config')

In [6]:
def _read_config_csv(filename):
    """Read one CSV file located under ``config_path`` into a DataFrame."""
    return pd.read_csv(config_path / filename)


# One DataFrame per configuration CSV, read in the same order as before.
subq = _read_config_csv('subquery.csv')
queryr = _read_config_csv('query_rule.csv')
qr_lookup = _read_config_csv('query_rule_map.csv')
phen = _read_config_csv('phenotype.csv')
phen_def = _read_config_csv('phenotype_definition.csv')
meas = _read_config_csv('measure.csv')
meas_rel = _read_config_csv('measure_relationship.csv')
report = _read_config_csv('report.csv')
report_cohorts = _read_config_csv('report_cohort_map.csv')
report_ind = _read_config_csv('report_indicator_map.csv')
report_version = _read_config_csv('report_version.csv')
dash_cohort = _read_config_csv('dash_cohort.csv')
dash_cohort_map = _read_config_csv('dash_cohort_def_map.csv')

In [7]:
# Count how many subquery rows exist for each subquery_target value.
subq.subquery_target.value_counts()

subquery_target
dx_stage                         77
dx_primary                       39
meas_concept                      9
proc_concept                      4
tx_surgical                       3
demog_gender                      3
dx_mets                           2
tx_current_episode                2
obs_concept                       2
intent_rt                         1
demog_death                       1
referral_to_specialist_window     1
tx_concurrent                     1
tx_chemotherapy                   1
tx_to_death_window                1
Name: count, dtype: int64

In [8]:
# For every distinct subquery target, print how often each
# (subquery_type, subquery_temporality) pair occurs among its rows.
pair_columns = ['subquery_type', 'subquery_temporality']
for t in subq.subquery_target.unique():
    print(f"Target: {t}")
    is_target = subq.subquery_target == t
    print(subq.loc[is_target, pair_columns].value_counts())

Target: intent_rt
subquery_type  subquery_temporality
tx_rule        dt_rad                  1
Name: count, dtype: int64
Target: proc_concept
subquery_type  subquery_temporality
proc_rule      dt_proc_start           4
Name: count, dtype: int64
Target: dx_primary
subquery_type  subquery_temporality
dx_rule        dt_current_start        39
Name: count, dtype: int64
Target: tx_surgical
subquery_type  subquery_temporality
tx_rule        dt_surg                 3
Name: count, dtype: int64
Target: dx_mets
subquery_type  subquery_temporality
dx_rule        dt_mets                 2
Name: count, dtype: int64
Target: demog_gender
subquery_type  subquery_temporality
person_rule    dt_any                  3
Name: count, dtype: int64
Target: demog_death
subquery_type  subquery_temporality
person_rule    dt_death                1
Name: count, dtype: int64
Target: tx_current_episode
subquery_type  subquery_temporality
tx_rule        dt_treatment_start      2
Name: count, dtype: int64
Target: obs_c

In [9]:
from omop_constructs.alchemy.modifiers import (
    TStageMV, 
    NStageMV, 
    MStageMV, 
    GroupStageMV, 
    GradeModifierMV, 
    LateralityModifierMV, 
    SizeModifierMV,
    MetastaticDiseaseModifierMV,
    AllStageModifierMV,
    ModifiedCondition, 
)
from omop_constructs.alchemy.modifiers.condition_modifier_mv import (
    StageModifier, 
)   
from omop_constructs.alchemy.events import (
    WeightDxMV,
    WeightChangeDxMV,
    HeightDxMV,
    BSADxMV,
    CreatinineClearanceDxMV,
    EGFRDxMV,
    FEV1DxMV,
    DistressThermometerDxMV,
    ECOGDxMV,
    SmokingPYHDxMV,
)
from omop_constructs.alchemy.episodes import (
    OverarchingDiseaseEpisodeMV, TreatmentRegimenCycleMV, ConditionEpisodeMV
)
from omop_constructs.alchemy.demography import (
    PersonDemography
)
from omop_constructs.core.registry import ConstructRegistry



In [10]:
# Every construct class imported above, collected into one list and handed to
# ConstructRegistry. Later cells call plan() / ascii_dag() / create_all() on the
# resulting registry.
ALL_MVS = [
    TStageMV, NStageMV, MStageMV, GroupStageMV, 
    GradeModifierMV, LateralityModifierMV, SizeModifierMV, MetastaticDiseaseModifierMV,
    AllStageModifierMV, ModifiedCondition,     
    WeightDxMV, WeightChangeDxMV, HeightDxMV,
    BSADxMV, CreatinineClearanceDxMV, EGFRDxMV, FEV1DxMV,
    DistressThermometerDxMV, ECOGDxMV, SmokingPYHDxMV, 
    StageModifier, OverarchingDiseaseEpisodeMV, TreatmentRegimenCycleMV, 
    ConditionEpisodeMV, PersonDemography
]

mv_registry = ConstructRegistry(ALL_MVS)

In [11]:
# Print each planned item's name (left-aligned, padded to 40 characters) with its deps.
for item in mv_registry.plan():
    print(f'{item.name:40s}deps = {item.deps}')

t_stage_mv                              deps = ()
n_stage_mv                              deps = ()
m_stage_mv                              deps = ()
group_stage_mv                          deps = ()
grade_modifier_mv                       deps = ()
laterality_modifier_mv                  deps = ()
size_modifier_mv                        deps = ()
metastatic_disease_modifier_mv          deps = ()
all_stage_modifier_mv                   deps = ()
overarching_disease_episode_mv          deps = ()
treatment_regimen_cycle_mv              deps = ()
condition_episode_mv                    deps = ()
modified_conditions_mv                  deps = ('t_stage_mv', 'n_stage_mv', 'm_stage_mv', 'group_stage_mv', 'grade_modifier_mv', 'size_modifier_mv', 'laterality_modifier_mv', 'metastatic_disease_modifier_mv')
stage_modifier_mv                       deps = ('all_stage_modifier_mv',)
weight_dx_mv                            deps = ('condition_episode_mv',)
weight_change_dx_mv                     deps

In [12]:
# Print the registry's dependency graph as text.
print(mv_registry.ascii_dag())

├─ t_stage_mv
│  └─ modified_conditions_mv
├─ n_stage_mv
│  └─ modified_conditions_mv
├─ m_stage_mv
│  └─ modified_conditions_mv
├─ group_stage_mv
│  └─ modified_conditions_mv
├─ grade_modifier_mv
│  └─ modified_conditions_mv
├─ laterality_modifier_mv
│  └─ modified_conditions_mv
├─ size_modifier_mv
│  └─ modified_conditions_mv
├─ metastatic_disease_modifier_mv
│  └─ modified_conditions_mv
├─ all_stage_modifier_mv
│  └─ stage_modifier_mv
├─ overarching_disease_episode_mv
├─ treatment_regimen_cycle_mv
└─ condition_episode_mv
   ├─ weight_dx_mv
   ├─ weight_change_dx_mv
   ├─ height_dx_mv
   ├─ bsa_dx_mv
   ├─ creatinine_clearance_dx_mv
   ├─ egfr_dx_mv
   ├─ fev1_dx_mv
   ├─ dtherm_dx_mv
   ├─ ecog_dx_mv
   ├─ smoking_pyh_dx_mv
   └─ person_demography_mv


In [13]:
from omop_constructs.semantics import registry, registry_engine

# Run the registry's create_all against a connection opened with
# registry_engine.begin() (the context manager scopes the connection).
with registry_engine.begin() as conn:
    mv_registry.create_all(conn)

In [14]:
from oa_cohorts.measurables import get_measurable_registry
from oa_cohorts.core import RuleTarget, RuleTemporality

# NOTE(review): this rebinds `registry`, a name that an earlier cell also imports
# from omop_constructs.semantics; from here on `registry` is the measurable registry.
registry = get_measurable_registry()
# Look up the entry for the dx_stage target and print its event date column.
print(registry[RuleTarget.dx_stage].event_date_col())

StagedConditionMeasurable.stage_date


In [15]:
from oa_cohorts.query.subquery import Subquery, subquery_rule_map
from oa_cohorts.query.query_rule import QueryRule, ExactRule
from oa_cohorts.query.phenotype import PhenotypeDefinition, Phenotype

In [16]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from orm_loader.helpers import Base  

# Issue CREATE for the tables registered on Base.metadata, using registry_engine.
Base.metadata.create_all(registry_engine)
# Session factory bound to the same engine; used by the cells below.
Session = sessionmaker(bind=registry_engine, future=True)

In [17]:
import sqlalchemy.orm as so
# Fetch Subquery id=1 and print the SQL its select() compiles to.
# NOTE(review): `session` stays bound after this block exits and is reused by
# later cells outside any `with` statement.
with Session() as session:
    # Load ORM object
    subq_model = session.get(Subquery, 1)
    if subq_model is None:
        raise ValueError("Subquery with id=1 not found")

    # Compile SQL
    query = subq_model.select()
    # literal_binds renders parameter values inline in the printed statement.
    print(query.compile(compile_kwargs={"literal_binds": True}))

SELECT stage_modifier_mv.person_id AS person_id, stage_modifier_mv.condition_episode AS episode_id, stage_modifier_mv.condition_episode AS measure_resolver, stage_modifier_mv.stage_date AS measure_date 
FROM stage_modifier_mv 
WHERE stage_modifier_mv.stage_concept_id IN (1633330, 1633355, 1633458, 1633542, 1633642, 1633652, 1633682, 1633707, 1633712, 1633713, 1633750, 1633754, 1633800, 1633828, 1633833, 1633852, 1634031, 1634049, 1634084, 1634172, 1634231, 1634242, 1634336, 1634359, 1634380, 1634408, 1634502, 1634531, 1634564, 1634592, 1634605, 1634686, 1634741, 1634777, 1634785, 1634787, 1634809, 1634848, 1634873, 1634946, 1634955, 1634956, 1634969, 1634991, 1635013, 1635189, 1635223, 1635268, 1635281, 1635369, 1635398, 1635410, 1635413, 1635434, 1635439, 1635511, 1635525, 1635577, 1635657, 1635658, 1635675, 1635691, 1635749, 1635754, 1635824, 1635842, 1635865)


In [18]:
# Count query rules per query_matcher value.
queryr.query_matcher.value_counts()

query_matcher
exact                 627
hierarchy             227
substring             154
hierarchyexclusion     23
presence                8
scalar                  8
absence                 2
phenotype               1
Name: count, dtype: int64

In [19]:
from typing import Any


def clean_dict(d, model_cls) -> dict[str, Any]:
    """Reduce a record to the non-missing entries that are columns of ``model_cls``.

    Args:
        d: Mapping of field name to value, e.g. one row produced by
            ``DataFrame.to_dict(orient='records')``.
        model_cls: Class exposing ``__table__.columns.keys()``; only keys
            returned by that call are kept.

    Returns:
        A new dict holding the entries of ``d`` whose key is one of the model's
        column keys and whose value is not reported as missing by ``pd.isna``.
    """
    column_names = set(model_cls.__table__.columns.keys())
    return {
        key: value
        for key, value in d.items()
        # Test membership first so values stored under unrelated keys are never
        # passed to pd.isna (which is ambiguous for list-like values).
        if key in column_names and not pd.isna(value)
    }

# Rename the CSV columns to the attribute names passed to Subquery, then build
# one Subquery per row from the fields clean_dict keeps.
subquery_renames = {
    'subquery_target': 'target',
    'subquery_temporality': 'temporality',
    'subquery_name': 'name',
    'subquery_short_name': 'short_name',
}
subquery_records = subq.rename(columns=subquery_renames).to_dict(orient='records')
subqueries = [
    Subquery(**clean_dict(record, Subquery)) for record in subquery_records
]

In [20]:
from oa_cohorts.core.enums import RuleMatcher
# Add a column mapping each query_matcher name to the RuleMatcher attribute of the
# same name (None where the value is missing). This modifies `queryr` in place.
# NOTE(review): a later cell's recorded output reports this column's dtype as
# StringDtype, and the QueryRule matchers are converted again after construction.
queryr['query_matcher_obj'] = queryr.query_matcher.map(lambda x: getattr(RuleMatcher, x) if not pd.isna(x) else None)

In [21]:
# Sanity check: look up a RuleMatcher member by attribute name.
getattr(RuleMatcher, 'hierarchy')

<RuleMatcher.hierarchy: 'hierarchy'>

In [22]:
# Rename the CSV columns to the attribute names passed to QueryRule, then build
# one QueryRule per row from the fields clean_dict keeps.
query_rule_renames = {
    'query_matcher_obj': 'matcher',
    'query_concept_id': 'concept_id',
    'query_notes': 'notes',
    'scalar_threshold': 'scalar_threshold',
}
query_rule_records = queryr.rename(columns=query_rule_renames).to_dict(orient='records')
queryrules = [
    QueryRule(**clean_dict(record, QueryRule)) for record in query_rule_records
]

In [23]:

# Replace each rule's matcher with the RuleMatcher attribute of that name.
# NOTE(review): getattr needs a string attribute name, so this assumes every
# rule has a non-missing matcher; confirm against query_rule.csv.
for qr in queryrules:
    qr.matcher = getattr(RuleMatcher, qr.matcher)

In [24]:
# Inspect the dtype pandas assigned to the mapped matcher column.
queryr["query_matcher_obj"].dtype

<StringDtype(na_value=nan)>

In [25]:
# with Session() as session:
#     session.add_all(subqueries)
#     session.add_all(queryrules)
#     session.commit()

In [26]:
# rows = qr_lookup.to_dict(orient="records")

In [27]:
# with Session() as session:
#     session.execute(subquery_rule_map.insert(), rows)
#     session.commit()

In [28]:
# Fetch Subquery id=85 for inspection.
# NOTE(review): `session` here is whatever an earlier `with Session() as session:`
# cell left bound; that with-block has already exited, so this cell depends on
# prior execution order.
s = session.get(Subquery, 85) 

In [29]:
# Display the fetched object using its notebook representation.
s

Field,Value
ID,85
Name,Curative RT
Short name,curative_rt
Target,intent_rt
Temporality,dt_rad
Rule count,1

Field,Value
Type,presence
Concept,Curative - procedure intent (4162591)


In [30]:
# NOTE(review): this head() call is not the last expression of the cell, so the
# notebook does not display its result.
phen.head()

# One Phenotype per row of phen, built from the fields clean_dict keeps.
phenotypes = [
    Phenotype(**clean_dict(d, Phenotype)) for d in phen.to_dict(orient='records')
]

# One PhenotypeDefinition per row of phen_def, built the same way.
phenotype_defs = [
    PhenotypeDefinition(**clean_dict(d, PhenotypeDefinition)) for d in phen_def.to_dict(orient='records')
]

In [31]:
# with Session() as session:
#     session.add_all(phenotypes)
#     session.add_all(phenotype_defs)
#     session.commit()

In [32]:
from omop_alchemy.cdm.model import Concept
# Query all Concept columns for the concept ids referenced by
# phen_def.query_concept_id (de-duplicated via set) and load them into a DataFrame.
# NOTE(review): this rebinds `session`; later cells keep using that name after
# this with-block has exited.
with Session() as session:
    concepts = pd.DataFrame(
            session.query(
                *Concept.__table__.columns
            )
            .filter(Concept.concept_id.in_(list(set(phen_def.query_concept_id))))
    )

In [33]:
# Rows of phen_def whose query_concept_id was not returned by the concept query above.
phen_def[~phen_def.query_concept_id.isin(concepts.concept_id)]

Unnamed: 0,phenotype_id,query_concept_id


In [34]:
# List the column names of `concepts` as one comma-separated string.
# The names are read from the frame itself rather than from the first 'Disorder'
# row, so the cell no longer raises IndexError when no row has
# concept_class_id == 'Disorder'; the resulting string is the same otherwise.
','.join(str(column) for column in concepts.columns)

'concept_id,concept_name,domain_id,vocabulary_id,concept_class_id,standard_concept,concept_code,valid_start_date,valid_end_date,invalid_reason'

In [None]:
#(43054909,'Tobacco smoking status','Observation','LOINC','Clinical Observation','S','72166-2','2013-09-01','2099-12-31','')

In [None]:
# Fetch Phenotype id=1 and display it.
# NOTE(review): relies on `session` left bound by an earlier with-block cell.
pheno = session.get(Phenotype, 1)
pheno

In [None]:
# Fetch QueryRule id=1.
# NOTE(review): `qr` is also the loop variable of the earlier matcher-conversion
# cell; this rebinds it. Relies on `session` left bound by an earlier cell.
qr = session.get(QueryRule, 1)


In [None]:
import sqlalchemy as sa
# Select QueryRule rows with distinct() applied on the matcher column, ordered by
# matcher and then query_rule_id.
# NOTE(review): presumably intended as "one rule per matcher"; passing a column to
# distinct() is backend-dependent. Confirm against the database in use.
stmt = (
    sa.select(QueryRule)
    .distinct(QueryRule.matcher)
    .order_by(QueryRule.matcher, QueryRule.query_rule_id)
)

# Execute on the `session` left bound by an earlier cell and display the ORM objects.
rules = session.execute(stmt).scalars().all()
rules

In [None]:
from IPython.display import display

# Render each selected rule with its notebook display representation.
for r in rules:
    display(r)

In [None]:

# Select up to 10 Subquery rows joined to their rules, filtered to the listed
# matchers, with distinct() applied on (subquery_id, matcher).
# NOTE(review): the list names the same eight matcher values shown by the
# query_matcher value_counts output earlier in this notebook.
stmt = (
    sa.select(Subquery)
    .join(Subquery.rules)
    .where(QueryRule.matcher.in_([
        RuleMatcher.exact,
        RuleMatcher.hierarchy,
        RuleMatcher.hierarchyexclusion,
        RuleMatcher.presence,
        RuleMatcher.absence,
        RuleMatcher.scalar,
        RuleMatcher.phenotype,
        RuleMatcher.substring,
    ]))
    .distinct(Subquery.subquery_id, QueryRule.matcher)
    .limit(10)
)

# NOTE(review): this rebinds `subqueries`, which an earlier cell set to the list
# of Subquery objects built from subquery.csv; the commented-out seeding cell
# that adds `subqueries` to a session would pick up this result instead if it
# were run after this cell.
subqueries = session.scalars(stmt).all()
subqueries

In [None]:

# Render each subquery in `subqueries`; the loop variable rebinds `s`.
for s in subqueries:
    display(s)

In [None]:
# Preview the first rows of the measure configuration table.
meas.head()

In [None]:
from oa_cohorts.query.measure import Measure, MeasureRelationship

In [None]:
# Rename the CSV columns to the attribute names passed to Measure, then build
# one Measure per row from the fields clean_dict keeps.
measure_renames = {
    'measure_name': 'name',
    'measure_combination': 'combination',
}
measure_records = meas.rename(columns=measure_renames).to_dict(orient='records')
measures = [
    Measure(**clean_dict(record, Measure)) for record in measure_records
]

In [None]:
from oa_cohorts.core.enums import RuleCombination

# Convert the raw values on each Measure into their typed forms.
# Both conversions are guarded so that re-running this cell leaves already
# converted objects untouched; previously a second run evaluated `True == 't'`
# and reset every person_ep_override to False.
for m in measures:
    if not isinstance(m.combination, RuleCombination):
        # Drop the 'rule_' prefix before looking the value up in RuleCombination.
        m.combination = RuleCombination(m.combination.replace('rule_', ''))
    if not isinstance(m.person_ep_override, bool):
        # Only the literal 't' maps to True; any other raw value becomes False.
        m.person_ep_override = m.person_ep_override == 't'

In [None]:
# with Session() as session:
#     session.add_all(measures)
#     session.commit()

In [None]:
# Build one MeasureRelationship per row of meas_rel from the fields clean_dict keeps.
measure_relationship_records = meas_rel.to_dict(orient='records')
measure_relationships = [
    MeasureRelationship(**clean_dict(record, MeasureRelationship))
    for record in measure_relationship_records
]

In [None]:
# with Session() as session:
#     session.add_all(measure_relationships)
#     session.commit()

In [None]:
# Fetch Measure id=5 for inspection.
# NOTE(review): rebinds `s` (earlier a Subquery and a loop variable) and relies
# on `session` left bound by an earlier with-block cell.
s = session.get(Measure, 5) 

In [None]:
# Display the fetched object using its notebook representation.
s