In [1]:
import omop_alchemy as oa
from omop_alchemy.helpers.populate_db import populate_demo_db, to_load_vocabulary, to_load_clinical, to_load_health_system

In [2]:
# Prerequisite for the vocabulary load further down: download a small Athena zip file
# (no more than 2-4 vocabularies is suggested for demo purposes) and unzip it into
# omop_alchemy/resources/ohdsi_vocabs before running this notebook.

# Load the files listed in to_load_health_system.
populate_demo_db(to_load_health_system)

/Users/georginakennedy/cloudstor/CBDRH/ACDN/OMOP_Alchemy/omop_alchemy/resources/demo_data
complete load for: CARE_SITE.csv
complete load for: LOCATION.csv
complete load for: PROVIDER.csv


In [3]:

# Load the files listed in to_load_clinical.
# NOTE(review): the recorded output of this cell reports "Error loading data file PERSON.csv";
# confirm that file is present and loads before relying on Person rows in later cells.
populate_demo_db(to_load_clinical)

/Users/georginakennedy/cloudstor/CBDRH/ACDN/OMOP_Alchemy/omop_alchemy/resources/demo_data
Error loading data file PERSON.csv. Have you unzipped it in the correct location (/Users/georginakennedy/cloudstor/CBDRH/ACDN/OMOP_Alchemy/omop_alchemy/resources)?
complete load for: CONDITION_OCCURRENCE.csv
complete load for: MEASUREMENT.csv


In [4]:
# Load the files listed in to_load_vocabulary (the recorded output shows them being read
# from the resources/ohdsi_vocabs folder).
populate_demo_db(to_load_vocabulary)

/Users/georginakennedy/cloudstor/CBDRH/ACDN/OMOP_Alchemy/omop_alchemy/resources/ohdsi_vocabs
complete load for: VOCABULARY.csv
complete load for: CONCEPT.csv
complete load for: CONCEPT_CLASS.csv
complete load for: DOMAIN.csv
complete load for: RELATIONSHIP.csv


In [None]:

import sqlalchemy.orm as so
import sqlalchemy.sql.sqltypes as sss
from omop_alchemy.db.config import engine, config
from omop_alchemy.tables.clinical import Person, Condition_Occurrence, Measurement, Observation

# Fetch every Person together with their conditions and, for each condition,
# its concept, its modifier measurements (with their concepts) and its related
# observations (with their concepts).
#
# Each relationship on a path gets its own chained joinedload(): when several
# attributes are passed to a single joinedload() call only the LAST one is
# eager-loaded, while the earlier ones keep whatever loading strategy the
# mapping configures. The results are read after the session is closed, so the
# whole path has to be populated while the session is still open.
with so.Session(engine) as sess:
    people = (
        sess.query(Person)
        .options(
            so.joinedload(Person.conditions)
            .joinedload(Condition_Occurrence.condition_concept),
            so.joinedload(Person.conditions)
            .joinedload(Condition_Occurrence.modifiers)
            .joinedload(Measurement.measurement_concept),
            so.joinedload(Person.conditions)
            .joinedload(Condition_Occurrence.related_obs)
            .joinedload(Observation.observation_concept),
        )
        .all()
    )

In [None]:
# Per person: each condition's concept name paired with the measurement
# concepts of that condition's modifiers.
[
    [
        (
            condition.condition_concept.concept_name,
            [modifier.measurement_concept for modifier in condition.modifiers],
        )
        for condition in person.conditions
    ]
    for person in people
]

In [None]:
# Per person: each condition's concept name paired with the observation
# concepts of that condition's related observations.
[
    [
        (
            condition.condition_concept.concept_name,
            [observation.observation_concept for observation in condition.related_obs],
        )
        for condition in person.conditions
    ]
    for person in people
]

In [None]:
from omop_alchemy.tables.clinical import Person, Condition_Occurrence, Measurement, Observation
from omop_alchemy.tables.vocabulary import Concept, Concept_Relationship

# Two aliases of Concept so the same table can sit on both ends of the
# Concept_Relationship join.
Regimen = so.aliased(Concept, name='regimen')
Drugs = so.aliased(Concept, name='drugs')

with so.Session(engine) as sess:
    # HemOnc 'Regimen' concepts joined through Concept_Relationship to HemOnc
    # 'Component' concepts, highest regimen concept_id first.
    query = (
        sess.query(
            Regimen.concept_id.label('regimen_id'),
            Regimen.concept_name.label('regimen_name'),
            Concept_Relationship.relationship_id,
            Drugs.concept_id.label('drug_id'),
            Drugs.concept_name.label('drug_name'),
        )
        .join(Concept_Relationship, Concept_Relationship.concept_id_1 == Regimen.concept_id)
        .join(Drugs, Concept_Relationship.concept_id_2 == Drugs.concept_id)
        .filter(
            Regimen.vocabulary_id == 'HemOnc',
            Regimen.concept_class_id == 'Regimen',
            Drugs.vocabulary_id == 'HemOnc',
            Drugs.concept_class_id == 'Component',
        )
        .order_by(Regimen.concept_id.desc())
    )
    r = query.all()

In [None]:
# Display the rows returned by the regimen/component query.
r

In [None]:
import csv
from pathlib import Path
from datetime import datetime

import sqlalchemy as sa
import sqlalchemy.orm as so
import sqlalchemy.sql.sqltypes as sss

from omop_alchemy.db.config import engine, config
from omop_alchemy.tables.clinical import Person, Condition_Occurrence, Measurement
from omop_alchemy.tables.health_system import Care_Site, Location, Provider
from omop_alchemy.tables.vocabulary import Concept, Vocabulary, Concept_Class, Domain, Relationship, \
                                Concept_Relationship, Concept_Ancestor

# TODO: add validation and checks so that users know how and why to prepare these files (and the size limits that apply for demo purposes)

# Vocabulary load spec: source file name -> ORM class.
# The 'folder' entry is not a file; it names the sub-directory holding the files.
to_load_vocabulary = {
    'folder': 'ohdsi_vocabs',
    'VOCABULARY.csv': Vocabulary,
    'CONCEPT.csv': Concept,
    'CONCEPT_CLASS.csv': Concept_Class,
    'DOMAIN.csv': Domain,
    'RELATIONSHIP.csv': Relationship,
    # Currently disabled:
    # 'CONCEPT_RELATIONSHIP.csv': Concept_Relationship,
    # 'CONCEPT_ANCESTOR.csv': Concept_Ancestor,
}

# Health-system load spec: source file name -> ORM class.
# The 'folder' entry is not a file; it names the sub-directory holding the files.
to_load_health_system = {
    'folder': 'demo_data',
    'CARE_SITE.csv': Care_Site,
    'LOCATION.csv': Location,
    'PROVIDER.csv': Provider,
}

# Clinical load spec: source file name -> ORM class.
# The 'folder' entry is not a file; it names the sub-directory holding the files.
to_load_clinical = {
    'folder': 'demo_data',
    'PERSON.csv': Person,
    'CONDITION_OCCURRENCE.csv': Condition_Occurrence,
    'MEASUREMENT.csv': Measurement,
    # Currently disabled:
    # 'CONCEPT.csv': Concept,
}

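# Flexible loading of OHDSI vocabulary (and demo) files from <config.VOCAB_PATH>/<folder>, e.g. resources/ohdsi_vocabs.
# The helpers below convert each text field read from a file into the value type of its target column.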

def datetime_conversion(dt, fmt):
    """Parse the text ``dt`` into a ``datetime`` using the strptime format ``fmt``.

    Args:
        dt: Field text read from a source file. An empty string or ``None``
            (which ``csv.DictReader`` supplies for fields missing from a short
            row) means "no value".
        fmt: ``datetime.strptime`` format string.

    Returns:
        The parsed ``datetime``, or ``None`` when ``dt`` holds no value.

    Raises:
        ValueError: if ``dt`` is non-empty text that does not match ``fmt``.
    """
    if dt is None or dt == '':
        return None
    return datetime.strptime(dt, fmt)
    
def convert_date_col(dt):
    """Convert a ``YYYYMMDD`` field via ``datetime_conversion``.

    Returns whatever ``datetime_conversion`` returns for the '%Y%m%d' format.
    """
    date_format = '%Y%m%d'
    return datetime_conversion(dt, date_format)
    
def convert_time_col(dt):
    """Convert an ``HHMMSS`` field via ``datetime_conversion``.

    Returns whatever ``datetime_conversion`` returns for the '%H%M%S' format.
    NOTE(review): that is a full datetime rather than a time-of-day value;
    confirm the target Time columns accept it on the database in use.
    """
    time_format = '%H%M%S'
    return datetime_conversion(dt, time_format)

def convert_datetime_col(dt):
    """Convert a ``YYYYMMDDHHMMSS`` field via ``datetime_conversion``.

    Returns whatever ``datetime_conversion`` returns for the '%Y%m%d%H%M%S' format.
    """
    timestamp_format = '%Y%m%d%H%M%S'
    return datetime_conversion(dt, timestamp_format)

def callable_pass(s):
    """Identity converter: hand ``s`` back exactly as received.

    Lets columns that need no conversion share the same call convention as
    the other converters.
    """
    unchanged = s
    return unchanged

def convert_int(i):
    """Convert ``i`` to ``int``, falling back to 0 when it cannot be converted.

    Args:
        i: Field text (or any value accepted by ``int()``).

    Returns:
        ``int(i)``, or 0 for empty, missing (``None``) or non-numeric input.

    Only conversion failures are absorbed; unrelated exceptions such as
    ``KeyboardInterrupt`` now propagate instead of being silently turned into 0.
    """
    try:
        return int(i)
    except (TypeError, ValueError, OverflowError):
        return 0
    
def convert_dec(i):
    """Convert ``i`` to ``Decimal``, falling back to 0 when it cannot be converted.

    Args:
        i: Field text (or any value accepted by ``Decimal()``).

    Returns:
        ``Decimal(i)``, or 0 for empty, missing (``None``) or non-numeric input.
    """
    # Imported here because this file's import block does not provide Decimal;
    # without it every call raised NameError, which the former bare ``except``
    # swallowed, so every value came back as 0.
    from decimal import Decimal, InvalidOperation

    try:
        return Decimal(i)
    except (InvalidOperation, TypeError, ValueError):
        return 0

# Converter for each supported SQLAlchemy column type: every converter takes the
# field text read from a file and returns the value to store in the column.
type_map = {
    # whole numbers
    sss.BigInteger: convert_int,
    sss.Integer: convert_int,
    # decimals
    sss.Numeric: convert_dec,
    # dates and times
    sss.Date: convert_date_col,
    sss.Time: convert_time_col,
    sss.DateTime: convert_datetime_col,
    # text is stored as read
    sss.String: callable_pass,
}

def get_type_lookup(interface):
    """Build ``{column key: converter}`` for the table mapped by ``interface``.

    Args:
        interface: Mapped class exposing ``__table__``.

    Returns:
        Dict mapping each column key to the ``type_map`` converter for that
        column's type.

    Raises:
        KeyError: if neither a column's type class nor any of its base classes
            has an entry in ``type_map``.
    """
    lookup = {}
    # ``columns`` is the public accessor for the table's column collection.
    for column in interface.__table__.columns:
        column_type = type(column.type)
        # Walk the MRO (the exact class comes first) so that a subclass of a
        # mapped type reuses its parent's converter instead of failing.
        converter = next(
            (type_map[base] for base in column_type.__mro__ if base in type_map),
            None,
        )
        if converter is None:
            raise KeyError(
                f'no converter in type_map for column {column.key!r} '
                f'of type {column_type.__name__}'
            )
        lookup[column.key] = converter
    return lookup



# Load spec processed by this cell; assign to_load_vocabulary or
# to_load_health_system instead to load one of the other groups.
to_load = to_load_clinical

with so.Session(engine) as sess:
    # The 'folder' entry names the sub-directory of config.VOCAB_PATH holding the files.
    folder = Path(config.VOCAB_PATH) / to_load['folder']
    print(folder)
    for ohdsi_file, interface in to_load.items():
        # Skip the 'folder' entry by key. Comparing its value with folder.name
        # only works while the folder value is a single path component.
        if ohdsi_file == 'folder':
            continue

        # newline='' is what the csv module requires for files handed to a
        # reader; the explicit encoding keeps the load independent of the
        # platform's default locale.
        # NOTE(review): assumes the source files are UTF-8 encoded - confirm.
        with open(folder / ohdsi_file, 'r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file, delimiter='\t')
            field_map = get_type_lookup(interface)

            for row in reader:
                # Keep only the fields that are columns of the mapped table,
                # converted to the type of their column.
                record = {field: field_map[field](data) for field, data in row.items() if field in field_map}
                sess.add(interface(**record))

            print(f'complete load for: {ohdsi_file}')

    # Single commit at the end: either every listed file is stored or none is.
    sess.commit()


In [None]:
# Print every mapped class in Person's registry, followed by its column keys.
for mapper in Person.registry.mappers:
    print(mapper.class_)
    column_keys = mapper.c.keys()
    print('\t', column_keys)

In [None]:
# Display the `expression` of the Person.gender_concept mapped attribute
# (presumably a relationship to Concept - confirm against the model definition).
Person.gender_concept.expression