## V2 OMOP Testing Cohort For Outpatient Referral

In [1]:
## Google SDK environment: credentials file and default project
# NOTE(review): the credentials path below is an absolute, user-specific
# location; confirm it exists on the machine running this notebook.
import os

os.environ.update({
    'GOOGLE_APPLICATION_CREDENTIALS': '/home/wui/.config/gcloud/application_default_credentials.json',
    'GCLOUD_PROJECT': 'som-nero-phi-jonc101',
})

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## BigQuery API: client plus the coordinates of the source OMOP dataset
from google.cloud import bigquery

# Substituted into the `{project_id}.{dataset_id}` placeholders of the SQL
# templates in the cells below.
project_id = 'som-rit-phi-starr-prod'
dataset_id = 'starr_omop_cdm5_deid_latest'

# No arguments: the client picks up credentials/project from the environment.
client = bigquery.Client()



In [13]:
# Test cohort with extra primary care referrals (mostly added telephone encounters).
# Keeps the rows with Endo_visit_rank = 1 and renames the Endo_* columns to
# Specialty_* for the downstream cells.
# The SQL contains no {placeholders}, so it is used as a plain string.
sql = """
    SELECT 
        person_id, 
        PrimaryCare_visit_id,
        PrimaryCare_DATETIME,
        Endo_visit_id as Specialty_visit_id,
        Endo_DATETIME as Specialty_DATETIME
    FROM 
        `som-nero-phi-jonc101.wui_omop_peds.V2_PrimaryCare_Office_Endocrine_2015_2019`
    WHERE 
        Endo_visit_rank = 1
    ORDER BY
        person_id, PrimaryCare_DATETIME
"""

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_prelim_cohort"

# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)

# Run the query, writing its result into the destination table, and wait for it.
job_config = bigquery.QueryJobConfig(destination=table_id)
query_job = client.query(sql, job_config=job_config)
query_job.result()
print("Query results loaded to the table {}".format(table_id))


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_prelim_cohort


In [14]:
# The same patient can have more than one endo referral over time;
# keep only the earliest primary-care referral per patient.
#
# The window ORDER BY lists tie-breakers after PrimaryCare_DATETIME: when a
# patient has several rows sharing the earliest timestamp, ordering on the
# timestamp alone leaves the choice of the rank-1 row arbitrary, so the cohort
# could differ between runs of the same code. The extra keys make the pick
# deterministic without changing which timestamp wins.
# The SQL contains no {placeholders}, so it is used as a plain string.
sql = """
WITH 
    rank_cohort AS
        (
        SELECT 
            *,
            ROW_NUMBER() OVER (
                PARTITION BY person_id 
                ORDER BY 
                    PrimaryCare_DATETIME,
                    Specialty_DATETIME,
                    PrimaryCare_visit_id,
                    Specialty_visit_id
            ) AS visit_rank
        FROM 
            `som-nero-phi-jonc101.wui_omop_peds.V2_prelim_cohort`
        )
SELECT 
    * 
FROM 
    rank_cohort
WHERE 
    visit_rank = 1
ORDER BY
    person_id, PrimaryCare_DATETIME 
"""

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort


In [15]:
# Measurement table: measurements for cohort patients, with concept names.
#
# A measurement is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, measurement_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# and its visit has visit_concept_id 0 or 9202.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include records dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.
sql = """

WITH measurement_table AS
    (
        SELECT 
            m.person_id,
            m.visit_occurrence_id AS visit_id,
            m.measurement_DATETIME,
            m.value_as_number,
            m.range_low,
            m.range_high,
            m.measurement_source_concept_id,
            m.measurement_source_value,
            m.measurement_concept_id
        FROM 
            `{project_id}.{dataset_id}.measurement` m 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                (m.person_id = c.person_id) 
            AND 
                (
                (DATETIME_DIFF(c.PrimaryCare_DATETIME, m.measurement_DATETIME, MONTH) BETWEEN 0 AND 6) 
                 OR 
                (m.visit_occurrence_id = c.Specialty_visit_id)
                )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              m.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202)
    )
    
SELECT
    mt.*,
    c.concept_name
FROM measurement_table mt
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON mt.measurement_concept_id = c.concept_id
ORDER BY 
    mt.person_id,
    mt.visit_id
""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_measurement"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))
    

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_measurement


In [22]:
# Procedure table: procedures for cohort patients, with concept names.
#
# A procedure is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, procedure_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# and its visit has visit_concept_id 0 or 9202.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include records dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.
sql = """

WITH procedure_table AS
    (
        SELECT 
            p.person_id,
            p.visit_occurrence_id AS visit_id,
            p.procedure_DATETIME,
            p.procedure_source_concept_id,
            p.procedure_source_value,
            p.procedure_concept_id
        FROM 
            `{project_id}.{dataset_id}.procedure_occurrence` p 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                p.person_id = c.person_id AND
                (
                (DATETIME_DIFF(c.PrimaryCare_DATETIME, p.procedure_DATETIME, MONTH) BETWEEN 0 AND 6) 
                 OR 
                (p.visit_occurrence_id = c.Specialty_visit_id)
                )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              p.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202)
    )
    
SELECT
    pt.*,
    c.concept_name
FROM procedure_table pt
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON pt.procedure_concept_id = c.concept_id
ORDER BY 
    pt.person_id,
    pt.visit_id
""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_procedure"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_procedure


In [24]:
# Condition table: conditions for cohort patients, with concept names.
#
# A condition is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, condition_start_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# and its visit has visit_concept_id 0 or 9202.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include records dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.
sql = """

WITH condition_table AS 
(
        SELECT 
            x.person_id,
            x.visit_occurrence_id AS visit_id,
            x.condition_start_DATETIME AS condition_DATETIME,
            x.condition_source_concept_id,
            x.condition_source_value,
            x.condition_concept_id
        FROM 
            `{project_id}.{dataset_id}.condition_occurrence` x 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                x.person_id = c.person_id AND
                (
                (DATETIME_DIFF(c.PrimaryCare_DATETIME, x.condition_start_DATETIME, MONTH) BETWEEN 0 AND 6) 
                 OR 
                (x.visit_occurrence_id = c.Specialty_visit_id)
                )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              x.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202)
)

SELECT
    t.*,
    c.concept_name
FROM condition_table t
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON t.condition_concept_id = c.concept_id
ORDER BY 
    t.person_id,
    t.visit_id

""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_condition"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_condition


In [25]:
# Drug table: drug exposures for cohort patients, with concept names.
#
# An exposure is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, drug_exposure_start_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# and its visit has visit_concept_id 0 or 9202.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include records dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.

sql = """

WITH drug_table AS 
(
        SELECT 
            x.person_id,
            x.visit_occurrence_id AS visit_id,
            x.drug_exposure_start_DATETIME AS drug_DATETIME,
            x.drug_exposure_end_DATETIME AS drug_end_DATETIME,
            x.drug_exposure_id,
            x.drug_source_concept_id,
            x.drug_source_value,
            x.drug_concept_id
        FROM 
            `{project_id}.{dataset_id}.drug_exposure` x 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                x.person_id = c.person_id AND
                (
                (DATETIME_DIFF(c.PrimaryCare_DATETIME, x.drug_exposure_start_DATETIME, MONTH) BETWEEN 0 AND 6) 
                 OR 
                (x.visit_occurrence_id = c.Specialty_visit_id)
                )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              x.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202)
)

SELECT
    t.*,
    c.concept_name
FROM drug_table t
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON t.drug_concept_id = c.concept_id
ORDER BY 
    t.person_id,
    t.visit_id

""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_drug"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_drug


In [26]:
# Device table: device exposures for cohort patients, with concept names.
#
# An exposure is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, device_exposure_start_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# and its visit has visit_concept_id 0 or 9202.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include records dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.

sql = """

WITH device_table AS 
(
        SELECT 
            x.person_id,
            x.visit_occurrence_id AS visit_id,
            x.device_exposure_start_DATETIME AS device_DATETIME,
            x.device_exposure_end_DATETIME AS device_end_DATETIME,
            x.device_exposure_id,
            x.device_concept_id
        FROM 
            `{project_id}.{dataset_id}.device_exposure` x 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                (x.person_id = c.person_id)
                AND
                    (
                    (DATETIME_DIFF(c.PrimaryCare_DATETIME, x.device_exposure_start_DATETIME, MONTH) BETWEEN 0 AND 6) 
                     OR 
                    (x.visit_occurrence_id = c.Specialty_visit_id)
                    )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              x.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202)
)

SELECT
    t.*,
    c.concept_name
FROM device_table t
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON t.device_concept_id = c.concept_id
ORDER BY 
    t.person_id,
    t.visit_id

""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_device"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_device


In [4]:
# When rerunning this table on 5/31/2020, there were ~3000 fewer notes and
# ~50 fewer distinct patients compared to the previous run with the same code.
# NOTE(review): cause not established here; the source dataset name ends in
# `_latest`, so it may have been refreshed between runs -- confirm.

# Note table: progress / telephone notes for cohort patients, with concept names.
#
# A note is kept when it belongs to a cohort patient AND either
#   * DATETIME_DIFF(PrimaryCare_DATETIME, note_DATETIME, MONTH) is 0..6, or
#   * it was recorded on the patient's specialty visit,
# its visit has visit_concept_id 0, 9202 or 581477, and its lower-cased title
# contains 'progress' or 'tele'.
# NOTE(review): DATETIME_DIFF with MONTH appears to count calendar-month
# boundaries, so a diff of 0 may include notes dated after the primary-care
# visit within the same month -- confirm this window is intended.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.
# The visit join is written as INNER JOIN: the WHERE filter on
# v.visit_concept_id already discarded rows without a matching visit, so the
# result is unchanged and the intent is explicit.

sql = """

WITH note_table AS 
(
        SELECT 
            x.person_id,
            x.visit_occurrence_id AS visit_id,
            x.note_DATETIME,
            x.note_id,
            x.note_title,
            x.note_text,
            x.note_type_concept_id AS note_concept_id
        FROM 
            `{project_id}.{dataset_id}.note` x 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort` c 
            ON 
                (x.person_id = c.person_id) 
                AND
                    (
                    (DATETIME_DIFF(c.PrimaryCare_DATETIME, x.note_DATETIME, MONTH) BETWEEN 0 AND 6) 
                     OR 
                    (x.visit_occurrence_id = c.Specialty_visit_id)
                    )
        INNER JOIN 
            `{project_id}.{dataset_id}.visit_occurrence` v
            ON
              x.visit_occurrence_id = v.visit_occurrence_id
        WHERE
            v.visit_concept_id IN (0,9202, 581477) AND
             (lower(x.note_title) LIKE '%progress%' OR
              lower(x.note_title) LIKE '%tele%')
)

SELECT
    t.*,
    c.concept_name
FROM note_table t
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON t.note_concept_id = c.concept_id
ORDER BY 
    t.person_id,
    t.visit_id
""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_note"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_note


In [5]:
# Note NLP table: NLP terms for the notes in V2_test_note, with concept names.
#
# Joins note_nlp to the previously materialised note table on note_id and
# keeps only rows with term_exists = 'Y'. person_id, visit_id, note_title and
# the timestamp are taken from the note table.
#
# The note table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.

sql = """

WITH nlp_table AS 
(
        SELECT 
            n.person_id,
            n.visit_id,
            x.note_id,
            x.lexical_variant,
            n.note_title,
            n.note_DATETIME AS nlp_DATETIME,
            x.term_exists,
            x.term_modifiers,
            x.note_nlp_concept_id AS nlp_concept_id
        FROM 
            `{project_id}.{dataset_id}.note_nlp` x 
        INNER JOIN 
            `som-nero-phi-jonc101.wui_omop_peds.V2_test_note` n 
            ON 
                x.note_id = n.note_id 
        WHERE 
            x.term_exists = 'Y'
        
)

SELECT
    t.*,
    c.concept_name
FROM nlp_table t
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c ON t.nlp_concept_id = c.concept_id

ORDER BY 
    t.person_id,
    t.visit_id
""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_nlp"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_nlp


In [33]:
# Person table for patient demographics.
#
# Selects birth date and gender/race/ethnicity concept ids for every distinct
# person_id in the test cohort, then joins the concept table three times to
# attach a readable name to each of the three concept ids.
#
# The cohort table is fully qualified (matching the other table references in
# this notebook) so the query does not depend on the client's default project.

sql = """

WITH person_table AS
    (
        SELECT 
            p.person_id,
            p.birth_DATETIME,
            p.gender_concept_id,
            p.race_concept_id,
            p.ethnicity_concept_id
        FROM 
            `{project_id}.{dataset_id}.person` p 
        INNER JOIN 
            (
            SELECT DISTINCT person_id 
            FROM `som-nero-phi-jonc101.wui_omop_peds.V2_test_cohort`
            ) c 
            ON 
                p.person_id = c.person_id 
    )
    
SELECT
    pt.person_id,
    pt.birth_DATETIME,
    pt.gender_concept_id,
    c1.concept_name AS gender,
    pt.race_concept_id,
    c2.concept_name AS race,
    pt.ethnicity_concept_id,
    c3.concept_name AS ethnicity
FROM person_table pt
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c1 ON pt.gender_concept_id = c1.concept_id
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c2 ON pt.race_concept_id = c2.concept_id
    LEFT JOIN 
        `{project_id}.{dataset_id}.concept` c3 ON pt.ethnicity_concept_id = c3.concept_id
ORDER BY 
    pt.person_id
""".format_map({'project_id':project_id, 'dataset_id':dataset_id})

# Destination table for the query result.
table_id = "som-nero-phi-jonc101.wui_omop_peds.V2_test_demographic"
job_config = bigquery.QueryJobConfig(destination=table_id)
# Remove any existing copy first; a missing table is not an error.
client.delete_table(table_id, not_found_ok=True)
query_job = client.query(sql, job_config=job_config)
query_job.result()  # wait for the query job to finish
print("Query results loaded to the table {}".format(table_id))

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V2_test_demographic
