### V3 OMOP Pediatric Referral

In [55]:
## Setting up Google SDK environment
import os

# Point the Google client libraries at the gcloud application-default
# credentials file. The path is resolved under the current user's home
# directory instead of a hardcoded /Users/<name> location, so the notebook
# is not tied to a single machine account.
# NOTE(review): ~/.config/gcloud is the gcloud location on macOS/Linux;
# Windows stores this file elsewhere — adjust if needed.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(
    os.path.expanduser('~'),
    '.config', 'gcloud', 'application_default_credentials.json',
)
# Project exported to the environment for the Google Cloud tooling.
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101'

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Setting up BQ API
from google.cloud import bigquery
# The client is created with no explicit arguments, so credentials and the
# project are presumably taken from the environment variables set earlier
# in this cell — TODO confirm.
client = bigquery.Client()
# Source project and dataset; both are substituted into the SQL templates
# in later cells via format_map_dict.
project_id = 'som-rit-phi-starr-prod'
dataset_id = 'starr_omop_cdm5_deid_latest'

In [61]:
# Cohort configuration
# Study window as calendar years (the queries compare with >= and <=), plus
# a version tag that prefixes the output table names.
year_start = '2014'
year_end = '2020'
prefix = 'V3'

# Where result tables are written, and the dataset that holds dep_map.
save_project_id = 'som-nero-phi-jonc101'
save_dataset_id = 'wui_omop_peds'
map_dataset_id = 'starr_datalake2018'

# Output table names, e.g. V3_Outpt_Cohort_2014_2020.
table_id_outpt = ''.join([prefix, '_Outpt_Cohort_', year_start, '_', year_end])
table_id_endorefer = ''.join([prefix, '_Endorefer_Cohort_', year_start, '_', year_end])

# Values substituted into the {placeholder} fields of the SQL templates.
format_map_dict = {
    'project_id': project_id,
    'dataset_id': dataset_id,
    'save_project_id': save_project_id,
    'save_dataset_id': save_dataset_id,
    'year_start': year_start,
    'year_end': year_end,
    'map_dataset_id': map_dataset_id,
}


In [62]:
def save_query_table(sql, table_str):
    """Run a query and store its result in a BigQuery table.

    Any existing table named ``table_str`` is replaced by the query result.
    The replacement uses the WRITE_TRUNCATE write disposition rather than
    deleting the table before the query runs, so a query that fails leaves
    the previously saved table in place instead of destroying it.

    Args:
        sql: Query text to execute.
        table_str: Destination table, passed to
            ``QueryJobConfig(destination=...)``; callers in this notebook
            use the ``project.dataset.table`` form.

    Returns:
        None. A confirmation line is printed once the job has finished.

    Raises:
        Whatever ``query_job.result()`` raises when the job fails.
    """
    # Uses the module-level `client` and `bigquery` names.
    job_config = bigquery.QueryJobConfig(
        destination=table_str,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )
    query_job = client.query(sql, job_config=job_config)
    query_job.result()  # block until the job completes
    print("Query results loaded to the table {}".format(table_str))

In [58]:
# Endocrinology referral cohort.
#
# CTEs in the query below:
#   PedEndoReferral - procedure_occurrence rows whose procedure_source_value
#                     matches both 'referral' and 'endo' (case-insensitive)
#                     and whose procedure datetime falls 0-6575 days after
#                     the person's birth datetime (6575 days is roughly
#                     18 years).
#   PrimaryCare     - visits with visit_concept_id in (0,9202,581477,5083),
#                     start and end on the same day, start year within
#                     [year_start, year_end], and a mapped department whose
#                     specialty_dep_c is NOT one of '7','81','106'.
#   Endocrine       - visits whose mapped department specialty_dep_c IS one
#                     of '7','81','106' (presumably the endocrine specialty
#                     codes, going by the CTE name - TODO confirm).
#   Office_Visit    - visit ids that have a note with
#                     note_class_concept_id = 3000735.
#
# The final SELECT ties each referral to the PrimaryCare visit with the same
# person_id and visit_occurrence_id, pairs it with every Endocrine visit of
# that person starting 0-180 days after the primary-care visit, keeps only
# Endocrine visits present in Office_Visit, and numbers them per referral
# visit by start time (Endo_visit_rank).
#
# NOTE(review): PrimaryCare uses LEFT JOINs, but the WHERE clause filters on
# m.specialty_dep_c with NOT IN. Visits with no dep_map match have a NULL
# specialty and are therefore dropped - confirm this exclusion is intended.
# NOTE(review): the result is written to a table; the trailing ORDER BY
# probably has no lasting effect on the stored row order - confirm.
# NOTE: {placeholders} are filled from format_map_dict by plain text
# substitution; year_start/year_end end up as bare numbers in the SQL.
sql = """
WITH 

PedEndoReferral AS
(SELECT 
    pr.person_id,
    pr.visit_occurrence_id,
    pr.procedure_DATETIME as referral_DATETIME,
    pr.procedure_source_value AS referral
 FROM 
     `{project_id}.{dataset_id}.procedure_occurrence` pr 
 LEFT JOIN 
    `{project_id}.{dataset_id}.person` p 
 ON 
     pr.person_id = p.person_id
 WHERE 
    REGEXP_CONTAINS(procedure_source_value, r'(?i)referral') AND
    REGEXP_CONTAINS(procedure_source_value, r'(?i)endo') AND
    (DATETIME_DIFF(pr.procedure_DATETIME, p.birth_DATETIME, DAY) BETWEEN 0 AND 6575)
),

PrimaryCare AS
(SELECT 
    person_id,
    visit_occurrence_id,
    visit_start_DATETIME
    FROM 
        `{project_id}.{dataset_id}.visit_occurrence` v
    LEFT JOIN
        `{project_id}.{dataset_id}.care_site` c 
    ON 
        v.care_site_id = c.care_site_id
    LEFT JOIN 
        `{save_project_id}.{map_dataset_id}.dep_map` m 
    ON 
        CAST(c.care_site_source_value AS NUMERIC) = m.department_id
    WHERE 
        (m.specialty_dep_c NOT IN ('7','81','106')) AND 
        v.visit_concept_id IN (0,9202,581477,5083) AND
        DATETIME_DIFF(v.visit_end_DATETIME, v.visit_start_DATETIME, DAY) = 0 AND
        EXTRACT(YEAR FROM v.visit_start_DATETIME) >= {year_start} AND
        EXTRACT(YEAR FROM v.visit_start_DATETIME) <= {year_end}
),

Endocrine AS 
(SELECT 
    person_id,
    visit_occurrence_id,
    visit_start_DATETIME,
FROM 
    `{project_id}.{dataset_id}.visit_occurrence` v
LEFT JOIN
    `{project_id}.{dataset_id}.care_site` c 
ON 
    v.care_site_id = c.care_site_id
LEFT JOIN 
    `{save_project_id}.{map_dataset_id}.dep_map` m 
ON 
    CAST(c.care_site_source_value AS NUMERIC) = m.department_id
WHERE 
    m.specialty_dep_c in ('7','81','106')
),

Office_Visit AS 
(SELECT visit_occurrence_id FROM `{project_id}.{dataset_id}.note` 
     WHERE note_class_concept_id = 3000735)

SELECT 
    r.person_id,
    r.visit_occurrence_id AS PrimaryCare_visit_id,
    ec.visit_occurrence_id AS Specialty_visit_id,
    r.referral_DATETIME,
    pc.visit_start_DATETIME AS PrimaryCare_DATETIME,
    ec.visit_start_DATETIME AS Specialty_DATETIME, 
    ROW_NUMBER() OVER (PARTITION BY r.visit_occurrence_id ORDER BY ec.visit_start_DATETIME) AS Endo_visit_rank
FROM 
    PedEndoReferral r
INNER JOIN 
    PrimaryCare pc
ON 
    (r.person_id = pc.person_id) AND 
    (r.visit_occurrence_id = pc.visit_occurrence_id)
INNER JOIN
    Endocrine ec
ON 
    (r.person_id = ec.person_id) AND
    (DATETIME_DIFF(ec.visit_start_DATETIME, pc.visit_start_DATETIME, DAY) BETWEEN 0 AND 180)
WHERE
    ec.visit_occurrence_id IN (SELECT * FROM Office_Visit)
ORDER BY
    r.person_id, pc.visit_start_DATETIME
""".format_map(format_map_dict)

# Fully qualified destination: <save project>.<save dataset>.<endorefer table>
table_str = save_project_id + '.' + save_dataset_id + '.' + table_id_endorefer
save_query_table(sql, table_str)

# NOTE(review): leftover alternative specialty filter, not executed; remove
# once it is confirmed to be no longer needed:
#     m.specialty_dep_c in ('32','72') AND

Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V3_Endorefer_Cohort_2014_2020


In [63]:
# All outpatient visits in the same period.
# Applies the same visit filters as the PrimaryCare CTE of the referral
# query (visit_concept_id list, start and end on the same day, start year
# within [year_start, year_end]) but with no department/specialty
# restriction, so it reads only the visit_occurrence table.
sql = """
    SELECT 
        person_id,
        visit_occurrence_id,
        visit_start_DATETIME
    FROM 
        `{project_id}.{dataset_id}.visit_occurrence` v
    WHERE 
        v.visit_concept_id IN (0,9202,581477,5083) AND
        DATETIME_DIFF(v.visit_end_DATETIME, v.visit_start_DATETIME, DAY) = 0 AND
        EXTRACT(YEAR FROM v.visit_start_DATETIME) >= {year_start} AND
        EXTRACT(YEAR FROM v.visit_start_DATETIME) <= {year_end}
""".format_map(format_map_dict)

# Fully qualified destination: <save project>.<save dataset>.<outpatient table>
table_str = save_project_id + '.' + save_dataset_id + '.' + table_id_outpt
save_query_table(sql, table_str)


Query results loaded to the table som-nero-phi-jonc101.wui_omop_peds.V3_Outpt_Cohort_2014_2020
