### V2 OMOP Peds Clinical Items Prevalence Outpatient 

In [None]:
# For every clinical item, count the distinct patients, the distinct visits
# and the total number of rows that reference it, restricted to the
# outpatient pediatric visits 2015-2019 (table V2_Outpt_Visit_2015_2019).
# One result table is written per OMOP source table.

# Each entry: (OMOP source table, name of its concept-id column).
table_list = [
    ('measurement', 'measurement_concept_id'),
    ('procedure_occurrence', 'procedure_concept_id'),
    ('drug_exposure', 'drug_concept_id'),
    ('condition_occurrence', 'condition_concept_id'),
    ('device_exposure', 'device_concept_id'),
]

for table, concept in table_list:
    # The CTE keeps only source rows whose (person, visit) pair is in the
    # visit table; the outer query aggregates those rows per concept id.
    sql = """
            WITH itemAllOutpt AS (        
                    SELECT 
                        x.person_id,
                        x.visit_occurrence_id,
                        x.{concept_id} as item_concept_id
                    FROM 
                        `som-rit-phi-starr-prod.starr_omop_cdm5_deid_latest.{table}` x
                    INNER JOIN 
                        `wui_omop_peds.V2_Outpt_Visit_2015_2019` c 
                        ON 
                            (x.person_id = c.person_id) AND
                            (x.visit_occurrence_id = c.visit_occurrence_id)
                  )

            SELECT item_concept_id,
                   COUNT(DISTINCT(person_id)) as num_pt, 
                   COUNT(DISTINCT(visit_occurrence_id)) as num_visit,
                   COUNT(*) as instance
            FROM itemAllOutpt 
            GROUP BY item_concept_id 
            ORDER BY num_pt DESC, num_visit DESC, instance DESC

    """.format(concept_id=concept, table=table)

    # Destination table that receives the per-item counts for this source.
    table_id = f"som-nero-phi-jonc101.wui_omop_peds.V2_count_{table}"
    job_config = bigquery.QueryJobConfig(destination=table_id)

    # Remove any earlier copy of the destination before the query writes it.
    client.delete_table(table_id, not_found_ok=True)
    query_job = client.query(sql, job_config=job_config)
    query_job.result()  # wait for the query job before reporting success
    print(f"Query results loaded to the table {table_id}")

In [None]:
from bigQueryUtil import BigQueryClient 

# Project helper configured for the project/dataset given below.
bq = BigQueryClient(project_id='som-nero-phi-jonc101', dataset_id='wui_omop_peds')

# Read every V2_count_<name> table, in the order listed, into df_list.
# NOTE(review): readBQFile presumably returns a pandas DataFrame (the results
# are later passed to pd.concat) — confirm against bigQueryUtil.
table_list = [
    'measurement',
    'procedure_occurrence',
    'drug_exposure',
    'condition_occurrence',
    'device_exposure',
    'nlp',
]
df_list = []
for t in table_list:
    print(f'reading...{t}')
    df = bq.readBQFile(tableName=f'V2_count_{t}')
    df_list.append(df)

In [None]:
import pandas as pd
# Stack the first five count tables into a single frame. The sixth entry of
# df_list (NLP) is intentionally left out.
combinedDF = pd.concat([df_list[i] for i in range(5)])

In [None]:
# Drop rows whose concept id is 0 (presumably the "no matching concept"
# placeholder — confirm). The explicit .copy() makes newDF an independent
# frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning for writing into a filtered selection.
newDF = combinedDF[combinedDF['item_concept_id'] != 0].copy()

In [None]:
# Total number of patients in V2_Outpt_Visit_2015_2019; used as the
# denominator when turning per-item patient counts into a prevalence.
N = 445_308

In [None]:
# Prevalence = percentage of the N patients that have at least one record of
# the item (num_pt is the distinct-patient count per item).
# Computed as one vectorized expression instead of a per-row Python lambda,
# and attached with assign() so a new frame is produced rather than writing
# into a frame that was obtained by filtering another one.
newDF = newDF.assign(prevalence=newDF["num_pt"] * 100 / N)

In [None]:
# Keep only the concept id and its prevalence; all other columns are dropped.
newDF = newDF.loc[:, ["item_concept_id", "prevalence"]]

In [None]:
# sort_values() returns a new frame and leaves its input untouched, so the
# result has to be rebound; otherwise the sort is silently discarded and the
# mapping below is not ordered by prevalence. mergesort is stable, which keeps
# the order of equally prevalent items deterministic.
newDF = newDF.sort_values(by="prevalence", ascending=False, kind="mergesort")

# Build {column name: {item_concept_id: value}}; dict insertion order follows
# the sorted rows, i.e. most prevalent item first.
# NOTE(review): if a concept id occurs in more than one source table the index
# has duplicate labels and to_dict() keeps a single entry per id — confirm
# that ids are unique across the concatenated tables.
x = newDF.set_index("item_concept_id").to_dict()

In [None]:
# Pull out the inner {item_concept_id: prevalence} mapping from the
# column-keyed dictionary built by to_dict().
prevalence_map = x["prevalence"]


In [None]:
import json
# Persist the item -> prevalence mapping as JSON in the working directory.
with open("item_prevalence_map.json", mode="w") as out_file:
    json.dump(prevalence_map, out_file)