In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import pandas as pd
import os, glob
import seaborn as sns
from scipy.stats import kruskal
import scikit_posthocs as sp
from scipy.stats import mannwhitneyu
from dotenv import load_dotenv
load_dotenv() 

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/fa/.config/gcloud/application_default_credentials.json'
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101'
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()
from google.cloud import bigquery_storage_v1

## Study Cohort

In [2]:
%%bigquery df_ED
#Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS 
WITH base_shc AS (
    SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded, 
        op.order_time_jittered,
        EXTRACT(YEAR FROM op.order_time_jittered) as order_year,
        op.ordering_mode,
        op.department_id
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        AND NOT op.order_status LIKE ANY ('Discontinued', 'Canceled')
), 
ED_orders_shc AS (
    SELECT 
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.order_time_jittered AS blood_culture_order_datetime,
        b.order_year,
        b.ordering_mode,
        b.department_id,
        department_name
    FROM base_shc b 
    INNER JOIN `som-nero-phi-jonc101.shc_core_2023.dep_map` using(department_id)
    WHERE LOWER(department_name) LIKE '%emergency%'
), 
ED_arrival_orders_shc AS (
    SELECT 
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.blood_culture_order_datetime,
        b.order_year,
        ad.effective_time_jittered AS ed_arrival_datetime
    FROM 
        ED_orders_shc b
    INNER JOIN 
        `som-nero-phi-jonc101.shc_core_2023.adt` ad
    USING 
        (anon_id, pat_enc_csn_id_coded, department_id)
    WHERE event_type = 'Admission'
    AND b.blood_culture_order_datetime >= ad.effective_time_jittered
), 
ED_orders_shc_peds AS (
    SELECT 
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.blood_culture_order_datetime,
        d.birth_date_jittered,
        d.bmi
    FROM 
        ED_orders_shc b
    INNER JOIN 
        `som-nero-phi-jonc101.shc_core_2023.demographic` d
    USING (anon_id)
    WHERE DATE_DIFF(DATE(b.blood_culture_order_datetime), DATE(d.birth_date_jittered), DAY) / 365 <= 18
), 
base_lpch AS (
    SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded, 
        op.order_time_jittered,
        EXTRACT(YEAR FROM op.order_time_jittered) AS order_year,
        op.ordering_mode,
        op.department_id
    FROM 
        `som-nero-phi-jonc101.lpch_core_2023.lpch_order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        AND NOT op.order_status LIKE ANY ('Discontinued', 'Canceled')
), 
ED_orders_lpch AS (
    SELECT 
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.order_time_jittered AS blood_culture_order_datetime,
        b.order_year,
        b.ordering_mode,
        b.department_id,
        department_name
    FROM base_lpch b 
    INNER JOIN `som-nero-phi-jonc101.lpch_core_2023.lpch_dep_map` USING(department_id)
    WHERE LOWER(department_name) LIKE '%emergency%'
), 
ED_orders_lpch_peds AS (
    SELECT 
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.blood_culture_order_datetime,
        d.birth_date_jittered,
        d.bmi
    FROM 
        ED_orders_lpch b
    INNER JOIN 
        `som-nero-phi-jonc101.lpch_core_2023.lpch_demographic` d
    USING (anon_id)
    WHERE DATE_DIFF(DATE(b.blood_culture_order_datetime), DATE(d.birth_date_jittered), DAY) / 365 <= 18
)

SELECT * FROM ED_orders_shc_peds
UNION ALL 
SELECT * FROM ED_orders_lpch_peds

Query is running:   0%|          |

Downloading:   0%|          |

In [3]:
df_ED.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,birth_date_jittered,bmi
0,JC2088164,316082836,726631078,2017-03-24 11:58:00,2017-02-11,
1,JC2259581,310449441,706071771,2014-11-17 15:42:00,2011-06-01,22.24
2,JC2259581,310575279,706657434,2014-12-12 18:15:00,2011-06-01,22.24
3,JC2259581,311105805,708558655,2015-03-08 07:25:00,2011-06-01,22.24
4,JC2259581,311233437,709249585,2015-03-28 17:40:00,2011-06-01,22.24


## Labeling

In [103]:
%%bigquery df_ed_labels
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_labs as (
   select anon_id,pat_enc_csn_id_coded,order_id_coded,order_time_jittered,ord_value,extended_value_comment,extended_comp_comment from `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result`
where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`)    
union all 
     select anon_id,pat_enc_csn_id_coded,order_id_coded,order_time_jittered,ord_value,extended_value_comment,extended_comp_comment from  `som-nero-phi-jonc101.shc_core_2023.lab_result`
where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`)
),
all_sensitivity as (
    select anon_id,order_proc_id_coded, organism  from `som-nero-phi-jonc101.shc_core_2023.culture_sensitivity`
where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`)
    union all
    select anon_id,order_proc_id_coded, organism from `som-nero-phi-jonc101.lpch_core_2023.lpch_culture_sensitivity`
where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`)
),
culture as ( 
    SELECT DISTINCT
         c.*,
         lr.ord_value,
         coalesce(lr.extended_value_comment,lr.extended_comp_comment) as comment,
    FROM 
        `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
    INNER JOIN
       all_labs lr
       on c.anon_id=lr.anon_id 
          and c.order_proc_id_coded=lr.order_id_coded
          and c.pat_enc_csn_id_coded=lr.pat_enc_csn_id_coded
          and c.blood_culture_order_datetime=lr.order_time_jittered
    ),
culture_growth as (
select op.anon_id,
       op.pat_enc_csn_id_coded,
       op.order_proc_id_coded,
       1 as was_pos,
from culture op  
                INNER join (SELECT DISTINCT anon_id,order_proc_id_coded, organism
                        FROM all_sensitivity
                        where organism is not null) cs USING (anon_id,order_proc_id_coded) 
WHERE not lower(ord_value) like any ("%no%grow%","%not%detect%","negative")
     AND not upper(comment) like any ('%NO%GROWTH%','%COAG%NEG%STAPH%','%GRAM%+%RODS%','%GRAM%POS%RODS%','%CONTAMIN%')
)
select c.*,
case when (cg.was_pos=1) then cg.was_pos else 0 end as positive_blood_culture 
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c LEFT JOIN culture_growth cg 
using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [104]:
%%bigquery df_ed_labels
Create or replace table  som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
With order_posetive_next_week AS (
   SELECT DISTINCT
         c.anon_id,
         c.order_proc_id_coded,
         1 as positive_blood_culture_in_week
    FROM 
     som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c 
     INNER JOIN
        som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c2
    ON
        c.anon_id = c2.anon_id
    WHERE
        c2.positive_blood_culture=1
        AND TIMESTAMP_DIFF(c2.blood_culture_order_datetime,c.blood_culture_order_datetime, Hour) between 0 and 168
        group by anon_id,order_proc_id_coded
)
select c.*,
case when o.positive_blood_culture_in_week=1 then o.positive_blood_culture_in_week else 0 end as positive_blood_culture_in_week
from  som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c 
left join order_posetive_next_week o 
using(anon_id,order_proc_id_coded)

Query is running:   0%|          |

## Earlist IV antibiotic

In [105]:
%%bigquery df_ed_labels
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
With AllorderMeds as (
      select  anon_id,
              pat_enc_csn_id_coded,
              med_description,
              order_start_time_jittered,
      FROM
      `som-nero-phi-jonc101.shc_core_2023.order_med`
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
      AND LOWER(med_route) = 'intravenous'
      AND  anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
union all
    select    anon_id,
              pat_enc_csn_id_coded,
              med_description,
              order_start_time_jittered,
      FROM
      `som-nero-phi-jonc101.lpch_core_2023.lpch_order_med`
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
      AND LOWER(med_route) = 'intravenous'
       AND  anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
),
ED_ABX as (
SELECT
  c.anon_id,
  c.pat_enc_csn_id_coded,
  c.order_proc_id_coded,
  #o.med_description,
  o.order_start_time_jittered AS earliest_iv_antibiotic_datetime
FROM
  `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
INNER JOIN (
    SELECT
      anon_id,
      pat_enc_csn_id_coded,
      med_description,
      order_start_time_jittered,
      RANK() OVER (PARTITION BY anon_id, pat_enc_csn_id_coded ORDER BY order_start_time_jittered ASC) as rank
    FROM
     AllorderMeds
) o ON c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
WHERE
  o.rank = 1
  AND TIMESTAMP_DIFF(o.order_start_time_jittered, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
GROUP BY
  c.anon_id, c.pat_enc_csn_id_coded, c.order_proc_id_coded, o.med_description, o.order_start_time_jittered 
)
select c.*,
ea.earliest_iv_antibiotic_datetime,
#ea.earliest_iv_antibiotic
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join ED_ABX ea using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
group by c.anon_id,
c.pat_enc_csn_id_coded,
c.birth_date_jittered,
c.order_proc_id_coded,
c.blood_culture_order_datetime,
c.positive_blood_culture,
c.positive_blood_culture_in_week,
ea.earliest_iv_antibiotic_datetime,
c.bmi

Query is running:   0%|          |

In [67]:
df_ed_labels[(df_ed_labels.positive_blood_culture==1)|(df_ed_labels.positive_blood_culture_in_week==1)][['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape

(1086, 3)

## Vitals

In [119]:
%%bigquery df_ed_features
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
WITH all_flowsheets as (
    select anon_id,
    recorded_time_jittered,
    row_disp_name,
    SAFE_CAST(numerical_val_1 AS NUMERIC) AS numerical_val_1,
    SAFE_CAST(numerical_val_2 AS NUMERIC) AS numerical_val_2,
    from `som-nero-phi-jonc101.shc_core_2023.flowsheet` 
     WHERE
   ((upper(trim(row_disp_name)) IN ('PULSE', 'HEART RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 30) OR -- Heart rate
   (upper(trim(row_disp_name)) in ('RESP', 'RESP RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 4 AND SAFE_CAST(numerical_val_1 AS numeric) <= 60) OR -- Respiratory rate
   (upper(trim(row_disp_name)) IN ('TEMP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 90) OR -- Temperature in F
   (upper(trim(row_disp_name)) IN ('WEIGHT') AND SAFE_CAST(numerical_val_1 AS numeric) >= 480 AND SAFE_CAST(numerical_val_1 AS numeric) <= 8000)  OR -- Weight 
  (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) OR -- Systolic BP
   (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30) -- diastolic BP
  )
   and anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
union all
    select anon_id,
    recorded_time_jittered,
    row_disp_name, 
    numerical_val_1,
    numerical_val_2  
    from `som-nero-phi-jonc101.lpch_core_2023.lpch_flowsheet_numerical_measurment`
 WHERE
   ((upper(trim(row_disp_name)) IN ('PULSE', 'HEART RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 30) OR -- Heart rate
   (upper(trim(row_disp_name)) in ('RESP', 'RESP RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 4 AND SAFE_CAST(numerical_val_1 AS numeric) <= 60) OR -- Respiratory rate
   (upper(trim(row_disp_name)) IN ('TEMP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 90) OR -- Temperature in F
   (upper(trim(row_disp_name)) IN ('WEIGHT') AND SAFE_CAST(numerical_val_1 AS numeric) >= 480 AND SAFE_CAST(numerical_val_1 AS numeric) <= 8000)  OR -- Weight 
  (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) OR -- Systolic BP
   (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30) -- diastolic BP
  )
    and anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
),
vitals as (
  SELECT c.anon_id,
         c.pat_enc_csn_id_coded,
         c.order_proc_id_coded,
  vitals.recorded_time_jittered,
  CASE WHEN upper(row_disp_name) IN ('PULSE', 'HEART RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as heartrate,
  CASE WHEN upper(row_disp_name) IN ('RESP', 'RESP RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as resprate,
  CASE WHEN upper(row_disp_name) IN ('TEMP') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as temp,
  CASE WHEN upper(row_disp_name) IN ('WEIGHT') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as weight,
    
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as sysbp ,
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30)  THEN round(SAFE_CAST(numerical_val_2 AS FLOAT64),2) end as diasbp,
  FROM
     `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c LEFT JOIN
 all_flowsheets as vitals
   ON vitals.anon_id = c.anon_id
  WHERE
  (TIMESTAMP_DIFF(vitals.recorded_time_jittered, c.blood_culture_order_datetime, hour) between -24 and 0 )
),
vital_records as (
select anon_id,
       pat_enc_csn_id_coded,
       order_proc_id_coded,
       ROUND(min(heartrate),2) as min_heartrate,
       ROUND(max(heartrate),2) as max_heartrate,
       ROUND(avg(heartrate),2) as avg_heartrate,
       ROUND(APPROX_QUANTILES(heartrate, 100)[OFFSET(50)],2) AS median_heartrate,
       ROUND(min(resprate),2) as min_resprate,
       ROUND(max(resprate),2) as max_resprate,
       ROUND(avg(resprate),2) as avg_resprate,
       ROUND(APPROX_QUANTILES(resprate, 100)[OFFSET(50)],2) AS median_resprate,
       ROUND(min(temp),2) as min_temp,
       ROUND(max(temp),2) as max_temp,
       ROUND(avg(temp),2) as avg_temp,
       ROUND(APPROX_QUANTILES(temp, 100)[OFFSET(50)],2) AS median_temp,
       ROUND(min(weight),2) as min_weight,
       ROUND(max(weight),2) as max_weight,
       ROUND(avg(weight),2) as avg_weight,
       ROUND(APPROX_QUANTILES(weight, 100)[OFFSET(50)],2) AS median_weight,
       ROUND(min(sysbp),2) as min_sysbp,
       ROUND(max(sysbp),2) as max_sysbp,
       ROUND(avg(sysbp),2) as avg_sysbp,
       ROUND(APPROX_QUANTILES(sysbp, 100)[OFFSET(50)],2) AS median_sysbp,
       ROUND(min(diasbp),2) as min_diasbp,
       ROUND(max(diasbp),2) as max_diasbp,
       ROUND(avg(diasbp)) as avg_diasbp,
       ROUND(APPROX_QUANTILES(diasbp, 100)[OFFSET(50)]) AS median_diasbp,
from vitals
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded
)
select c.*,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate, 
    max_resprate, 
    avg_resprate,
    median_resprate,
    min_temp, 
    max_temp, 
    avg_temp,
    median_temp,
    min_weight, 
    max_weight, 
    avg_weight,
    median_weight,
    min_sysbp,
    max_sysbp, 
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp, 
    avg_diasbp,
    median_diasbp,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
left join vital_records using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

## Labs

In [123]:
%%bigquery df_ed_features
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_lab_result as (
    select anon_id,
    pat_enc_csn_id_coded,
    lab_name,
    base_name,
    ord_value,
    reference_unit,
    order_time_jittered,
    from `som-nero-phi-jonc101.shc_core_2023.lab_result`
    where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
    union all 
    select anon_id,
    pat_enc_csn_id_coded,
    lab_name,
    base_name,
    ord_value,
    reference_unit,
    order_time_jittered,
    from `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result`
where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
    
),
labs AS (
    SELECT 
        c.anon_id,
       c.pat_enc_csn_id_coded,
       c.order_proc_id_coded,
        CASE 
            WHEN (LOWER(lr.base_name) = 'wbc'
                    AND LOWER(lr.reference_unit) IN ('thousand/ul','k/ul','10x3/ul','10*3/ul','x10e3/ul')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'wbc'
                    AND lr.reference_unit = '/uL' 
                        THEN SAFE_CAST(lr.ord_value AS FLOAT64)/1000
            END AS wbc,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS neutrophils,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lymphocytes,
        -- CASE WHEN LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE 
            WHEN LOWER(lr.base_name) = 'hgb' AND lr.reference_unit = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'hgb' AND LOWER(lr.reference_unit) = 'g/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64) * 1000
        END AS hgb,
        CASE 
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) IN ('x10e3/ul','10x3/ul','k/ul','10*3/ul','thousand/ul') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) = 'ul' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 1000
        END AS plt,
        CASE 
            WHEN LOWER(lr.base_name) = 'na' AND LOWER(lr.reference_unit) = 'mmol/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS na,
        CASE 
            WHEN (LOWER(lr.base_name) = 'hco3' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN (LOWER(lr.base_name) = 'co2' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS hco3,      
        CASE 
            WHEN LOWER(lr.base_name) = 'bun' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS bun,
        CASE 
            WHEN LOWER(lr.base_name) = 'cr' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS cr,
        -- CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE 
            WHEN LOWER(lr.base_name) = 'lac' AND LOWER(lr.reference_unit) IN ('mmol/l', 'mmole/l') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lactate,               
        CASE 
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 10
        END AS crp,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' AND LOWER(lr.reference_unit) = 'ng/ml' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS procalcitonin
    FROM som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c
    LEFT JOIN all_lab_result lr 
    USING (anon_id, pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(lr.order_time_jittered, c.blood_culture_order_datetime, Day) BETWEEN -2 AND 0
),
lab_records as 
(
SELECT 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    
    ROUND(MIN(wbc), 2) AS min_wbc,
    ROUND(MAX(wbc), 2) AS max_wbc,
    ROUND(AVG(wbc), 2) AS avg_wbc,
    ROUND(APPROX_QUANTILES(wbc, 100)[OFFSET(50)], 2) AS median_wbc,
    
    ROUND(MIN(neutrophils), 2) AS min_neutrophils,
    ROUND(MAX(neutrophils), 2) AS max_neutrophils,
    ROUND(AVG(neutrophils), 2) AS avg_neutrophils,
    ROUND(APPROX_QUANTILES(neutrophils, 100)[OFFSET(50)], 2) AS median_neutrophils,
    
    ROUND(MIN(lymphocytes), 2) AS min_lymphocytes,
    ROUND(MAX(lymphocytes), 2) AS max_lymphocytes,
    ROUND(AVG(lymphocytes), 2) AS avg_lymphocytes,
    ROUND(APPROX_QUANTILES(lymphocytes, 100)[OFFSET(50)], 2) AS median_lymphocytes,
    
    #ROUND(MIN(bands), 2) AS min_bands,
    #ROUND(MAX(bands), 2) AS max_bands,
    #ROUND(AVG(bands), 2) AS avg_bands,
    #ROUND(APPROX_QUANTILES(bands, 100)[OFFSET(50)], 2) AS median_bands,
    
    ROUND(MIN(hgb), 2) AS min_hgb,
    ROUND(MAX(hgb), 2) AS max_hgb,
    ROUND(AVG(hgb), 2) AS avg_hgb,
    ROUND(APPROX_QUANTILES(hgb, 100)[OFFSET(50)], 2) AS median_hgb,

    ROUND(MIN(plt), 2) AS min_plt,
    ROUND(MAX(plt), 2) AS max_plt,
    ROUND(AVG(plt), 2) AS avg_plt,
    ROUND(APPROX_QUANTILES(plt, 100)[OFFSET(50)], 2) AS median_plt,

    ROUND(MIN(na), 2) AS min_na,
    ROUND(MAX(na), 2) AS max_na,
    ROUND(AVG(na), 2) AS avg_na,
    ROUND(APPROX_QUANTILES(na, 100)[OFFSET(50)], 2) AS median_na,

    ROUND(MIN(hco3), 2) AS min_hco3,
    ROUND(MAX(hco3), 2) AS max_hco3,
    ROUND(AVG(hco3), 2) AS avg_hco3,
    ROUND(APPROX_QUANTILES(hco3, 100)[OFFSET(50)], 2) AS median_hco3,

    ROUND(MIN(bun), 2) AS min_bun,
    ROUND(MAX(bun), 2) AS max_bun,
    ROUND(AVG(bun), 2) AS avg_bun,
    ROUND(APPROX_QUANTILES(bun, 100)[OFFSET(50)], 2) AS median_bun,

    ROUND(MIN(cr), 2) AS min_cr,
    ROUND(MAX(cr), 2) AS max_cr,
    ROUND(AVG(cr), 2) AS avg_cr,
    ROUND(APPROX_QUANTILES(cr, 100)[OFFSET(50)], 2) AS median_cr,

    #ROUND(MIN(glucose), 2) AS min_glucose,
    #ROUND(MAX(glucose), 2) AS max_glucose,
    #ROUND(AVG(glucose), 2) AS avg_glucose,
    #ROUND(APPROX_QUANTILES(glucose, 100)[OFFSET(50)], 2) AS median_glucose,

    ROUND(MIN(lactate), 2) AS min_lactate,
    ROUND(MAX(lactate), 2) AS max_lactate,
    ROUND(AVG(lactate), 2) AS avg_lactate,
    ROUND(APPROX_QUANTILES(lactate, 100)[OFFSET(50)], 2) AS median_lactate,

    ROUND(MIN(procalcitonin), 2) AS min_procalcitonin,
    ROUND(MAX(procalcitonin), 2) AS max_procalcitonin,
    ROUND(AVG(procalcitonin), 2) AS avg_procalcitonin,
    ROUND(APPROX_QUANTILES(procalcitonin, 100)[OFFSET(50)], 2) AS median_procalcitonin

FROM labs
GROUP BY 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded
)
select c.*,
min_wbc,
max_wbc,
avg_wbc,
median_wbc,
min_neutrophils,
max_neutrophils,
avg_neutrophils,
median_neutrophils,
min_lymphocytes,
max_lymphocytes,
avg_lymphocytes,
median_lymphocytes,
min_hgb,
max_hgb,
avg_hgb,
median_hgb,
min_plt,
max_plt,
avg_plt,
median_plt,
min_na,
max_na,
avg_na,
median_na,
min_hco3,
max_hco3,
avg_hco3,
median_hco3,
min_bun,
max_bun,
avg_bun,
median_bun,
min_cr,
max_cr,
avg_cr,
median_cr,
min_lactate,
max_lactate,
avg_lactate,
median_lactate,
min_procalcitonin,
max_procalcitonin,
avg_procalcitonin,
median_procalcitonin,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
left join lab_records using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

## Demographics

In [129]:
%%bigquery demo_df
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_demos as (
    select anon_id,gender,canonical_race
    from `som-nero-phi-jonc101.shc_core_2023.demographic`
    where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
union all
     select anon_id,gender,canonical_race
    from `som-nero-phi-jonc101.lpch_core_2023.lpch_demographic`
    where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
)
    SELECT c.*,
           demo.gender,
           demo.canonical_race as race
    FROM 
          som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c
    LEFT JOIN 
        all_demos demo
    ON 
        c.anon_id = demo.anon_id

Query is running:   0%|          |

## ICD codes

In [133]:
%%bigquery icds 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_diagnosis as (
    select anon_id,
    icd10,
    icd9,
    start_date_jittered,
    end_date_jittered,
    from som-nero-phi-jonc101.shc_core_2023.diagnosis
    where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
union all    
    select anon_id,
    icd10,
    icd9,
    start_date_jittered,
    end_date_jittered,
    from som-nero-phi-jonc101.lpch_core_2023.lpch_diagnosis
    where anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
),
icds as (
select *,
case when icd10 like any ('R78.81') or icd9 like any('790.7') then 1 end as bacteremia,
case when icd10 like any ('A41.9','R65.21','Z86.19') or icd9 like any('038.9','995.91','785.52','995.2') then 1 end as septic_shock,
case when icd10 like any ('I33.0') or icd9 like any('421.0') then 1 end as infective_endocarditis,
case when icd10 like any ('I80.9') or icd9 like any('451.9') then 1 end as septic_thrombophlebitis,
case when icd10 like any ('T82.7%A') or icd9 like any('996.61','996.62') then 1 end as vascular_graft_infection,
case when icd10 like any ('T80.211A') or icd9 like any('999.32') then 1 end as CRBSI,
case when icd10 like any ('M46.40') or icd9 like any('722.9') then 1 end as infectious_discitis,
case when icd10 like any ('G06.2','B96.89') or icd9 like any('041.89') then 1 end as epidural_abscess,
case when icd10 like any ('M00.9') or icd9 like any('711.00') then 1 end as septic_arthritis,
case when icd10 like any ('G03.9') or icd9 like any('322.9') then 1 end as meningitis,
case when icd10 like any ('G00.9') or icd9 like any('320.82','320.9') then 1 end as meningitis_bacteria,
case when icd10 like any ('K83.0') or icd9 like any('576.1') then 1 end as cholangitis,
case when icd10 like any ('K83.09','B96.89') or icd9 like any('576.1') then 1 else 0 end as bacterial_cholangitis,
case when icd10 like any ('N12') or icd9 like any('590.80') then 1 else 0 end as pyelonephritis,
case when icd10 like any ('N10','B96.89') or icd9 like any('590.10','590.11') then 1 else 0 end as acute_bacterial_pyelonephritis,
case when icd10 like any ('L03.90') or icd9 like any('486.0') then 1 else 0 end as severe_pneumonia,
case when icd10 like any ('M86.9') or icd9 like any('730.20','730.30','730.90','730.98') then 1 else 0 end as acute_hematogenous_osteomyelitis,
case when icd10 like any ('Q89.01') or icd9 like any('759.0') then 1 else 0 end as asplenia,
case when icd10 like any ('D84.9') or icd9 like any('279.3') then 1 else 0 end as immunocompromised_state,
case when icd10 like any ('L03.90') then 1 else 0 end as severe_cellulitis,
case when icd10 like any ('N30.90') or icd9 like any('595.9') then 1 else 0 end as cystitis,
case when icd10 like any ('N41.9') or icd9 like any('601.9') then 1 else 0 end as prostatitis,
case when icd10 like any ('J18.9') or icd9 like any('486') then 1 else 0 end as CAP,
case when icd10 like any ('E11.69','L08.9') or icd9 like any('250.80','686.9') then 1 else 0 end as diabetic_foot_infection,
case when icd10 like any ('A09','A04.72') or icd9 like any('686.9') then 1 else 0 end as colitis,
case when icd10 like any ('J69.0') or icd9 like any('507.0') then 1 else 0 end as aspiration_pneumonia,
case when icd10 like any ('K81.9') or icd9 like any('575.10') then 1 else 0 end as uncomplicated_cholecystitis,
case when icd10 like any ('K57.92') or icd9 like any('562.11') then 1 else 0 end as uncomplicated_diverticulitis,
case when icd10 like any ('K85.9') or icd9 like any('577.0') then 1 else 0 end as Uncomplicated_pancreatitis,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
left join 
all_diagnosis diag
using (anon_id)
WHERE  
DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(diag.start_date_jittered AS DATE), Day)>0
AND (diag.end_date_jittered is null or DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(diag.end_date_jittered AS DATE), Day)<=0)
),
all_icds as (
select anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
max(bacteremia) as bacteremia,
max(septic_shock) as septic_shock,
max(infective_endocarditis) as infective_endocarditis,
max(septic_thrombophlebitis) as septic_thrombophlebitis,
max(vascular_graft_infection) as vascular_graft_infection,
max(CRBSI) as CRBSI,
max(infectious_discitis) as infectious_discitis,
max(epidural_abscess) as epidural_abscess,
max(septic_arthritis) as septic_arthritis,
max(meningitis) as meningitis,
max(meningitis_bacteria) as meningitis_bacteria,
max(cholangitis) as cholangitis,
max(bacterial_cholangitis) as bacterial_cholangitis,
max(pyelonephritis) as pyelonephritis,
max(acute_bacterial_pyelonephritis) as acute_bacterial_pyelonephritis,
max(severe_pneumonia) as severe_pneumonia,
max(acute_hematogenous_osteomyelitis) as acute_hematogenous_osteomyelitis,
max(asplenia) as asplenia,
max(immunocompromised_state) as immunocompromised_state,
max(severe_cellulitis) as severe_cellulitis,
max(cystitis) as cystitis,
max(prostatitis) as prostatitis,
max(CAP) as CAP,
max(diabetic_foot_infection) as diabetic_foot_infection,
max(colitis) as colitis,
max(aspiration_pneumonia) as aspiration_pneumonia,
max(uncomplicated_cholecystitis) as uncomplicated_cholecystitis,
max(uncomplicated_diverticulitis) as uncomplicated_diverticulitis,
max(Uncomplicated_pancreatitis) as Uncomplicated_pancreatitis
from icds
group by anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded
)
select c.*,
a.bacteremia,
a.septic_shock,
a.infective_endocarditis,
a.septic_thrombophlebitis,
a.vascular_graft_infection,
a.CRBSI,
a.infectious_discitis,
a.epidural_abscess,
a.septic_arthritis,
a.meningitis,
a.meningitis_bacteria,
a.cholangitis,
a.bacterial_cholangitis,
a.pyelonephritis,
a.acute_bacterial_pyelonephritis,
a.severe_pneumonia,
a.acute_hematogenous_osteomyelitis,
a.asplenia,
a.immunocompromised_state,
a.severe_cellulitis,
a.cystitis,
a.prostatitis,
a.CAP,
a.diabetic_foot_infection,
a.colitis,
a.aspiration_pneumonia,
a.uncomplicated_cholecystitis,
a.uncomplicated_diverticulitis,
a.Uncomplicated_pancreatitis
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
left join all_icds a using (anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

## VANC + OZYSN 

In [139]:
%%bigquery df_ed_labels
CREATE OR REPLACE TABLE som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
With AllorderMeds as (
      select  anon_id,
              pat_enc_csn_id_coded,
              med_description,
              order_start_time_jittered,
      FROM
      `som-nero-phi-jonc101.shc_core_2023.order_med`
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
      AND LOWER(med_route) = 'intravenous'
      AND  anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
union all
    select    anon_id,
              pat_enc_csn_id_coded,
              med_description,
              order_start_time_jittered,
      FROM
      `som-nero-phi-jonc101.lpch_core_2023.lpch_order_med`
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
      AND LOWER(med_route) = 'intravenous'
       AND  anon_id in (select distinct(anon_id) from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`) 
),
all_mapped_meds as (
    select rxcui_str,
    rxcui,
    name,
    from  `som-nero-phi-jonc101.shc_core_2023.mapped_meds`
    WHERE  rxcui != '0'
    AND (LOWER(rxcui_str) LIKE 'vancomycin' OR 
         UPPER(name) LIKE '%ZOSYN%' OR 
         UPPER(name) LIKE '%PIPERACILLIN-TAZOBACTAM%')

Union all

    select rxcui_str,
    rxcui,
    name,
    from  `som-nero-phi-jonc101.lpch_core_2023.lpch_mapped_meds`
    WHERE  rxcui != '0'
    AND (LOWER(rxcui_str) LIKE 'vancomycin' OR 
         UPPER(name) LIKE '%ZOSYN%' OR 
         UPPER(name) LIKE '%PIPERACILLIN-TAZOBACTAM%')    
),
ED_ABX as(
SELECT
    c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    o.med_description
  FROM
    som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c
  INNER JOIN AllorderMeds o on c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
  WHERE
    TIMESTAMP_DIFF(o.order_start_time_jittered, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
),
ED_ABX_rxcui_str AS (
 SELECT 
    m.anon_id,
    m.pat_enc_csn_id_coded,
    m.order_proc_id_coded,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE 'vancomycin' THEN 1 ELSE 0 END) AS vanc,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE ANY ('tazobactam', 'piperacillin', 'glucose') THEN 1 ELSE 0 END) AS zosyn
  FROM 
    ED_ABX m 
  INNER JOIN 
   all_mapped_meds mm 
  ON 
    m.med_description = mm.name
  WHERE 
    rxcui != '0'
    AND (LOWER(rxcui_str) LIKE 'vancomycin' OR 
         UPPER(name) LIKE '%ZOSYN%' OR 
         UPPER(name) LIKE '%PIPERACILLIN-TAZOBACTAM%')
  GROUP BY 
    m.anon_id,
m.pat_enc_csn_id_coded,
m.order_proc_id_coded
)
SELECT 
  c.*,
  CASE WHEN ea.vanc = 1 THEN 1 ELSE 0 END AS vanc,
  CASE WHEN ea.zosyn = 1 THEN 1 ELSE 0 END AS zosyn,
  CASE WHEN ea.vanc = 1 AND ea.zosyn = 1 THEN 1 ELSE 0 END AS vanc_zosyn,
  CASE WHEN (ea.vanc = 0  or ea.vanc is null or ea.zosyn = 0 or  ea.zosyn is null) AND c.earliest_iv_antibiotic_datetime IS NOT NULL THEN 1 ELSE 0 END AS other_ABX
FROM 
  som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort c 
LEFT JOIN 
  ED_ABX_rxcui_str  ea
USING (anon_id, pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [138]:
df_ed_labels[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape

(26829, 3)

##  Notes

In [26]:
%%bigquery df_notes
#CREATE OR REPLACE TABLE som-nero-phi-jonc101.PEDsblood_culture_stewardship.Notes AS
WITH all_notes as (
    SELECT anon_id,
    deid_note_text,
    jittered_note_date,
    note_type,
    note_type_desc,
    author_prov_map_id,
    FROM `som-nero-phi-jonc101-secure.Deid_Notes_Jchen.Deid_Notes_SHC_JChen` note
      #  INNER JOIN `som-nero-phi-jonc101.shc_core_2023.prov_map` m  ON m.shc_prov_id = CAST(SUBSTR(note.author_prov_map_id, 2) AS STRING)
   # WHERE 
    #     m.prov_type in ('STANFORD REFERRING PHYSICIAN','PHYSICIAN','RESIDENT','MEDICAL STUDENT','NURSE PRACTITIONER','FELLOW',
    #               'NP STUDENT','PHYSICIAN ASSISTANT','PA STUDENT')


    #type= History and physcial,c progress note inpatient, onsultation note, IP consult,   and consults follow up 
union all 

    SELECT anon_id,
    deid_note_text,
    jittered_note_date,
    note_type,
    note_type_desc,
    author_prov_map_id,
    FROM  `som-nero-phi-jonc101-secure.Deid_Notes_Jchen.Deid_Notes_LPCH_JChen` note
       # INNER JOIN  `som-nero-phi-jonc101.shc_core_2023.prov_map` m  ON m.lpch_prov_id = CAST(SUBSTR(note.author_prov_map_id, 2) AS STRING)
    #WHERE 
     #    m.prov_type in ('STANFORD REFERRING PHYSICIAN','PHYSICIAN','RESIDENT','MEDICAL STUDENT','NURSE PRACTITIONER','FELLOW',
     #              'NP STUDENT','PHYSICIAN ASSISTANT','PA STUDENT')
),
cohort AS (
    SELECT * FROM som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort
),
inpatients_notes as (
select inp.anon_id,
inp.pat_enc_csn_id_coded,
inp.order_proc_id_coded,
note.jittered_note_date as notedatetime,
note.deid_note_text,
note.note_type,
note.note_type_desc,
from cohort inp
inner join all_notes note on 
inp.anon_id=note.anon_id
where 
TIMESTAMP_DIFF(TIMESTAMP(inp.blood_culture_order_datetime), TIMESTAMP(note.jittered_note_date), Hour) >= -24 #0
and
TIMESTAMP_DIFF(TIMESTAMP(note.jittered_note_date), TIMESTAMP(inp.blood_culture_order_datetime), Day) >=-2 #>=-24 #0
)
select  anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
notedatetime,
deid_note_text,
note_type,
note_type_desc,
from inpatients_notes
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,notedatetime,note_type,deid_note_text,note_type_desc
order by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,notedatetime,note_type,deid_note_text,note_type_desc

Query is running:   0%|          |

Downloading:   0%|          |

In [27]:
df_notes[(df_notes.deid_note_text.notna())&(df_notes.note_type.isin(['ED Note','History and Physical','Progress Note, Inpatient'))].drop_duplicates().shape

(340778, 7)

In [24]:
df_notes.note_type.unique()

array(['ED Note', 'History and Physical', 'Other Note',
       'Discharge/Transfer Summary', 'Consultation Note',
       'Nursing Sign Out Note', 'Progress Note, Inpatient', 'IP Consult',
       'Operative/Procedure Report', 'Echo',
       'Progress/Discharge/Transfer Summary', 'Letter',
       'Progress Note, Outpatient', 'ECG', 'EEG', 'Cardiology', 'PFT',
       'REI', 'Telephone Encounter', 'Somnogram'], dtype=object)

In [149]:
df_notes.note_type.unique()# ED Notes, Consultation Note wnat the dpearmtne_specilty to be INFECTIOUS DISEASES, History and Physical

array(['ED Note', 'Consultation Note', 'Other Note',
       'History and Physical', 'Operative/Procedure Report',
       'Progress Note, Inpatient', 'Discharge/Transfer Summary', 'PFT',
       'Nursing Sign Out Note', 'Somnogram', 'Letter', 'Cardiology',
       'Telephone Encounter', 'Progress Note, Outpatient',
       'Progress/Discharge/Transfer Summary'], dtype=object)

## Urin Analysis

In [6]:
%%bigquery UA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
WITH all_UA AS (
  SELECT anon_id,
         pat_enc_csn_id_coded,
         order_time_jittered,
         lab_name,
         ord_value,
         reference_unit,
         component_id
  FROM `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result`
  WHERE component_id IN (1230100515,1230100517,1230100518,1230100514)
    AND anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
  
  UNION ALL

  SELECT anon_id,
         pat_enc_csn_id_coded,
         order_time_jittered,
         lab_name,
         ord_value,
         reference_unit,
         component_id
  FROM `som-nero-phi-jonc101.shc_core_2023.lab_result`
  WHERE component_id IN (1230100515,1230100517,1230100518,1230100514)
    AND anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
    AND ord_value IS NOT NULL
),

UAs_features AS (
  SELECT
    anon_id,
    pat_enc_csn_id_coded,
    order_time_jittered,

    CASE 
      WHEN component_id = 1230100515 AND LOWER(ord_value) IN ('negative','neg','neh') THEN 'NEGATIVE'
      WHEN component_id = 1230100515 AND LOWER(ord_value) IN (
        'n/a, color interference','positive','large','moderate2.0',
        'small','trace','2+','125++','3+','4+','1+'
      ) THEN 'POSITIVE'
      ELSE NULL
    END AS `Leukocyte_Esterase`,

    CASE 
      WHEN component_id = 1230100517 AND reference_unit LIKE '%HPF%' THEN
        CASE 
          WHEN SAFE_CAST(REGEXP_EXTRACT(ord_value, r'(\d+)') AS NUMERIC) >= 5 THEN 'POSITIVE'
          WHEN SAFE_CAST(REGEXP_EXTRACT(ord_value, r'(\d+)') AS NUMERIC) < 5 THEN 'NEGATIVE'
          ELSE NULL
        END
      ELSE NULL
    END AS `WBC_urine`,

    CASE 
      WHEN component_id = 1230100518 AND LOWER(ord_value) IN ('none seen','no significant amount of bacteria detected.','none') THEN 'NEGATIVE'
      WHEN component_id = 1230100518 AND LOWER(ord_value) IN ('rare','occasional','many','moderate','few','41','profuse') THEN 'POSITIVE'
      ELSE NULL
    END AS `Bacteria_urine`,

    CASE 
      WHEN component_id = 1230100514 AND LOWER(ord_value) IN ('negative','neg','neh') THEN 'NEGATIVE'
      WHEN component_id = 1230100514 AND LOWER(ord_value) IN ('n/a, color interference','positive') THEN 'POSITIVE'
      ELSE NULL
    END AS `Nitrite_urine`

  FROM all_UA
  WHERE ord_value IS NOT NULL
),

UA_cohorts AS (
  SELECT
    c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    ua.Leukocyte_Esterase,
    ua.WBC_urine,
    ua.Bacteria_urine,
    ua.Nitrite_urine
  FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
  INNER JOIN UAs_features ua
  USING(anon_id, pat_enc_csn_id_coded)
  WHERE TIMESTAMP_DIFF(ua.order_time_jittered, c.blood_culture_order_datetime, HOUR) BETWEEN -48 AND 0
)

SELECT 
  c.*,
  ua.Leukocyte_Esterase,
  ua.WBC_urine,
  ua.Bacteria_urine,
  ua.Nitrite_urine
FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c
LEFT JOIN UA_cohorts ua
USING(anon_id, pat_enc_csn_id_coded, order_proc_id_coded);


Query is running:   0%|          |

In [5]:
UA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape

(26829, 3)

## LDA

In [15]:
%%bigquery LDA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
WITH all_LDA AS (
  SELECT anon_id,
         pat_enc_csn_id_coded,
         placement_instant_jittered,
         removal_instant_jittered,
         description,
  FROM `som-nero-phi-jonc101.shc_core_2023.lda` 
  WHERE description like any ('%picc%','%ETT%','%CVC%', '%ECMO%','%EVD%','%ET%Tube%')
    or lower(description) like any ('%icu%line%','%tunel%catheter%','%surgical%ur%catheter%','%port%','%dialysis%catheter%')                           
    AND anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
  
  UNION ALL
  SELECT anon_id,
         pat_enc_csn_id_coded,
         placement_instant_jittered,
         removal_instant_jittered,
         description,
  FROM `som-nero-phi-jonc101.lpch_core_2023.lpch_lda` 
  WHERE description like any ('%picc%','%ETT%','%CVC%', '%ECMO%','%EVD%','%ET%Tube%')
    or lower(description) like any ('%icu%line%','%tunel%catheter%','%surgical%ur%catheter%','%port%','%dialysis%catheter%')                           
    AND anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`)
),
LDA_pres as (
    select c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
     CASE 
      WHEN description LIKE '%EVD%' THEN 'EVD'
      WHEN description LIKE '%ET%Tube%' THEN 'ET_Tube'
      WHEN LOWER(description) LIKE '%surgical%ur%catheter%' THEN 'Surgical_Urin_Catheter'
      ELSE 'otherline'
    END AS `Line_Presense`
    from all_LDA l 
    inner join `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c using (anon_id,pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(l.removal_instant_jittered, c.blood_culture_order_datetime, HOUR) >0
    and TIMESTAMP_DIFF(l.placement_instant_jittered, c.blood_culture_order_datetime, HOUR) < 0
)
select c.*,
    lda.Line_Presense
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join LDA_pres lda using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [14]:
LDA_DF[LDA_DF.Line_Presense.notna()][['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]/LDA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]

0.008349174400834918

## STEM Cell and Bone marrow

In [10]:
%%bigquery LDA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_diagnosis as (
    select 
anon_id,
ordering_date_jittered as Dignosis_time ,
1 as Transplant
from `som-nero-phi-jonc101.lpch_core_2023.lpch_order_proc`
Where lower(description) like '%cellular%therapy%'
Union All
SELECT 
  anon_id,
ordering_date_jittered as Dignosis_time,
1 as Transplant
FROM `som-nero-phi-jonc101.shc_core_2023.order_proc`
WHERE description ='BMT REPEAT TRANSPLANT PANEL'
or lower(description) like any('%bmt%infu','bmt cellular product infusion')
Union All
select anon_id,
start_date_jittered  as Dignosis_time,
1 as Transplant
from  `som-nero-phi-jonc101.lpch_core_2023.lpch_procedure` where upper(description) like '%TRANSPLANT%'
union all
select anon_id,
start_date_jittered  as Dignosis_time,
1 as Transplant
 from  `som-nero-phi-jonc101.shc_core_2023.procedure` where upper(description) like '%TRANSPLANT%'
),
cohort_with_diagnosis as (
select c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
d.Transplant
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
inner join all_diagnosis d using(anon_id)
where Dignosis_time<=c.blood_culture_order_datetime
)
select c.*,
d.Transplant
from  `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join cohort_with_diagnosis d using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [8]:
LDA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]#/LDA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]

26829

## ANC/ALC

In [20]:
%%bigquery LDA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with ANC as (
select anon_id,
    order_time_jittered,
    ord_value
from `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result`
        where lower(lab_name) like '%absolute%neutrophils%'
 and anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
 union all
select anon_id,
       order_time_jittered,
       ord_value 
from `som-nero-phi-jonc101.shc_core_2023.lab_result`
        where lower(lab_name) like '%absolute%neutrophils%'
    and  anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
),
cohort_anc as
(
    select c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    ROUND(MIN(CAST(ord_value AS FLOAT64)), 2) AS min_anc,
    ROUND(MAX(CAST(ord_value AS FLOAT64)), 2) AS max_anc,
    ROUND(AVG(CAST(ord_value AS FLOAT64)), 2) AS avg_anc,
    ROUND(APPROX_QUANTILES(CAST(ord_value AS FLOAT64), 100)[OFFSET(50)], 2) AS median_anc,

from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
    inner join ANC using(anon_id)
    WHERE TIMESTAMP_DIFF(ANC.order_time_jittered, c.blood_culture_order_datetime, Day) BETWEEN -2 AND 0
    group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded
)
select c.*,
d.min_anc,
d.max_anc,
d.avg_anc,
d.median_anc,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join cohort_anc d using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [29]:
LDA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]#/LDA_DF[['anon_id','pat_enc_csn_id_coded','order_proc_id_coded']].drop_duplicates().shape[0]

26829

In [24]:
%%bigquery LDA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with ALC as (
select anon_id,
    order_time_jittered,
    ord_value
from `som-nero-phi-jonc101.lpch_core_2023.lpch_lab_result`
        where lower(lab_name) like '%absolute%lymphocyte%'
 and anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
 union all
select anon_id,
       order_time_jittered,
       ord_value 
from `som-nero-phi-jonc101.shc_core_2023.lab_result`
        where lower(lab_name) like '%absolute%lymphocyte%'
    and  anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
),
cohort_alc as
(
    select c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    ROUND(MIN(CAST(ord_value AS FLOAT64)), 2) AS min_alc,
    ROUND(MAX(CAST(ord_value AS FLOAT64)), 2) AS max_alc,
    ROUND(AVG(CAST(ord_value AS FLOAT64)), 2) AS avg_alc,
    ROUND(APPROX_QUANTILES(CAST(ord_value AS FLOAT64), 100)[OFFSET(50)], 2) AS median_alc,

from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
    inner join ALC using(anon_id)
    WHERE TIMESTAMP_DIFF(ALC.order_time_jittered, c.blood_culture_order_datetime, Day) BETWEEN -2 AND 0
    group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded
)
select c.*,
d.min_alc,
d.max_alc,
d.avg_alc,
d.median_alc,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join cohort_alc d using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |

In [22]:
LDA_DF

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,birth_date_jittered,bmi,positive_blood_culture,positive_blood_culture_in_week,earliest_iv_antibiotic_datetime,min_heartrate,...,Line_Presense,Transplant,min_anc,max_anc,avg_anc,median_anc,min_alc,max_alc,avg_alc,median_alc
0,JC2240701,131013359356,382419098,2011-04-17 21:46:00,2011-02-25,11.17,0,0,NaT,167.0,...,,,,,,,,,,
1,JC2242183,131013460045,382823707,2011-04-18 04:05:00,2011-04-10,17.02,0,0,NaT,131.0,...,,,,,,,,,,
2,JC2233463,131013181018,381842787,2011-04-27 13:57:00,2009-12-25,15.97,0,0,NaT,154.0,...,,,,,,,,,,
3,JC2233463,131013181018,381842787,2011-04-27 13:57:00,2009-12-25,15.97,0,0,NaT,154.0,...,,,,,,,,,,
4,JC1307726,131013190292,381859169,2011-04-29 21:32:00,1995-09-09,18.51,0,0,NaT,123.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
842694,JC2540779,131262759299,591201800,2019-02-27 09:41:00,2004-02-09,25.35,0,0,2019-02-27 09:50:00,126.0,...,,,,,,,,,,
842695,JC2540779,321458362,744669014,2019-02-27 10:11:00,2004-02-09,25.35,0,0,NaT,115.0,...,,,,,,,,,,
842696,JC2540779,321458362,744669014,2019-02-27 10:11:00,2004-02-09,25.35,0,0,NaT,115.0,...,,,,,,,,,,
842697,JC2540779,321458362,744669015,2019-02-27 10:23:00,2004-02-09,25.35,0,0,NaT,115.0,...,,,,,,,,,,


## O2Sat

In [30]:
%%bigquery LDA_DF 
Create or replace table som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort AS
with all_spo as (
    select anon_id,recorded_time_jittered,meas_value 
    from `som-nero-phi-jonc101.shc_core_2023.flowsheet`
    where lower(row_disp_name) like 'spo2'
    and anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
union all
    select anon_id,recorded_time_jittered,meas_value 
        from `som-nero-phi-jonc101.lpch_core_2023.lpch_flowsheet`
    where lower(row_disp_name) like 'spo2'
         and anon_id IN (
      SELECT DISTINCT anon_id
      FROM `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort`
    )
),
cohort_spo2 as
(
    select c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    ROUND(MIN(CAST(REGEXP_REPLACE(meas_value, r'[^0-9\.]', '') AS FLOAT64)), 2) AS min_spo2,
    ROUND(MAX(CAST(REGEXP_REPLACE(meas_value, r'[^0-9\.]', '') AS FLOAT64)), 2) AS max_spo2,
    ROUND(AVG(CAST(REGEXP_REPLACE(meas_value, r'[^0-9\.]', '') AS FLOAT64)), 2) AS avg_spo2,
    ROUND(APPROX_QUANTILES(CAST(REGEXP_REPLACE(meas_value, r'[^0-9\.]', '') AS FLOAT64), 100)[OFFSET(50)], 2) AS median_spo2

from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
    inner join all_spo using(anon_id)
    WHERE TIMESTAMP_DIFF(all_spo.recorded_time_jittered, c.blood_culture_order_datetime, Day) BETWEEN -2 AND 0
    group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded
)
select c.*,
d.min_spo2,
d.max_spo2,
d.avg_spo2,
d.median_spo2,
from `som-nero-phi-jonc101.PEDsblood_culture_stewardship.cohort` c 
left join cohort_spo2 d using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

Query is running:   0%|          |