In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import pandas as pd
import os, glob
import seaborn as sns
from scipy.stats import kruskal
import scikit_posthocs as sp
from scipy.stats import mannwhitneyu

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/Users/fa/Downloads/google-cloud-sdk/som-nero-phi-jonc101-2116f9f7bc68.json' 
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101' 
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()
from google.cloud import bigquery_storage_v1

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


# Study Cohort

In [13]:
%%bigquery df_ED
-- Builds the study cohort table: one row per blood-culture order placed in an
-- emergency department, with the order time and the ED arrival time.
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH base AS (
    -- Microbiology orders whose description contains BLOOD and that have at
    -- least one lab_result row (the INNER JOIN). DISTINCT collapses the
    -- one-row-per-result fan-out that the join introduces.
    SELECT DISTINCT
        op.anon_id,
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded,
        op.order_time_jittered_utc,
        EXTRACT(YEAR FROM op.order_time_jittered_utc) AS order_year,
        op.ordering_mode,
        op.department_id
    FROM
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        -- NOTE(review): when order_status is NULL this predicate is NULL, so
        -- such orders are excluded as well - confirm that this is intended.
        AND NOT op.order_status LIKE ANY ('Discontinued','Canceled')
),
ED_orders AS (
    -- Keep only orders whose ordering department name contains "emergency".
    -- The GROUP BY only de-duplicates; there are no aggregates.
    SELECT
        b.anon_id,
        b.pat_enc_csn_id_coded,
        b.order_proc_id_coded,
        b.order_time_jittered_utc,
        b.order_year,
        b.ordering_mode,
        b.department_id,
        department_name
    FROM base b
    INNER JOIN `som-nero-phi-jonc101.shc_core_2023.dep_map` USING (department_id)
    WHERE LOWER(department_name) LIKE '%emergency%'
    GROUP BY
        b.anon_id, b.pat_enc_csn_id_coded, b.order_proc_id_coded,
        b.order_time_jittered_utc, b.order_year, b.ordering_mode,
        b.department_id, department_name
)
SELECT
    b.anon_id,
    b.pat_enc_csn_id_coded,
    b.order_proc_id_coded,
    b.order_time_jittered_utc AS blood_culture_order_datetime,
    b.order_year,
    -- If an encounter has more than one 'Admission' ADT row for the same
    -- department at or before the order, take the earliest one. Grouping by
    -- the ADT time instead would emit one cohort row per admission event and
    -- duplicate the order.
    MIN(ad.effective_time_jittered_utc) AS ed_arrival_datetime
FROM
    ED_orders b
INNER JOIN
    `som-nero-phi-jonc101.shc_core_2023.adt` ad
USING
    (anon_id, pat_enc_csn_id_coded, department_id)
WHERE ad.event_type = 'Admission'
  -- only admission events that happened at or before the culture was ordered
  AND b.order_time_jittered_utc >= ad.effective_time_jittered_utc
GROUP BY
    b.anon_id, b.pat_enc_csn_id_coded, b.order_proc_id_coded,
    b.order_time_jittered_utc, b.order_year

Query is running:   0%|          |

# Labels

In [32]:
%%bigquery df_ed_labels
-- Adds the label positive_blood_culture (1/0) to every cohort row.
-- The statement replaces the cohort table with itself plus one new column, so
-- it is meant to run once per cohort build: on a second run c.* would already
-- contain positive_blood_culture and the column would be selected twice.
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH culture AS (
    -- Result value and free-text comment of each cohort order's lab_result rows.
    SELECT DISTINCT
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        lr.ord_value,
        COALESCE(lr.extended_value_comment, lr.extended_comp_comment) AS comment
    FROM
        `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
        ON c.anon_id = lr.anon_id
       AND c.order_proc_id_coded = lr.order_id_coded
       AND c.pat_enc_csn_id_coded = lr.pat_enc_csn_id_coded
       AND c.blood_culture_order_datetime = lr.order_time_jittered_utc
),
culture_growth AS (
    -- An order counts as positive when culture_sensitivity lists an organism
    -- for it and neither the result value nor the comment matches one of the
    -- negative / excluded patterns below.
    -- DISTINCT: the join yields one row per organism and per result row; a
    -- single flag row per order keeps the final LEFT JOIN from duplicating
    -- cohort rows.
    SELECT DISTINCT
        op.anon_id,
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded,
        1 AS was_pos
    FROM culture op
    INNER JOIN (
        SELECT DISTINCT anon_id, order_proc_id_coded, organism
        FROM `som-nero-phi-jonc101.shc_core_2023.culture_sensitivity`
        WHERE organism IS NOT NULL
    ) cs USING (anon_id, order_proc_id_coded)
    -- COALESCE(..., ''): NOT (NULL LIKE ANY (...)) evaluates to NULL, which
    -- would silently discard every result that has no value or no comment.
    -- A missing text must not count as a match for an exclusion pattern.
    WHERE NOT LOWER(COALESCE(ord_value, '')) LIKE ANY ("%no%grow%","%not%detect%","negative")
      AND NOT UPPER(COALESCE(comment, '')) LIKE ANY ('%NO%GROWTH%','%COAG%NEG%STAPH%','%GRAM%+%RODS%','%GRAM%POS%RODS%','%CONTAMIN%')
)
SELECT
    c.*,
    -- orders without a culture_growth row get 0
    COALESCE(cg.was_pos, 0) AS positive_blood_culture
FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
LEFT JOIN culture_growth cg
    USING (anon_id, pat_enc_csn_id_coded, order_proc_id_coded)

Query is running:   0%|          |

In [43]:
%%bigquery df_ed_labels
-- Adds positive_blood_culture_in_week (1/0): 1 when the same patient has
-- another cohort order that is flagged positive and was ordered 72 to 168
-- hours (TIMESTAMP_DIFF in whole hours) after this order.
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH later_positive AS (
    -- Self-join of the cohort on patient: c is the index order, c2 the later
    -- positive order. DISTINCT leaves one flag row per index order.
    SELECT DISTINCT
        c.anon_id,
        c.order_proc_id_coded,
        1 AS positive_blood_culture_in_week
    FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    INNER JOIN `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c2
        ON c.anon_id = c2.anon_id
    WHERE c2.positive_blood_culture = 1
      AND TIMESTAMP_DIFF(c2.blood_culture_order_datetime, c.blood_culture_order_datetime, HOUR) BETWEEN 72 AND 168
)
SELECT
    c.*,
    -- index orders without a matching later positive order get 0
    COALESCE(o.positive_blood_culture_in_week, 0) AS positive_blood_culture_in_week
FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
LEFT JOIN later_positive o
    USING (anon_id, order_proc_id_coded)

Query is running:   0%|          |

In [57]:
%%bigquery df_ed_labels
-- Adds earliest_iv_antibiotic_datetime and earliest_iv_antibiotic to the cohort.
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH first_iv_abx AS (
    -- The ranking runs over ALL intravenous antibiotic orders of the encounter,
    -- and only afterwards is the rank-1 order checked against the -4h..+24h
    -- window around the culture order.
    -- NOTE(review): if the encounter's first IV antibiotic lies outside that
    -- window the order gets no antibiotic even when a later one lies inside -
    -- confirm this is the intended definition.
    -- RANK() keeps ties, so several medications ordered at the same start time
    -- all come through with rank 1.
    SELECT DISTINCT
        c.anon_id,
        c.pat_enc_csn_id_coded,
        c.order_proc_id_coded,
        abx.med_description,
        abx.order_start_time_jittered_utc AS earliest_iv_antibiotic_datetime
    FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    INNER JOIN (
        SELECT
            anon_id,
            pat_enc_csn_id_coded,
            med_description,
            order_start_time_jittered_utc,
            RANK() OVER (
                PARTITION BY anon_id, pat_enc_csn_id_coded
                ORDER BY order_start_time_jittered_utc ASC
            ) AS start_rank
        FROM `som-nero-phi-jonc101.shc_core_2023.order_med`
        WHERE thera_class_name = 'ANTIBIOTICS'
          AND LOWER(med_route) = 'intravenous'
    ) abx
        ON c.anon_id = abx.anon_id
       AND c.pat_enc_csn_id_coded = abx.pat_enc_csn_id_coded
    WHERE abx.start_rank = 1
      AND TIMESTAMP_DIFF(abx.order_start_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
),
first_iv_abx_named AS (
    -- Replace the medication description by the rxcui_str of the matching
    -- mapped_meds rows (matched on name); rows with rxcui '0' are skipped.
    SELECT DISTINCT
        m.anon_id,
        m.pat_enc_csn_id_coded,
        m.order_proc_id_coded,
        m.earliest_iv_antibiotic_datetime,
        mm.rxcui_str AS earliest_iv_antibiotic
    FROM first_iv_abx m
    INNER JOIN `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm
        ON m.med_description = mm.name
    WHERE mm.rxcui != '0'
)
-- DISTINCT over the full row removes exact duplicate cohort rows.
SELECT DISTINCT
    c.*,
    ea.earliest_iv_antibiotic_datetime,
    ea.earliest_iv_antibiotic
FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
LEFT JOIN first_iv_abx_named ea
    USING (anon_id, pat_enc_csn_id_coded, order_proc_id_coded)


Query is running:   0%|          |

# Features

In [90]:
%%bigquery df_ed_features
-- Adds min / max / avg / median vital-sign features to every cohort row, using
-- flowsheet rows of the same patient recorded from 24 hours before to 2 hours
-- after the blood-culture order (TIMESTAMP_DIFF in whole hours).
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH vitals AS (
  SELECT
    c.*,
    fs.recorded_time_jittered_utc,
    -- TRIM is applied here exactly as in the join filter, so a padded row name
    -- that passes the filter also lands in its feature column.
    CASE WHEN UPPER(TRIM(fs.row_disp_name)) IN ('PULSE', 'HEART RATE') THEN ROUND(SAFE_CAST(fs.numerical_val_1 AS FLOAT64), 2) END AS heartrate,
    CASE WHEN UPPER(TRIM(fs.row_disp_name)) IN ('RESP', 'RESP RATE') THEN ROUND(SAFE_CAST(fs.numerical_val_1 AS FLOAT64), 2) END AS resprate,
    CASE WHEN UPPER(TRIM(fs.row_disp_name)) IN ('TEMP') THEN ROUND(SAFE_CAST(fs.numerical_val_1 AS FLOAT64), 2) END AS temp,
    CASE WHEN (UPPER(TRIM(fs.row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) >= 40) THEN ROUND(SAFE_CAST(fs.numerical_val_1 AS FLOAT64), 2) END AS sysbp,
    CASE WHEN (UPPER(TRIM(fs.row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(fs.numerical_val_2 AS NUMERIC) >= 30) THEN ROUND(SAFE_CAST(fs.numerical_val_2 AS FLOAT64), 2) END AS diasbp
  FROM
    `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
  -- Every flowsheet predicate sits in the ON clause. In a WHERE clause they
  -- would reject the NULL-extended rows of the LEFT JOIN and thereby delete
  -- cohort orders that have no vitals in the window; here such orders are
  -- kept with NULL features.
  -- The WEIGHT range check that used to be part of this filter is gone: no
  -- weight feature is produced, so it had no effect on any output column.
  LEFT JOIN
    `som-nero-phi-jonc101.shc_core_2023.flowsheet` AS fs
    ON fs.anon_id = c.anon_id
   AND TIMESTAMP_DIFF(fs.recorded_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 2
   AND (
        (UPPER(TRIM(fs.row_disp_name)) IN ('PULSE', 'HEART RATE') AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) >= 30) OR -- Heart rate
        (UPPER(TRIM(fs.row_disp_name)) IN ('RESP', 'RESP RATE') AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) >= 4 AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) <= 60) OR -- Respiratory rate
        (UPPER(TRIM(fs.row_disp_name)) IN ('TEMP') AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) >= 90) OR -- Temperature in F
        (UPPER(TRIM(fs.row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(fs.numerical_val_1 AS NUMERIC) >= 40) OR -- Systolic BP
        (UPPER(TRIM(fs.row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(fs.numerical_val_2 AS NUMERIC) >= 30) -- Diastolic BP
   )
)
SELECT
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    earliest_iv_antibiotic,
    -- the median is the 50th of 100 approximate quantiles
    ROUND(MIN(heartrate), 2) AS min_heartrate,
    ROUND(MAX(heartrate), 2) AS max_heartrate,
    ROUND(AVG(heartrate), 2) AS avg_heartrate,
    ROUND(APPROX_QUANTILES(heartrate, 100)[OFFSET(50)], 2) AS median_heartrate,
    ROUND(MIN(resprate), 2) AS min_resprate,
    ROUND(MAX(resprate), 2) AS max_resprate,
    ROUND(AVG(resprate), 2) AS avg_resprate,
    ROUND(APPROX_QUANTILES(resprate, 100)[OFFSET(50)], 2) AS median_resprate,
    ROUND(MIN(temp), 2) AS min_temp,
    ROUND(MAX(temp), 2) AS max_temp,
    ROUND(AVG(temp), 2) AS avg_temp,
    ROUND(APPROX_QUANTILES(temp, 100)[OFFSET(50)], 2) AS median_temp,
    ROUND(MIN(sysbp), 2) AS min_sysbp,
    ROUND(MAX(sysbp), 2) AS max_sysbp,
    ROUND(AVG(sysbp), 2) AS avg_sysbp,
    ROUND(APPROX_QUANTILES(sysbp, 100)[OFFSET(50)], 2) AS median_sysbp,
    ROUND(MIN(diasbp), 2) AS min_diasbp,
    ROUND(MAX(diasbp), 2) AS max_diasbp,
    -- two decimals like every other feature (these two used to round to 0)
    ROUND(AVG(diasbp), 2) AS avg_diasbp,
    ROUND(APPROX_QUANTILES(diasbp, 100)[OFFSET(50)], 2) AS median_diasbp
FROM vitals
GROUP BY
    anon_id, pat_enc_csn_id_coded, order_proc_id_coded,
    blood_culture_order_datetime, order_year, ed_arrival_datetime,
    positive_blood_culture, positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime, earliest_iv_antibiotic

Query is running:   0%|          |

# Labs

In [112]:
%%bigquery df_ed_features
-- Adds min / max / avg / median laboratory features to every cohort row, using
-- lab_result rows of the same encounter whose ORDER time lies from 24 hours
-- before to 2 hours after the blood-culture order (TIMESTAMP_DIFF in whole hours).
CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH labs AS (
    SELECT
        c.*,
        -- Non-numeric result values become NULL through SAFE_CAST.
        -- NOTE(review): the three lab_name patterns are substring matches and
        -- may also pick up other components whose name contains the token -
        -- verify against the distinct lab_name values.
        CASE WHEN LOWER(lr.lab_name) LIKE '%wbc%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS wbc,
        CASE WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS neutrophils,
        CASE WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS lymphocytes,
        #CASE WHEN (LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE WHEN LOWER(lr.base_name) LIKE '%hgb%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS hgb,
        -- the patterns below have no wildcard, i.e. they are exact matches
        CASE WHEN LOWER(lr.base_name) LIKE 'plt' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS plt,
        CASE WHEN LOWER(lr.base_name) LIKE 'na' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS na,
        CASE WHEN LOWER(lr.base_name) LIKE 'hco3' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS hco3,
        CASE WHEN LOWER(lr.base_name) LIKE 'bun' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bun,
        CASE WHEN LOWER(lr.base_name) LIKE 'cr' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS cr,
        #CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE WHEN LOWER(lr.base_name) LIKE 'lac' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS lactate,
        CASE WHEN LOWER(lr.base_name) LIKE 'crp' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS crp,
        CASE WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS procalcitonin
    FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    -- The time window is part of the ON clause. As a WHERE predicate it would
    -- reject the NULL-extended rows of the LEFT JOIN and delete cohort orders
    -- that have no lab in the window; here they are kept with NULL features.
    LEFT JOIN `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
        ON lr.anon_id = c.anon_id
       AND lr.pat_enc_csn_id_coded = c.pat_enc_csn_id_coded
       AND TIMESTAMP_DIFF(lr.order_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 2
)
SELECT
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate,
    max_resprate,
    avg_resprate,
    median_resprate,
    min_temp,
    max_temp,
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp,
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp,
    avg_diasbp,
    median_diasbp,

    -- the median is the 50th of 100 approximate quantiles
    ROUND(MIN(wbc), 2) AS min_wbc,
    ROUND(MAX(wbc), 2) AS max_wbc,
    ROUND(AVG(wbc), 2) AS avg_wbc,
    ROUND(APPROX_QUANTILES(wbc, 100)[OFFSET(50)], 2) AS median_wbc,

    ROUND(MIN(neutrophils), 2) AS min_neutrophils,
    ROUND(MAX(neutrophils), 2) AS max_neutrophils,
    ROUND(AVG(neutrophils), 2) AS avg_neutrophils,
    ROUND(APPROX_QUANTILES(neutrophils, 100)[OFFSET(50)], 2) AS median_neutrophils,

    ROUND(MIN(lymphocytes), 2) AS min_lymphocytes,
    ROUND(MAX(lymphocytes), 2) AS max_lymphocytes,
    ROUND(AVG(lymphocytes), 2) AS avg_lymphocytes,
    ROUND(APPROX_QUANTILES(lymphocytes, 100)[OFFSET(50)], 2) AS median_lymphocytes,

    #ROUND(MIN(bands), 2) AS min_bands,
    #ROUND(MAX(bands), 2) AS max_bands,
    #ROUND(AVG(bands), 2) AS avg_bands,
    #ROUND(APPROX_QUANTILES(bands, 100)[OFFSET(50)], 2) AS median_bands,

    ROUND(MIN(hgb), 2) AS min_hgb,
    ROUND(MAX(hgb), 2) AS max_hgb,
    ROUND(AVG(hgb), 2) AS avg_hgb,
    ROUND(APPROX_QUANTILES(hgb, 100)[OFFSET(50)], 2) AS median_hgb,

    ROUND(MIN(plt), 2) AS min_plt,
    ROUND(MAX(plt), 2) AS max_plt,
    ROUND(AVG(plt), 2) AS avg_plt,
    ROUND(APPROX_QUANTILES(plt, 100)[OFFSET(50)], 2) AS median_plt,

    ROUND(MIN(na), 2) AS min_na,
    ROUND(MAX(na), 2) AS max_na,
    ROUND(AVG(na), 2) AS avg_na,
    ROUND(APPROX_QUANTILES(na, 100)[OFFSET(50)], 2) AS median_na,

    ROUND(MIN(hco3), 2) AS min_hco3,
    ROUND(MAX(hco3), 2) AS max_hco3,
    ROUND(AVG(hco3), 2) AS avg_hco3,
    ROUND(APPROX_QUANTILES(hco3, 100)[OFFSET(50)], 2) AS median_hco3,

    ROUND(MIN(bun), 2) AS min_bun,
    ROUND(MAX(bun), 2) AS max_bun,
    ROUND(AVG(bun), 2) AS avg_bun,
    ROUND(APPROX_QUANTILES(bun, 100)[OFFSET(50)], 2) AS median_bun,

    ROUND(MIN(cr), 2) AS min_cr,
    ROUND(MAX(cr), 2) AS max_cr,
    ROUND(AVG(cr), 2) AS avg_cr,
    ROUND(APPROX_QUANTILES(cr, 100)[OFFSET(50)], 2) AS median_cr,

    #ROUND(MIN(glucose), 2) AS min_glucose,
    #ROUND(MAX(glucose), 2) AS max_glucose,
    #ROUND(AVG(glucose), 2) AS avg_glucose,
    #ROUND(APPROX_QUANTILES(glucose, 100)[OFFSET(50)], 2) AS median_glucose,

    ROUND(MIN(lactate), 2) AS min_lactate,
    ROUND(MAX(lactate), 2) AS max_lactate,
    ROUND(AVG(lactate), 2) AS avg_lactate,
    ROUND(APPROX_QUANTILES(lactate, 100)[OFFSET(50)], 2) AS median_lactate,

    ROUND(MIN(procalcitonin), 2) AS min_procalcitonin,
    ROUND(MAX(procalcitonin), 2) AS max_procalcitonin,
    ROUND(AVG(procalcitonin), 2) AS avg_procalcitonin,
    ROUND(APPROX_QUANTILES(procalcitonin, 100)[OFFSET(50)], 2) AS median_procalcitonin,

    -- crp is extracted in the labs CTE; its summary columns are appended last
    -- so the position of the columns above is unchanged.
    ROUND(MIN(crp), 2) AS min_crp,
    ROUND(MAX(crp), 2) AS max_crp,
    ROUND(AVG(crp), 2) AS avg_crp,
    ROUND(APPROX_QUANTILES(crp, 100)[OFFSET(50)], 2) AS median_crp

FROM labs
-- Group by every column carried over from the cohort so that each cohort row
-- keeps its label and vital-sign columns.
GROUP BY
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate,
    max_resprate,
    avg_resprate,
    median_resprate,
    min_temp,
    max_temp,
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp,
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp,
    avg_diasbp,
    median_diasbp

Query is running:   0%|          |