In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import pandas as pd
import os, glob
import seaborn as sns
from scipy.stats import kruskal
import scikit_posthocs as sp
from scipy.stats import mannwhitneyu
from dotenv import load_dotenv
load_dotenv() 

# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
# os.environ['GCLOUD_PROJECT'] = str(os.getenv("GCLOUD_PROJECT"))
# %load_ext google.cloud.bigquery

from google.cloud import bigquery
from google.cloud import bigquery_storage_v1
project_id = os.getenv('GOOGLE_CLOUD_PROJECT')
client = bigquery.Client(project=project_id)

# Study Cohort
## Questions

2. **how many orders are in department ED?** 
total we have 39580 + 68554 + 104818 = 212952 orders
```sql
SELECT DISTINCT 
  department_name, 
  COUNT(*) AS order_count 
FROM ED_orders 
GROUP BY department_name 
ORDER BY order_count;
```

**Results:**

| Row | department_name | order_count |
|-----|----------------|-------------|
| 1 | EMERGENCY DEPARTMENT PLEASANTON | 39,580 |
| 2 | ADULT EMERGENCY DEPARTMENT | 68,554 |
| 3 | PEDIATRIC EMERGENCY DEPARTMENT | 104,818 |

### 3. **Is event_type admission correct?**

```sql
SELECT DISTINCT event_type 
FROM `som-nero-phi-jonc101.shc_core_2023.adt` ad;
```

**Results:**

| Row | event_type | Description |
|-----|------------|-------------|
| 1 | Hospital Outpatient | Patient visits without admission |
| 2 | Admission | Patient admitted to hospital |
| 3 | Leave of Absence Out | Patient temporarily leaves hospital |
| 4 | Patient Update | Patient information modifications |
| 5 | Census | Current patient count snapshot |
| 6 | Discharge | Patient released from hospital |
| 7 | Transfer Out | Patient moved to different facility |
| 8 | Transfer In | Patient received from different facility |
| 9 | Leave of Absence Census | Count of patients on temporary leave |
| 10 | Leave of Absence Return | Patient returns from temporary leave |

In [None]:
%%bigquery df_ED
# Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
with base as (
SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded, 
        op.order_time_jittered_utc,
        EXTRACT(YEAR FROM op.order_time_jittered_utc) as order_year ,  
        op.ordering_mode,
        op.department_id,
    
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
     INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        AND not op.order_status like any ('Discontinued','Canceled')
), 

# all the blood orders for microbiology, exlucding discontinued and canceled orders

ED_orders as(
select 
b.anon_id,
b.pat_enc_csn_id_coded,
b.order_proc_id_coded,
b.order_time_jittered_utc,
b.order_year,
b.ordering_mode,
b.department_id,
department_name,
from base b inner join `som-nero-phi-jonc101.shc_core_2023.dep_map` using(department_id)
where lower(department_name) like '%emergency%'
group by b.anon_id,b.pat_enc_csn_id_coded,b.order_proc_id_coded,b.order_time_jittered_utc,b.order_year,b.ordering_mode,b.department_id,department_name
)

# ED is defined by regrex matching with "emergence "

SELECT 
  b.anon_id,
  b.pat_enc_csn_id_coded,
  b.order_proc_id_coded,
  b.order_time_jittered_utc as blood_culture_order_datetime,
  b.order_year,
  ad.effective_time_jittered_utc as ed_arrival_datetime,
FROM 
  ED_orders b
INNER JOIN 
  `som-nero-phi-jonc101.shc_core_2023.adt` ad 
USING 
  (anon_id,pat_enc_csn_id_coded,department_id)
Where event_type='Admission'
and b.order_time_jittered_utc>=ad.effective_time_jittered_utc  # confirmed that each admission encounter has only one admission time
GROUP BY b.anon_id,b.pat_enc_csn_id_coded,b.order_proc_id_coded,b.order_time_jittered_utc,b.order_year,ad.effective_time_jittered_utc

In [14]:
refactor_query ="""
with base as (
SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded, 
        op.order_time_jittered_utc,
        EXTRACT(YEAR FROM op.order_time_jittered_utc) as order_year ,  
        op.ordering_mode,
        op.department_id,
        op.order_status
    
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
     INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        AND not op.order_status like any ('Discontinued','Canceled')
)
select * from base
"""
base_only_blood_orders = client.query(refactor_query).to_dataframe()
assert base_only_blood_orders["order_status"].value_counts().iloc[0] == 606984 # all blood cultures are completed
# This code block confirms that all blood cultures are completed


In [None]:
refactor_query ="""
with base as (
SELECT DISTINCT
        op.anon_id, 
        op.pat_enc_csn_id_coded,
        op.order_proc_id_coded, 
        op.order_time_jittered_utc,
        EXTRACT(YEAR FROM op.order_time_jittered_utc) as order_year ,  
        op.ordering_mode,
        op.department_id,
    
    FROM 
        `som-nero-phi-jonc101.shc_core_2023.order_proc` op
     INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
    ON
        op.order_proc_id_coded = lr.order_id_coded
    WHERE
        op.order_type LIKE "Microbiology%"
        AND op.description LIKE "%BLOOD%"
        AND not op.order_status like any ('Discontinued','Canceled')
), 
ED_orders as(
select 
b.anon_id,
b.pat_enc_csn_id_coded,
b.order_proc_id_coded,
b.order_time_jittered_utc,
b.order_year,
b.ordering_mode,
b.department_id,
department_name,
from base b inner join `som-nero-phi-jonc101.shc_core_2023.dep_map` using(department_id)
where lower(department_name) like '%emergency%'
group by b.anon_id,b.pat_enc_csn_id_coded,b.order_proc_id_coded,b.order_time_jittered_utc,b.order_year,b.ordering_mode,b.department_id,department_name
)
SELECT 
  b.anon_id,
  b.pat_enc_csn_id_coded,
  b.order_proc_id_coded,
  b.order_time_jittered_utc as blood_culture_order_datetime,
  b.order_year,
  ad.effective_time_jittered_utc as ed_arrival_datetime,
FROM 
  ED_orders b
INNER JOIN 
  `som-nero-phi-jonc101.shc_core_2023.adt` ad
USING 
  (anon_id,pat_enc_csn_id_coded,department_id)
Where event_type='Admission'
and b.order_time_jittered_utc>=ad.effective_time_jittered_utc # 
GROUP BY b.anon_id,b.pat_enc_csn_id_coded,b.order_proc_id_coded,b.order_time_jittered_utc,b.order_year,ad.effective_time_jittered_utc
"""
refactor_base_cohort = client.query(refactor_query).to_dataframe()
# assert len(refactor_base_cohort) == 183935


In [None]:
# Query using BigQuery client
query = """
select * from som-nero-phi-jonc101.blood_culture_stewardship.cohort
"""
ref_base_cohort = client.query(query).to_dataframe()
# assert len(refactor_base_cohort) == 183935, "refactor_base_cohort should have 183935 rows"
# assert len(ref_base_cohort) == 183935, "base_cohort should have 183935 rows"


159217

In [None]:
assert len(refactor_base_cohort) == 183935, "refactor_base_cohort should have 183935 rows"
assert len(ref_base_cohort) == 159217, "base_cohort should have 159217 rows"
assert len(refactor_base_cohort) == len(ref_base_cohort), "refactor_base_cohort and base_cohort should have the same number of rows"


AssertionError: refactor_base_cohort and base_cohort should have the same number of rows

In [18]:
query = """
select * from som-nero-phi-jonc101.blood_culture_stewardship.cohort_test
"""
test_cohort = client.query(query).to_dataframe()
test_cohort

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,birth_date_jittered,bmi,positive_blood_culture,positive_blood_culture_in_week,earliest_iv_antibiotic_datetime
0,JC1017009,320998317,743001219,2018-11-01 23:20:00,2004-10-15,,0,0,NaT
1,JC1017009,131260247489,584492996,2018-11-01 22:25:00,2004-10-15,,0,0,NaT
2,JC1019680,131312146671,729971535,2021-05-30 19:51:00,2004-11-18,,0,0,NaT
3,JC1019680,131312146671,729971534,2021-05-30 19:51:00,2004-11-18,,0,0,NaT
4,JC1019680,329551305,772102393,2021-05-30 20:11:00,2004-11-18,,0,0,NaT
...,...,...,...,...,...,...,...,...,...
26824,JC2330634,311678703,710801345,2015-05-30 23:03:00,2015-01-28,285.14,0,0,NaT
26825,JC2330634,131091419933,466752787,2015-05-30 22:21:00,2015-01-28,285.14,0,0,NaT
26826,JC6553474,131367534396,924003379,2023-11-27 15:34:00,2016-12-24,2174.85,0,0,NaT
26827,JC6155783,131320870046,756976310,2021-11-17 13:10:00,2018-10-04,12121.21,0,0,NaT


# Labels

# Questions
1. **How is cohort_test generated? and why do we need to label this first?**
    `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test`

In [None]:
%%bigquery df_ed_labels
# Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
With culture as ( 
    SELECT DISTINCT
         c.anon_id,
         c.pat_enc_csn_id_coded,
         c.order_proc_id_coded,
         lr.ord_value,
         coalesce(lr.extended_value_comment,lr.extended_comp_comment) as comment,
    FROM 
        `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
       on c.anon_id=lr.anon_id 
          and c.order_proc_id_coded=lr.order_id_coded
          and c.pat_enc_csn_id_coded=lr.pat_enc_csn_id_coded
          and c.blood_culture_order_datetime=lr.order_time_jittered_utc
    ),
culture_growth as (
select op.anon_id,
       op.pat_enc_csn_id_coded,
       op.order_proc_id_coded,
       1 as was_pos,
from culture op  
                INNER join (SELECT DISTINCT anon_id,order_proc_id_coded, organism
                        FROM `som-nero-phi-jonc101.shc_core_2023.culture_sensitivity`
                        where organism is not null) cs USING (anon_id,order_proc_id_coded) 
WHERE not lower(ord_value) like any ("%no%grow%","%not%detect%","negative")
     AND not upper(comment) like any ('%NO%GROWTH%','%COAG%NEG%STAPH%','%GRAM%+%RODS%',
                                      '%GRAM%POS%RODS%','%CONTAMIN%')
)
select c.*,
case when (cg.was_pos=1) then cg.was_pos else 0 end as positive_blood_culture 
from `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c LEFT JOIN culture_growth cg 
using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)

In [None]:
positive_blood_culture_in_week_Span1=336 # 2 weeks, unit is hour
positive_blood_culture_in_week_Span2=168 # 1 weeks, unit is hour

In [None]:
# # Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_base_cohort_temp = f"{project_id}.blood_culture_stewardship.refactor_base_cohort_temp"

# # Upload the DataFrame to BigQuery
# refactor_base_cohort.to_gbq(
#     destination_table=table_id_refactor_base_cohort_temp,
#     project_id=project_id,
#     if_exists='replace'  # This will replace the table if it exists
# )

# print(f"Uploaded refactor_base_cohort to {table_id_refactor_base_cohort_temp}")

# Now use the uploaded table in your query
refactor_query = f"""
With culture as ( 
    SELECT DISTINCT
         c.anon_id,
         c.pat_enc_csn_id_coded,
         c.order_proc_id_coded,
         lr.ord_value,
         coalesce(lr.extended_value_comment,lr.extended_comp_comment) as comment,
    FROM 
        `{table_id_refactor_base_cohort_temp}` c
    INNER JOIN
        `som-nero-phi-jonc101.shc_core_2023.lab_result` lr
       on c.anon_id=lr.anon_id 
          and c.order_proc_id_coded=lr.order_id_coded
          and c.pat_enc_csn_id_coded=lr.pat_enc_csn_id_coded
          and c.blood_culture_order_datetime=lr.order_time_jittered_utc
    ) ,
    # confimred that could be multiple rows (multiple organisms growth from lab_result table for each blood culture order
    # select * from culture == 277982 results rows (from order number n = 183935)

culture_growth as (
select op.anon_id,
       op.pat_enc_csn_id_coded,
       op.order_proc_id_coded,
       1 as was_pos,
from culture op  
                INNER join (SELECT DISTINCT anon_id,order_proc_id_coded, organism
                        FROM `som-nero-phi-jonc101.shc_core_2023.culture_sensitivity`
                        where organism is not null) cs USING (anon_id,order_proc_id_coded) 
WHERE not lower(ord_value) like any ("%no%grow%","%not%detect%","negative")
     AND not upper(comment) like any ('%NO%GROWTH%','%COAG%NEG%STAPH%','%GRAM%+%RODS%',
                                      '%GRAM%POS%RODS%','%CONTAMIN%')
)
select distinct c.*, # i fixed this distinct , otherwise we have duplciate of 185924
case when (cg.was_pos=1) then cg.was_pos else 0 end as positive_blood_culture 
from `{table_id_refactor_base_cohort_temp}` c LEFT JOIN culture_growth cg 
using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
"""

# Execute the query
refactor_cohort_with_labels = client.query(refactor_query).to_dataframe()
print(f"Created refactor_cohort_with_labels with {len(refactor_cohort_with_labels)} rows")

Created refactor_cohort_with_labels with 183935 rows


In [None]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_cohort_with_labels_temp = f"{project_id}.blood_culture_stewardship.refactor_cohort_with_labels_temp"

# Upload the DataFrame to BigQuery
refactor_cohort_with_labels.to_gbq(
    destination_table=table_id_refactor_cohort_with_labels_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_cohort_with_labels to {table_id_refactor_cohort_with_labels_temp}")

  refactor_cohort_with_labels.to_gbq(


Uploaded refactor_cohort_with_labels to som-nero-phi-jonc101.blood_culture_stewardship.refactor_cohort_with_labels_temp


In [None]:
%%bigquery df_ed_labels
# Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
With order_posetive_next_week AS (
   SELECT DISTINCT
         c.anon_id,
         c.order_proc_id_coded,
         1 as positive_blood_culture_in_week
    FROM 
       `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c 
     INNER JOIN
        `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c2
    ON
        c.anon_id = c2.anon_id
    WHERE
        c2.positive_blood_culture=1
        AND TIMESTAMP_DIFF(c2.blood_culture_order_datetime,c.blood_culture_order_datetime, Hour) between 0 and 168
        group by anon_id,order_proc_id_coded
)
select c.*,
case when o.positive_blood_culture_in_week=1 then o.positive_blood_culture_in_week else 0 end as positive_blood_culture_in_week
from  `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c  left join order_posetive_next_week o using(anon_id,order_proc_id_coded)

In [132]:
refactor_query = f"""
With order_posetive_next_week AS (
   SELECT DISTINCT
         c.anon_id,
         c.order_proc_id_coded,
         1 as positive_blood_culture_in_week
    FROM 
       `{table_id_refactor_cohort_with_labels_temp}` c 
     INNER JOIN
       `{table_id_refactor_cohort_with_labels_temp}` c2
    ON
        c.anon_id = c2.anon_id
    WHERE
        c2.positive_blood_culture=1
        AND TIMESTAMP_DIFF(c2.blood_culture_order_datetime,c.blood_culture_order_datetime, Hour) between 0 and 168
        group by anon_id,order_proc_id_coded
)
select c.*,
case when o.positive_blood_culture_in_week=1 then o.positive_blood_culture_in_week else 0 end as positive_blood_culture_in_week
from  `{table_id_refactor_cohort_with_labels_temp}` c  left join order_posetive_next_week o using(anon_id,order_proc_id_coded)
"""
order_positive_next_week_cohort = client.query(refactor_query).to_dataframe()

In [159]:
refactor_query = f"""
WITH subsequent_positive AS (
  SELECT c.anon_id, c.order_proc_id_coded, 1 AS flag
  FROM `{table_id_refactor_cohort_with_labels_temp}` c
  JOIN `{table_id_refactor_cohort_with_labels_temp}` c2
    ON c.anon_id = c2.anon_id
  WHERE c2.positive_blood_culture = 1
    AND c2.order_proc_id_coded != c.order_proc_id_coded
    AND TIMESTAMP_DIFF(
          c2.blood_culture_order_datetime,
          c.blood_culture_order_datetime,
          HOUR
        ) BETWEEN 0 AND 168 # i would love to  relabel the negative at the same timestamp as the positive 
  GROUP BY c.anon_id, c.order_proc_id_coded
),
self_positive AS (
  SELECT anon_id, order_proc_id_coded, 1 AS flag
  FROM `{table_id_refactor_cohort_with_labels_temp}`
  WHERE positive_blood_culture = 1
),
order_positive_next_week AS (
  SELECT anon_id, order_proc_id_coded, 1 AS positive_blood_culture_in_week
  FROM (
    SELECT * FROM subsequent_positive
    UNION DISTINCT
    SELECT * FROM self_positive
  )
)
SELECT
  c.*,
  IFNULL(o.positive_blood_culture_in_week, 0) AS positive_blood_culture_in_week
FROM `{table_id_refactor_cohort_with_labels_temp}` c
LEFT JOIN order_positive_next_week o USING (anon_id, order_proc_id_coded)
"""
refactor_order_positive_next_week_cohort = client.query(refactor_query).to_dataframe()
refactor_order_positive_next_week_cohort["positive_blood_culture_in_week"].value_counts()

positive_blood_culture_in_week
0    174058
1      9877
Name: count, dtype: Int64

# Earliest Iv Antibiotic

In [160]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_order_positive_next_week_cohort_temp = f"{project_id}.blood_culture_stewardship.order_positive_next_week_cohort_temp"

# Upload the DataFrame to BigQuery
refactor_order_positive_next_week_cohort.to_gbq(
    destination_table=table_id_refactor_order_positive_next_week_cohort_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded new_order_positive_next_week_cohort to {table_id_refactor_order_positive_next_week_cohort_temp}")

  refactor_order_positive_next_week_cohort.to_gbq(


Uploaded new_order_positive_next_week_cohort to som-nero-phi-jonc101.blood_culture_stewardship.order_positive_next_week_cohort_temp


In [None]:
%%bigquery df_ed_labels
# Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH ED_ABX as (
SELECT
  c.anon_id,
  c.pat_enc_csn_id_coded,
  c.order_proc_id_coded,
  #o.med_description,
  o.order_start_time_jittered_utc AS earliest_iv_antibiotic_datetime
FROM
  `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c
INNER JOIN (
    SELECT
      anon_id,
      pat_enc_csn_id_coded,
      med_description,
      order_start_time_jittered_utc,
      RANK() OVER (PARTITION BY anon_id, pat_enc_csn_id_coded ORDER BY order_start_time_jittered_utc ASC) as rank
    FROM
      `som-nero-phi-jonc101.shc_core_2023.order_med` #don't we want to include MAR table for actual administration? 
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
      AND LOWER(med_route) = 'intravenous'
) o ON c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
WHERE
  o.rank = 1
  AND TIMESTAMP_DIFF(o.order_start_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
GROUP BY
  c.anon_id, c.pat_enc_csn_id_coded, c.order_proc_id_coded, o.med_description, o.order_start_time_jittered_utc    
)
/*
,ED_ABX_rxcui_str as (
select m.anon_id,
m.pat_enc_csn_id_coded,
m.order_proc_id_coded,
m.earliest_iv_antibiotic_datetime,
mm.rxcui_str as earliest_iv_antibiotic
from ED_ABX m 
inner join `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm on m.med_description=mm.name
where not rxcui='0'
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,earliest_iv_antibiotic_datetime,earliest_iv_antibiotic
)

select c.*,
ea.earliest_iv_antibiotic_datetime,
#ea.earliest_iv_antibiotic
from `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c left join ED_ABX_rxcui_str ea using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
group by c.anon_id,c.pat_enc_csn_id_coded,c.order_proc_id_coded,c.blood_culture_order_datetime,c.order_year,c.ed_arrival_datetime,
c.positive_blood_culture,c.positive_blood_culture_in_week,ea.earliest_iv_antibiotic_datetime,ea.earliest_iv_antibiotic
*/
select c.*,
ea.earliest_iv_antibiotic_datetime,
#ea.earliest_iv_antibiotic
from `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c 
left join ED_ABX ea using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
group by c.anon_id,c.pat_enc_csn_id_coded,c.order_proc_id_coded,c.blood_culture_order_datetime,c.order_year,c.ed_arrival_datetime,
c.positive_blood_culture,c.positive_blood_culture_in_week,ea.earliest_iv_antibiotic_datetime

In [None]:
refactor_query = f"""
WITH ED_ABX as (
SELECT
  c.anon_id,
  c.pat_enc_csn_id_coded,
  c.order_proc_id_coded,
  #o.med_description,
  o.order_start_time_jittered_utc AS earliest_iv_antibiotic_datetime
FROM
  `{table_id_refactor_order_positive_next_week_cohort}` c
INNER JOIN (
    SELECT
      anon_id,
      pat_enc_csn_id_coded,
      med_description,
      order_start_time_jittered_utc,
      RANK() OVER (PARTITION BY anon_id, pat_enc_csn_id_coded ORDER BY order_start_time_jittered_utc ASC) as rank
    FROM
      `som-nero-phi-jonc101.shc_core_2023.order_med` #don't we want to include MAR table for actual administration? 
    WHERE
      thera_class_name IN ('ANTIBIOTICS')
    #   AND LOWER(med_route) = 'intravenous' # this is too strict 
      AND LOWER(med_route) LIKE '%intravenous%'

) o ON c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
WHERE
  o.rank = 1
  AND TIMESTAMP_DIFF(o.order_start_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
GROUP BY
  c.anon_id, c.pat_enc_csn_id_coded, c.order_proc_id_coded, o.med_description, o.order_start_time_jittered_utc    
)
/*
,ED_ABX_rxcui_str as (
select m.anon_id,
m.pat_enc_csn_id_coded,
m.order_proc_id_coded,
m.earliest_iv_antibiotic_datetime,
mm.rxcui_str as earliest_iv_antibiotic
from ED_ABX m 
inner join `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm on m.med_description=mm.name
where not rxcui='0'
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,earliest_iv_antibiotic_datetime,earliest_iv_antibiotic
)

select c.*,
ea.earliest_iv_antibiotic_datetime,
#ea.earliest_iv_antibiotic
from `{table_id_refactor_order_positive_next_week_cohort}` c left join ED_ABX_rxcui_str ea using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
group by c.anon_id,c.pat_enc_csn_id_coded,c.order_proc_id_coded,c.blood_culture_order_datetime,c.order_year,c.ed_arrival_datetime,
c.positive_blood_culture,c.positive_blood_culture_in_week,ea.earliest_iv_antibiotic_datetime,ea.earliest_iv_antibiotic
*/
select c.*,
ea.earliest_iv_antibiotic_datetime,
#ea.earliest_iv_antibiotic
from `{table_id_refactor_order_positive_next_week_cohort}` c
left join ED_ABX ea using(anon_id,pat_enc_csn_id_coded,order_proc_id_coded)
group by c.anon_id,c.pat_enc_csn_id_coded,c.order_proc_id_coded,c.blood_culture_order_datetime,c.order_year,c.ed_arrival_datetime,
c.positive_blood_culture,c.positive_blood_culture_in_week,ea.earliest_iv_antibiotic_datetime
"""
ed_earliest_abx = client.query(refactor_query).to_dataframe()
ed_earliest_abx["earliest_iv_antibiotic_datetime"].notnull().sum()


In [None]:
refactor_query = f"""
-- 1) For each culture order, collect IV antibiotics in the same encounter
--    that fall within [-4h, +24h] of the culture time.
WITH windowed_abx AS (
  SELECT
    c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    o.order_start_time_jittered_utc AS iv_abx_time,
    ROW_NUMBER() OVER (
      PARTITION BY c.anon_id, c.pat_enc_csn_id_coded, c.order_proc_id_coded
      ORDER BY o.order_start_time_jittered_utc ASC
    ) AS rn
  FROM `{table_id_refactor_order_positive_next_week_cohort_temp}` c
  JOIN `som-nero-phi-jonc101.shc_core_2023.order_med` o
    ON o.anon_id = c.anon_id
   AND o.pat_enc_csn_id_coded = c.pat_enc_csn_id_coded
  WHERE
    o.thera_class_name = 'ANTIBIOTICS'
    -- be flexible on route text (e.g., 'IV', 'IVPB', 'Intravenous (IV)')
    AND (
      CONTAINS_SUBSTR(LOWER(o.med_route), 'intravenous')
      OR LOWER(o.med_route) IN ('iv', 'ivpb')
    )
    AND TIMESTAMP_DIFF(
          o.order_start_time_jittered_utc,
          c.blood_culture_order_datetime,
          HOUR
        ) BETWEEN -4 AND 24
),

-- 2) Reduce to the earliest IV antibiotic *within the window* for that culture order
ED_ABX AS (
  SELECT
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    iv_abx_time AS earliest_iv_antibiotic_datetime
  FROM windowed_abx
  WHERE rn = 1
)

-- 3) Attach that timestamp back to the cohort
SELECT
  c.*,
  ea.earliest_iv_antibiotic_datetime
FROM `{table_id_refactor_order_positive_next_week_cohort_temp}` c
LEFT JOIN ED_ABX ea
  USING (anon_id, pat_enc_csn_id_coded, order_proc_id_coded)
"""
refactor_earliest_abx = client.query(refactor_query).to_dataframe()
refactor_earliest_abx["earliest_iv_antibiotic_datetime"].notnull().sum()


np.int64(95374)

# Vitals

In [None]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_earliest_abx_temp = f"{project_id}.blood_culture_stewardship.refactor_earliest_abx_temp"

# Upload the DataFrame to BigQuery
refactor_earliest_abx.to_gbq(
    destination_table=table_id_refactor_earliest_abx_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_earliest_abx to {table_id_refactor_earliest_abx_temp}")

In [167]:
refactor_earliest_abx["earliest_iv_antibiotic_datetime"].notnull().sum()

np.int64(95374)

In [None]:
%%bigquery df_ed_features
# Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH vitals as (
  SELECT c.*,
  vitals.recorded_time_jittered_utc,
  CASE WHEN upper(row_disp_name) IN ('PULSE', 'HEART RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as heartrate,
  CASE WHEN upper(row_disp_name) IN ('RESP', 'RESP RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as resprate,
  CASE WHEN upper(row_disp_name) IN ('TEMP') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as temp,
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as sysbp ,
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30)  THEN round(SAFE_CAST(numerical_val_2 AS FLOAT64),2) end as diasbp,
  FROM
    `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c LEFT JOIN
  `som-nero-phi-jonc101.shc_core_2023.flowsheet` as vitals
   ON vitals.anon_id = c.anon_id
  WHERE
   ((upper(trim(row_disp_name)) IN ('PULSE', 'HEART RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 30) OR -- Heart rate
   (upper(trim(row_disp_name)) in ('RESP', 'RESP RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 4 AND SAFE_CAST(numerical_val_1 AS numeric) <= 60) OR -- Respiratory rate
   (upper(trim(row_disp_name)) IN ('TEMP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 90) OR -- Temperature in F
   (upper(trim(row_disp_name)) IN ('WEIGHT') AND SAFE_CAST(numerical_val_1 AS numeric) >= 480 AND SAFE_CAST(numerical_val_1 AS numeric) <= 8000)  OR -- Weight 
  (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) OR -- Systolic BP
   (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30) -- diastolic BP
  )
  AND
  (TIMESTAMP_DIFF(vitals.recorded_time_jittered_utc, c.blood_culture_order_datetime, hour) between -24 and 0 )
)
select anon_id,
       pat_enc_csn_id_coded,
       order_proc_id_coded,
       blood_culture_order_datetime,
       order_year,
       ed_arrival_datetime,
       positive_blood_culture,
       positive_blood_culture_in_week,
       earliest_iv_antibiotic_datetime,
       #earliest_iv_antibiotic,
       ROUND(min(heartrate),2) as min_heartrate,
       ROUND(max(heartrate),2) as max_heartrate,
       ROUND(avg(heartrate),2) as avg_heartrate,
       ROUND(APPROX_QUANTILES(heartrate, 100)[OFFSET(50)],2) AS median_heartrate,
       ROUND(min(resprate),2) as min_resprate,
       ROUND(max(resprate),2) as max_resprate,
       ROUND(avg(resprate),2) as avg_resprate,
       ROUND(APPROX_QUANTILES(resprate, 100)[OFFSET(50)],2) AS median_resprate,
       ROUND(min(temp),2) as min_temp,
       ROUND(max(temp),2) as max_temp,
       ROUND(avg(temp),2) as avg_temp,
       ROUND(APPROX_QUANTILES(temp, 100)[OFFSET(50)],2) AS median_temp,
       ROUND(min(sysbp),2) as min_sysbp,
       ROUND(max(sysbp),2) as max_sysbp,
       ROUND(avg(sysbp),2) as avg_sysbp,
       ROUND(APPROX_QUANTILES(sysbp, 100)[OFFSET(50)],2) AS median_sysbp,
       ROUND(min(diasbp),2) as min_diasbp,
       ROUND(max(diasbp),2) as max_diasbp,
       ROUND(avg(diasbp)) as avg_diasbp,
       ROUND(APPROX_QUANTILES(diasbp, 100)[OFFSET(50)]) AS median_diasbp,
from vitals
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,
order_year,ed_arrival_datetime,positive_blood_culture,positive_blood_culture_in_week,earliest_iv_antibiotic_datetime#,earliest_iv_antibiotic

In [164]:
refactor_query = f"""
WITH vitals as (
  SELECT c.*,
  vitals.recorded_time_jittered_utc,
  CASE WHEN upper(row_disp_name) IN ('PULSE', 'HEART RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as heartrate,
  CASE WHEN upper(row_disp_name) IN ('RESP', 'RESP RATE') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as resprate,
  CASE WHEN upper(row_disp_name) IN ('TEMP') THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as temp,
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) THEN round(SAFE_CAST(numerical_val_1 AS FLOAT64),2) end as sysbp ,
  CASE WHEN (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30)  THEN round(SAFE_CAST(numerical_val_2 AS FLOAT64),2) end as diasbp,
  FROM
    `{table_id_refactor_earliest_abx_temp}` c
    LEFT JOIN
  `som-nero-phi-jonc101.shc_core_2023.flowsheet` as vitals
   ON vitals.anon_id = c.anon_id
  WHERE
   ((upper(trim(row_disp_name)) IN ('PULSE', 'HEART RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 30) OR -- Heart rate
   (upper(trim(row_disp_name)) in ('RESP', 'RESP RATE') AND SAFE_CAST(numerical_val_1 AS numeric) >= 4 AND SAFE_CAST(numerical_val_1 AS numeric) <= 60) OR -- Respiratory rate
   (upper(trim(row_disp_name)) IN ('TEMP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 90) OR -- Temperature in F
   (upper(trim(row_disp_name)) IN ('WEIGHT') AND SAFE_CAST(numerical_val_1 AS numeric) >= 480 AND SAFE_CAST(numerical_val_1 AS numeric) <= 8000)  OR -- Weight 
  (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_1 AS numeric) >= 40) OR -- Systolic BP
   (upper(trim(row_disp_name)) IN ('BP', 'NIBP') AND SAFE_CAST(numerical_val_2 AS numeric) >= 30) -- diastolic BP
  )
  AND
  (TIMESTAMP_DIFF(vitals.recorded_time_jittered_utc, c.blood_culture_order_datetime, hour) between -24 and 0 )
)
select anon_id,
       pat_enc_csn_id_coded,
       order_proc_id_coded,
       blood_culture_order_datetime,
       order_year,
       ed_arrival_datetime,
       positive_blood_culture,
       positive_blood_culture_in_week,
       earliest_iv_antibiotic_datetime,
       #earliest_iv_antibiotic,
       ROUND(min(heartrate),2) as min_heartrate,
       ROUND(max(heartrate),2) as max_heartrate,
       ROUND(avg(heartrate),2) as avg_heartrate,
       ROUND(APPROX_QUANTILES(heartrate, 100)[OFFSET(50)],2) AS median_heartrate,
       ROUND(min(resprate),2) as min_resprate,
       ROUND(max(resprate),2) as max_resprate,
       ROUND(avg(resprate),2) as avg_resprate,
       ROUND(APPROX_QUANTILES(resprate, 100)[OFFSET(50)],2) AS median_resprate,
       ROUND(min(temp),2) as min_temp,
       ROUND(max(temp),2) as max_temp,
       ROUND(avg(temp),2) as avg_temp,
       ROUND(APPROX_QUANTILES(temp, 100)[OFFSET(50)],2) AS median_temp,
       ROUND(min(sysbp),2) as min_sysbp,
       ROUND(max(sysbp),2) as max_sysbp,
       ROUND(avg(sysbp),2) as avg_sysbp,
       ROUND(APPROX_QUANTILES(sysbp, 100)[OFFSET(50)],2) AS median_sysbp,
       ROUND(min(diasbp),2) as min_diasbp,
       ROUND(max(diasbp),2) as max_diasbp,
       ROUND(avg(diasbp)) as avg_diasbp,
       ROUND(APPROX_QUANTILES(diasbp, 100)[OFFSET(50)]) AS median_diasbp,
from vitals
group by anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,
order_year,ed_arrival_datetime,positive_blood_culture,positive_blood_culture_in_week,earliest_iv_antibiotic_datetime#,earliest_iv_antibiotic
"""
refactor_vital = client.query(refactor_query).to_dataframe()


In [169]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_vital_temp = f"{project_id}.blood_culture_stewardship.refactor_vital_temp"

# Upload the DataFrame to BigQuery
refactor_vital.to_gbq(
    destination_table=table_id_refactor_vital_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_vital to {table_id_refactor_vital_temp}")

  refactor_vital.to_gbq(


Uploaded refactor_vital to som-nero-phi-jonc101.blood_culture_stewardship.refactor_vital_temp


# Labs

In [None]:
# %%bigquery df_ed_features
# CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
WITH labs AS (
    SELECT 
        c.*,
        CASE 
            WHEN (LOWER(lr.base_name) = 'wbc' AND LOWER(lr.reference_unit) IN ('thousand/ul','k/ul','10x3/ul','10*3/ul','x10e3/ul')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'wbc' AND lr.reference_unit = '/uL' THEN SAFE_CAST(lr.ord_value AS FLOAT64)/1000
        END AS wbc,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS neutrophils,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lymphocytes,
        -- CASE WHEN LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE 
            WHEN LOWER(lr.base_name) = 'hgb' AND lr.reference_unit = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'hgb' AND LOWER(lr.reference_unit) = 'g/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64) * 1000
        END AS hgb,
        CASE 
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) IN ('x10e3/ul','10x3/ul','k/ul','10*3/ul','thousand/ul') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) = 'ul' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 1000
        END AS plt,
        CASE 
            WHEN LOWER(lr.base_name) = 'na' AND LOWER(lr.reference_unit) = 'mmol/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS na,
        CASE 
            WHEN (LOWER(lr.base_name) = 'hco3' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN (LOWER(lr.base_name) = 'co2' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS hco3,      
        CASE 
            WHEN LOWER(lr.base_name) = 'bun' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS bun,
        CASE 
            WHEN LOWER(lr.base_name) = 'cr' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS cr,
        -- CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE 
            WHEN LOWER(lr.base_name) = 'lac' AND LOWER(lr.reference_unit) IN ('mmol/l', 'mmole/l') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lactate,               
        CASE 
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 10
        END AS crp,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' AND LOWER(lr.reference_unit) = 'ng/ml' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS procalcitonin
    FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c
    LEFT JOIN `som-nero-phi-jonc101.shc_core_2023.lab_result` lr 
    USING (anon_id, pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(lr.order_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 0
)
SELECT 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    #earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate, 
    max_resprate, 
    avg_resprate,
    median_resprate,
    min_temp, 
    max_temp, 
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp, 
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp, 
    avg_diasbp,
    median_diasbp,

    ROUND(MIN(wbc), 2) AS min_wbc,
    ROUND(MAX(wbc), 2) AS max_wbc,
    ROUND(AVG(wbc), 2) AS avg_wbc,
    ROUND(APPROX_QUANTILES(wbc, 100)[OFFSET(50)], 2) AS median_wbc,
    
    ROUND(MIN(neutrophils), 2) AS min_neutrophils,
    ROUND(MAX(neutrophils), 2) AS max_neutrophils,
    ROUND(AVG(neutrophils), 2) AS avg_neutrophils,
    ROUND(APPROX_QUANTILES(neutrophils, 100)[OFFSET(50)], 2) AS median_neutrophils,
    
    ROUND(MIN(lymphocytes), 2) AS min_lymphocytes,
    ROUND(MAX(lymphocytes), 2) AS max_lymphocytes,
    ROUND(AVG(lymphocytes), 2) AS avg_lymphocytes,
    ROUND(APPROX_QUANTILES(lymphocytes, 100)[OFFSET(50)], 2) AS median_lymphocytes,
    
    #ROUND(MIN(bands), 2) AS min_bands,
    #ROUND(MAX(bands), 2) AS max_bands,
    #ROUND(AVG(bands), 2) AS avg_bands,
    #ROUND(APPROX_QUANTILES(bands, 100)[OFFSET(50)], 2) AS median_bands,
    
    ROUND(MIN(hgb), 2) AS min_hgb,
    ROUND(MAX(hgb), 2) AS max_hgb,
    ROUND(AVG(hgb), 2) AS avg_hgb,
    ROUND(APPROX_QUANTILES(hgb, 100)[OFFSET(50)], 2) AS median_hgb,

    ROUND(MIN(plt), 2) AS min_plt,
    ROUND(MAX(plt), 2) AS max_plt,
    ROUND(AVG(plt), 2) AS avg_plt,
    ROUND(APPROX_QUANTILES(plt, 100)[OFFSET(50)], 2) AS median_plt,

    ROUND(MIN(na), 2) AS min_na,
    ROUND(MAX(na), 2) AS max_na,
    ROUND(AVG(na), 2) AS avg_na,
    ROUND(APPROX_QUANTILES(na, 100)[OFFSET(50)], 2) AS median_na,

    ROUND(MIN(hco3), 2) AS min_hco3,
    ROUND(MAX(hco3), 2) AS max_hco3,
    ROUND(AVG(hco3), 2) AS avg_hco3,
    ROUND(APPROX_QUANTILES(hco3, 100)[OFFSET(50)], 2) AS median_hco3,

    ROUND(MIN(bun), 2) AS min_bun,
    ROUND(MAX(bun), 2) AS max_bun,
    ROUND(AVG(bun), 2) AS avg_bun,
    ROUND(APPROX_QUANTILES(bun, 100)[OFFSET(50)], 2) AS median_bun,

    ROUND(MIN(cr), 2) AS min_cr,
    ROUND(MAX(cr), 2) AS max_cr,
    ROUND(AVG(cr), 2) AS avg_cr,
    ROUND(APPROX_QUANTILES(cr, 100)[OFFSET(50)], 2) AS median_cr,

    #ROUND(MIN(glucose), 2) AS min_glucose,
    #ROUND(MAX(glucose), 2) AS max_glucose,
    #ROUND(AVG(glucose), 2) AS avg_glucose,
    #ROUND(APPROX_QUANTILES(glucose, 100)[OFFSET(50)], 2) AS median_glucose,

    ROUND(MIN(lactate), 2) AS min_lactate,
    ROUND(MAX(lactate), 2) AS max_lactate,
    ROUND(AVG(lactate), 2) AS avg_lactate,
    ROUND(APPROX_QUANTILES(lactate, 100)[OFFSET(50)], 2) AS median_lactate,

    ROUND(MIN(procalcitonin), 2) AS min_procalcitonin,
    ROUND(MAX(procalcitonin), 2) AS max_procalcitonin,
    ROUND(AVG(procalcitonin), 2) AS avg_procalcitonin,
    ROUND(APPROX_QUANTILES(procalcitonin, 100)[OFFSET(50)], 2) AS median_procalcitonin

FROM labs
GROUP BY 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    #earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate, 
    max_resprate, 
    avg_resprate,
    median_resprate,
    min_temp, 
    max_temp, 
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp, 
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp, 
    avg_diasbp,
    median_diasbp

In [171]:
refactor_query = f"""
WITH labs AS (
    SELECT 
        c.*,
        CASE 
            WHEN (LOWER(lr.base_name) = 'wbc' AND LOWER(lr.reference_unit) IN ('thousand/ul','k/ul','10x3/ul','10*3/ul','x10e3/ul')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'wbc' AND lr.reference_unit = '/uL' THEN SAFE_CAST(lr.ord_value AS FLOAT64)/1000
        END AS wbc,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS neutrophils,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lymphocytes,
        -- CASE WHEN LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE 
            WHEN LOWER(lr.base_name) = 'hgb' AND lr.reference_unit = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'hgb' AND LOWER(lr.reference_unit) = 'g/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64) * 1000
        END AS hgb,
        CASE 
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) IN ('x10e3/ul','10x3/ul','k/ul','10*3/ul','thousand/ul') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) = 'ul' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 1000
        END AS plt,
        CASE 
            WHEN LOWER(lr.base_name) = 'na' AND LOWER(lr.reference_unit) = 'mmol/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS na,
        CASE 
            WHEN (LOWER(lr.base_name) = 'hco3' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN (LOWER(lr.base_name) = 'co2' AND LOWER(lr.reference_unit) like any ('meq/l','mmol/l')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS hco3,      
        CASE 
            WHEN LOWER(lr.base_name) = 'bun' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS bun,
        CASE 
            WHEN LOWER(lr.base_name) = 'cr' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS cr,
        -- CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE 
            WHEN LOWER(lr.base_name) = 'lac' AND LOWER(lr.reference_unit) IN ('mmol/l', 'mmole/l') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lactate,               
        CASE 
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 10
        END AS crp,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' AND LOWER(lr.reference_unit) = 'ng/ml' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS procalcitonin
    FROM `{table_id_refactor_vital_temp}` c
    LEFT JOIN `som-nero-phi-jonc101.shc_core_2023.lab_result` lr 
    USING (anon_id, pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(lr.order_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 0
)
SELECT 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    #earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate, 
    max_resprate, 
    avg_resprate,
    median_resprate,
    min_temp, 
    max_temp, 
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp, 
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp, 
    avg_diasbp,
    median_diasbp,

    ROUND(MIN(wbc), 2) AS min_wbc,
    ROUND(MAX(wbc), 2) AS max_wbc,
    ROUND(AVG(wbc), 2) AS avg_wbc,
    ROUND(APPROX_QUANTILES(wbc, 100)[OFFSET(50)], 2) AS median_wbc,
    
    ROUND(MIN(neutrophils), 2) AS min_neutrophils,
    ROUND(MAX(neutrophils), 2) AS max_neutrophils,
    ROUND(AVG(neutrophils), 2) AS avg_neutrophils,
    ROUND(APPROX_QUANTILES(neutrophils, 100)[OFFSET(50)], 2) AS median_neutrophils,
    
    ROUND(MIN(lymphocytes), 2) AS min_lymphocytes,
    ROUND(MAX(lymphocytes), 2) AS max_lymphocytes,
    ROUND(AVG(lymphocytes), 2) AS avg_lymphocytes,
    ROUND(APPROX_QUANTILES(lymphocytes, 100)[OFFSET(50)], 2) AS median_lymphocytes,
    
    #ROUND(MIN(bands), 2) AS min_bands,
    #ROUND(MAX(bands), 2) AS max_bands,
    #ROUND(AVG(bands), 2) AS avg_bands,
    #ROUND(APPROX_QUANTILES(bands, 100)[OFFSET(50)], 2) AS median_bands,
    
    ROUND(MIN(hgb), 2) AS min_hgb,
    ROUND(MAX(hgb), 2) AS max_hgb,
    ROUND(AVG(hgb), 2) AS avg_hgb,
    ROUND(APPROX_QUANTILES(hgb, 100)[OFFSET(50)], 2) AS median_hgb,

    ROUND(MIN(plt), 2) AS min_plt,
    ROUND(MAX(plt), 2) AS max_plt,
    ROUND(AVG(plt), 2) AS avg_plt,
    ROUND(APPROX_QUANTILES(plt, 100)[OFFSET(50)], 2) AS median_plt,

    ROUND(MIN(na), 2) AS min_na,
    ROUND(MAX(na), 2) AS max_na,
    ROUND(AVG(na), 2) AS avg_na,
    ROUND(APPROX_QUANTILES(na, 100)[OFFSET(50)], 2) AS median_na,

    ROUND(MIN(hco3), 2) AS min_hco3,
    ROUND(MAX(hco3), 2) AS max_hco3,
    ROUND(AVG(hco3), 2) AS avg_hco3,
    ROUND(APPROX_QUANTILES(hco3, 100)[OFFSET(50)], 2) AS median_hco3,

    ROUND(MIN(bun), 2) AS min_bun,
    ROUND(MAX(bun), 2) AS max_bun,
    ROUND(AVG(bun), 2) AS avg_bun,
    ROUND(APPROX_QUANTILES(bun, 100)[OFFSET(50)], 2) AS median_bun,

    ROUND(MIN(cr), 2) AS min_cr,
    ROUND(MAX(cr), 2) AS max_cr,
    ROUND(AVG(cr), 2) AS avg_cr,
    ROUND(APPROX_QUANTILES(cr, 100)[OFFSET(50)], 2) AS median_cr,

    #ROUND(MIN(glucose), 2) AS min_glucose,
    #ROUND(MAX(glucose), 2) AS max_glucose,
    #ROUND(AVG(glucose), 2) AS avg_glucose,
    #ROUND(APPROX_QUANTILES(glucose, 100)[OFFSET(50)], 2) AS median_glucose,

    ROUND(MIN(lactate), 2) AS min_lactate,
    ROUND(MAX(lactate), 2) AS max_lactate,
    ROUND(AVG(lactate), 2) AS avg_lactate,
    ROUND(APPROX_QUANTILES(lactate, 100)[OFFSET(50)], 2) AS median_lactate,

    ROUND(MIN(procalcitonin), 2) AS min_procalcitonin,
    ROUND(MAX(procalcitonin), 2) AS max_procalcitonin,
    ROUND(AVG(procalcitonin), 2) AS avg_procalcitonin,
    ROUND(APPROX_QUANTILES(procalcitonin, 100)[OFFSET(50)], 2) AS median_procalcitonin

FROM labs
GROUP BY 
    anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    #earliest_iv_antibiotic,
    min_heartrate,
    max_heartrate,
    avg_heartrate,
    median_heartrate,
    min_resprate, 
    max_resprate, 
    avg_resprate,
    median_resprate,
    min_temp, 
    max_temp, 
    avg_temp,
    median_temp,
    min_sysbp,
    max_sysbp, 
    avg_sysbp,
    median_sysbp,
    min_diasbp,
    max_diasbp, 
    avg_diasbp,
    median_diasbp
    """
refactor_lab = client.query(refactor_query).to_dataframe()

In [172]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_lab_temp = f"{project_id}.blood_culture_stewardship.refactor_lab_temp"

# Upload the DataFrame to BigQuery
refactor_lab.to_gbq(
    destination_table=table_id_refactor_lab_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_lab to {table_id_refactor_lab_temp}")

  refactor_lab.to_gbq(


Uploaded refactor_lab to som-nero-phi-jonc101.blood_culture_stewardship.refactor_lab_temp


# demographics

In [None]:
%%bigquery demo_df
Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
with demos as (
    SELECT c.*,
           demo.gender,
           demo.canonical_race as race,
          case when demo.birth_date_jittered is not null then DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(demo.birth_date_jittered AS DATE), YEAR)
                else null end AS age,
    FROM 
        `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.demographic` demo
    ON 
        c.anon_id = demo.anon_id
)
select * from demos where (age is null or age>=18)

In [174]:
refactor_query = f"""
with demos as (
    SELECT c.*,
           demo.gender,
           demo.canonical_race as race,
          case when demo.birth_date_jittered is not null then DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(demo.birth_date_jittered AS DATE), YEAR)
                else null end AS age,
    FROM 
        `{table_id_refactor_lab_temp}` c
    LEFT JOIN 
        `som-nero-phi-jonc101.shc_core_2023.demographic` demo
    ON 
        c.anon_id = demo.anon_id
)
select * from demos where (age is null or age>=18)
"""
refactor_adult = client.query(refactor_query).to_dataframe()

In [178]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_adult_temp = f"{project_id}.blood_culture_stewardship.refactor_adult_temp"

# Upload the DataFrame to BigQuery
refactor_adult.to_gbq(
    destination_table=table_id_refactor_adult_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_adult to {table_id_refactor_adult_temp}")

  refactor_adult.to_gbq(


Uploaded refactor_adult to som-nero-phi-jonc101.blood_culture_stewardship.refactor_adult_temp


# ICD codes

In [None]:
%%bigquery icds 
Create or replace table som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
with icds as (
select *,
case when icd10 like any ('R78.81') or icd9 like any('790.7') then 1 end as bacteremia,
case when icd10 like any ('A41.9','R65.21','Z86.19') or icd9 like any('038.9','995.91','785.52','995.2') then 1 end as septic_shock,
case when icd10 like any ('I33.0') or icd9 like any('421.0') then 1 end as infective_endocarditis,
case when icd10 like any ('I80.9') or icd9 like any('451.9') then 1 end as septic_thrombophlebitis,
case when icd10 like any ('T82.7%A') or icd9 like any('996.61','996.62') then 1 end as vascular_graft_infection,
case when icd10 like any ('T80.211A') or icd9 like any('999.32') then 1 end as CRBSI,
case when icd10 like any ('M46.40') or icd9 like any('722.9') then 1 end as infectious_discitis,
case when icd10 like any ('G06.2','B96.89') or icd9 like any('041.89') then 1 end as epidural_abscess,
case when icd10 like any ('M00.9') or icd9 like any('711.00') then 1 end as septic_arthritis,
case when icd10 like any ('G03.9') or icd9 like any('322.9') then 1 end as meningitis,
case when icd10 like any ('G00.9') or icd9 like any('320.82','320.9') then 1 end as meningitis_bacteria,
case when icd10 like any ('K83.0') or icd9 like any('576.1') then 1 end as cholangitis,
case when icd10 like any ('K83.09','B96.89') or icd9 like any('576.1') then 1 else 0 end as bacterial_cholangitis,
case when icd10 like any ('N12') or icd9 like any('590.80') then 1 else 0 end as pyelonephritis,
case when icd10 like any ('N10','B96.89') or icd9 like any('590.10','590.11') then 1 else 0 end as acute_bacterial_pyelonephritis,
case when icd10 like any ('L03.90') or icd9 like any('486.0') then 1 else 0 end as severe_pneumonia,
case when icd10 like any ('M86.9') or icd9 like any('730.20','730.30','730.90','730.98') then 1 else 0 end as acute_hematogenous_osteomyelitis,
case when icd10 like any ('Q89.01') or icd9 like any('759.0') then 1 else 0 end as asplenia,
case when icd10 like any ('D84.9') or icd9 like any('279.3') then 1 else 0 end as immunocompromised_state,
case when icd10 like any ('L03.90') then 1 else 0 end as severe_cellulitis,
case when icd10 like any ('N30.90') or icd9 like any('595.9') then 1 else 0 end as cystitis,
case when icd10 like any ('N41.9') or icd9 like any('601.9') then 1 else 0 end as prostatitis,
case when icd10 like any ('J18.9') or icd9 like any('486') then 1 else 0 end as CAP,
case when icd10 like any ('E11.69','L08.9') or icd9 like any('250.80','686.9') then 1 else 0 end as diabetic_foot_infection,
case when icd10 like any ('A09','A04.72') or icd9 like any('686.9') then 1 else 0 end as colitis,
case when icd10 like any ('J69.0') or icd9 like any('507.0') then 1 else 0 end as aspiration_pneumonia,
case when icd10 like any ('K81.9') or icd9 like any('575.10') then 1 else 0 end as uncomplicated_cholecystitis,
case when icd10 like any ('K57.92') or icd9 like any('562.11') then 1 else 0 end as uncomplicated_diverticulitis,
case when icd10 like any ('K85.9') or icd9 like any('577.0') then 1 else 0 end as Uncomplicated_pancreatitis,
from som-nero-phi-jonc101.blood_culture_stewardship.cohort_test c
left join 
som-nero-phi-jonc101.shc_core_2023.diagnosis diag
using (anon_id)
#WHERE  DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(diag.start_date_jittered_utc AS DATE), Day) between -6 and 15
WHERE  DATE_DIFF(CAST(c.ed_arrival_datetime AS DATE), CAST(diag.start_date_jittered_utc AS DATE), Day)>0
AND (diag.end_date_jittered_utc is null or DATE_DIFF(CAST(c.ed_arrival_datetime AS DATE), CAST(diag.end_date_jittered_utc AS DATE), Day)<=0)
)
select anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
blood_culture_order_datetime,
order_year,
ed_arrival_datetime,
positive_blood_culture,
positive_blood_culture_in_week,
earliest_iv_antibiotic_datetime,
min_heartrate,
max_heartrate,
avg_heartrate,
median_heartrate,
min_resprate,
max_resprate,
avg_resprate,
median_resprate,
min_temp,
max_temp,
avg_temp,
median_temp,
min_sysbp,
max_sysbp,
avg_sysbp,
median_sysbp,
min_diasbp,
max_diasbp,
avg_diasbp,
median_diasbp,
min_wbc,
max_wbc,
avg_wbc,
median_wbc,
min_neutrophils,
max_neutrophils,
avg_neutrophils,
median_neutrophils,
min_lymphocytes,
max_lymphocytes,
avg_lymphocytes,
median_lymphocytes,
min_hgb,
max_hgb,
avg_hgb,
median_hgb,
min_plt,
max_plt,
avg_plt,
median_plt,
min_na,
max_na,
avg_na,
median_na,
min_hco3,
max_hco3,
avg_hco3,
median_hco3,
min_bun,
max_bun,
avg_bun,
median_bun,
min_cr,
max_cr,
avg_cr,
median_cr,
min_lactate,
max_lactate,
avg_lactate,
median_lactate,
min_procalcitonin,
max_procalcitonin,
avg_procalcitonin,
median_procalcitonin,
gender,
race,
age,
max(bacteremia) as bacteremia,
max(septic_shock) as septic_shock,
max(infective_endocarditis) as infective_endocarditis,
max(septic_thrombophlebitis) as septic_thrombophlebitis,
max(vascular_graft_infection) as vascular_graft_infection,
max(CRBSI) as CRBSI,
max(infectious_discitis) as infectious_discitis,
max(epidural_abscess) as epidural_abscess,
max(septic_arthritis) as septic_arthritis,
max(meningitis) as meningitis,
max(meningitis_bacteria) as meningitis_bacteria,
max(cholangitis) as cholangitis,
max(bacterial_cholangitis) as bacterial_cholangitis,
max(pyelonephritis) as pyelonephritis,
max(acute_bacterial_pyelonephritis) as acute_bacterial_pyelonephritis,
max(severe_pneumonia) as severe_pneumonia,
max(acute_hematogenous_osteomyelitis) as acute_hematogenous_osteomyelitis,
max(asplenia) as asplenia,
max(immunocompromised_state) as immunocompromised_state,
max(severe_cellulitis) as severe_cellulitis,
max(cystitis) as cystitis,
max(prostatitis) as prostatitis,
max(CAP) as CAP,
max(diabetic_foot_infection) as diabetic_foot_infection,
max(colitis) as colitis,
max(aspiration_pneumonia) as aspiration_pneumonia,
max(uncomplicated_cholecystitis) as uncomplicated_cholecystitis,
max(uncomplicated_diverticulitis) as uncomplicated_diverticulitis,
max(Uncomplicated_pancreatitis) as Uncomplicated_pancreatitis
from icds
group by anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
blood_culture_order_datetime,
order_year,
ed_arrival_datetime,
positive_blood_culture,
positive_blood_culture_in_week,
earliest_iv_antibiotic_datetime,
min_heartrate,
max_heartrate,
avg_heartrate,
median_heartrate,
min_resprate,
max_resprate,
avg_resprate,
median_resprate,
min_temp,
max_temp,
avg_temp,
median_temp,
min_sysbp,
max_sysbp,
avg_sysbp,
median_sysbp,
min_diasbp,
max_diasbp,
avg_diasbp,
median_diasbp,
min_wbc,
max_wbc,
avg_wbc,
median_wbc,
min_neutrophils,
max_neutrophils,
avg_neutrophils,
median_neutrophils,
min_lymphocytes,
max_lymphocytes,
avg_lymphocytes,
median_lymphocytes,
min_hgb,
max_hgb,
avg_hgb,
median_hgb,
min_plt,
max_plt,
avg_plt,
median_plt,
min_na,
max_na,
avg_na,
median_na,
min_hco3,
max_hco3,
avg_hco3,
median_hco3,
min_bun,
max_bun,
avg_bun,
median_bun,
min_cr,
max_cr,
avg_cr,
median_cr,
min_lactate,
max_lactate,
avg_lactate,
median_lactate,
min_procalcitonin,
max_procalcitonin,
avg_procalcitonin,
median_procalcitonin,
gender,
race,
age;

In [189]:
refactor_query = f"""
with icds as (
select *,
case when icd10 like any ('R78.81') or icd9 like any('790.7') then 1 end as bacteremia,
case when icd10 like any ('A41.9','R65.21','Z86.19') or icd9 like any('038.9','995.91','785.52','995.2') then 1 end as septic_shock,
case when icd10 like any ('I33.0') or icd9 like any('421.0') then 1 end as infective_endocarditis,
case when icd10 like any ('I80.9') or icd9 like any('451.9') then 1 end as septic_thrombophlebitis,
case when icd10 like any ('T82.7%A') or icd9 like any('996.61','996.62') then 1 end as vascular_graft_infection,
case when icd10 like any ('T80.211A') or icd9 like any('999.32') then 1 end as CRBSI,
case when icd10 like any ('M46.40') or icd9 like any('722.9') then 1 end as infectious_discitis,
case when icd10 like any ('G06.2','B96.89') or icd9 like any('041.89') then 1 end as epidural_abscess,
case when icd10 like any ('M00.9') or icd9 like any('711.00') then 1 end as septic_arthritis,
case when icd10 like any ('G03.9') or icd9 like any('322.9') then 1 end as meningitis,
case when icd10 like any ('G00.9') or icd9 like any('320.82','320.9') then 1 end as meningitis_bacteria,
case when icd10 like any ('K83.0') or icd9 like any('576.1') then 1 end as cholangitis,
case when icd10 like any ('K83.09','B96.89') or icd9 like any('576.1') then 1 else 0 end as bacterial_cholangitis,
case when icd10 like any ('N12') or icd9 like any('590.80') then 1 else 0 end as pyelonephritis,
case when icd10 like any ('N10','B96.89') or icd9 like any('590.10','590.11') then 1 else 0 end as acute_bacterial_pyelonephritis,
case when icd10 like any ('L03.90') or icd9 like any('486.0') then 1 else 0 end as severe_pneumonia,
case when icd10 like any ('M86.9') or icd9 like any('730.20','730.30','730.90','730.98') then 1 else 0 end as acute_hematogenous_osteomyelitis,
case when icd10 like any ('Q89.01') or icd9 like any('759.0') then 1 else 0 end as asplenia,
case when icd10 like any ('D84.9') or icd9 like any('279.3') then 1 else 0 end as immunocompromised_state,
case when icd10 like any ('L03.90') then 1 else 0 end as severe_cellulitis,
case when icd10 like any ('N30.90') or icd9 like any('595.9') then 1 else 0 end as cystitis,
case when icd10 like any ('N41.9') or icd9 like any('601.9') then 1 else 0 end as prostatitis,
case when icd10 like any ('J18.9') or icd9 like any('486') then 1 else 0 end as CAP,
case when icd10 like any ('E11.69','L08.9') or icd9 like any('250.80','686.9') then 1 else 0 end as diabetic_foot_infection,
case when icd10 like any ('A09','A04.72') or icd9 like any('686.9') then 1 else 0 end as colitis,
case when icd10 like any ('J69.0') or icd9 like any('507.0') then 1 else 0 end as aspiration_pneumonia,
case when icd10 like any ('K81.9') or icd9 like any('575.10') then 1 else 0 end as uncomplicated_cholecystitis,
case when icd10 like any ('K57.92') or icd9 like any('562.11') then 1 else 0 end as uncomplicated_diverticulitis,
case when icd10 like any ('K85.9') or icd9 like any('577.0') then 1 else 0 end as Uncomplicated_pancreatitis,
from `{table_id_refactor_adult_temp}` c #holy hardcode 
left join 
som-nero-phi-jonc101.shc_core_2023.diagnosis diag
using (anon_id)
# WHERE  DATE_DIFF(CAST(c.blood_culture_order_datetime AS DATE), CAST(diag.start_date_jittered_utc AS DATE), Day) between -6 and 15
WHERE  DATE_DIFF(CAST(c.ed_arrival_datetime AS DATE), CAST(diag.start_date_jittered_utc AS DATE), Day)>0
AND (diag.end_date_jittered_utc is null or DATE_DIFF(CAST(c.ed_arrival_datetime AS DATE), CAST(diag.end_date_jittered_utc AS DATE), Day)<=0)
)

select anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
blood_culture_order_datetime,
order_year,
ed_arrival_datetime,
positive_blood_culture,
positive_blood_culture_in_week,
earliest_iv_antibiotic_datetime,
min_heartrate,
max_heartrate,
avg_heartrate,
median_heartrate,
min_resprate,
max_resprate,
avg_resprate,
median_resprate,
min_temp,
max_temp,
avg_temp,
median_temp,
min_sysbp,
max_sysbp,
avg_sysbp,
median_sysbp,
min_diasbp,
max_diasbp,
avg_diasbp,
median_diasbp,
min_wbc,
max_wbc,
avg_wbc,
median_wbc,
min_neutrophils,
max_neutrophils,
avg_neutrophils,
median_neutrophils,
min_lymphocytes,
max_lymphocytes,
avg_lymphocytes,
median_lymphocytes,
min_hgb,
max_hgb,
avg_hgb,
median_hgb,
min_plt,
max_plt,
avg_plt,
median_plt,
min_na,
max_na,
avg_na,
median_na,
min_hco3,
max_hco3,
avg_hco3,
median_hco3,
min_bun,
max_bun,
avg_bun,
median_bun,
min_cr,
max_cr,
avg_cr,
median_cr,
min_lactate,
max_lactate,
avg_lactate,
median_lactate,
min_procalcitonin,
max_procalcitonin,
avg_procalcitonin,
median_procalcitonin,
gender,
race,
age,
max(bacteremia) as bacteremia,
max(septic_shock) as septic_shock,
max(infective_endocarditis) as infective_endocarditis,
max(septic_thrombophlebitis) as septic_thrombophlebitis,
max(vascular_graft_infection) as vascular_graft_infection,
max(CRBSI) as CRBSI,
max(infectious_discitis) as infectious_discitis,
max(epidural_abscess) as epidural_abscess,
max(septic_arthritis) as septic_arthritis,
max(meningitis) as meningitis,
max(meningitis_bacteria) as meningitis_bacteria,
max(cholangitis) as cholangitis,
max(bacterial_cholangitis) as bacterial_cholangitis,
max(pyelonephritis) as pyelonephritis,
max(acute_bacterial_pyelonephritis) as acute_bacterial_pyelonephritis,
max(severe_pneumonia) as severe_pneumonia,
max(acute_hematogenous_osteomyelitis) as acute_hematogenous_osteomyelitis,
max(asplenia) as asplenia,
max(immunocompromised_state) as immunocompromised_state,
max(severe_cellulitis) as severe_cellulitis,
max(cystitis) as cystitis,
max(prostatitis) as prostatitis,
max(CAP) as CAP,
max(diabetic_foot_infection) as diabetic_foot_infection,
max(colitis) as colitis,
max(aspiration_pneumonia) as aspiration_pneumonia,
max(uncomplicated_cholecystitis) as uncomplicated_cholecystitis,
max(uncomplicated_diverticulitis) as uncomplicated_diverticulitis,
max(Uncomplicated_pancreatitis) as Uncomplicated_pancreatitis
from icds
group by anon_id,
pat_enc_csn_id_coded,
order_proc_id_coded,
blood_culture_order_datetime,
order_year,
ed_arrival_datetime,
positive_blood_culture,
positive_blood_culture_in_week,
earliest_iv_antibiotic_datetime,
min_heartrate,
max_heartrate,
avg_heartrate,
median_heartrate,
min_resprate,
max_resprate,
avg_resprate,
median_resprate,
min_temp,
max_temp,
avg_temp,
median_temp,
min_sysbp,
max_sysbp,
avg_sysbp,
median_sysbp,
min_diasbp,
max_diasbp,
avg_diasbp,
median_diasbp,
min_wbc,
max_wbc,
avg_wbc,
median_wbc,
min_neutrophils,
max_neutrophils,
avg_neutrophils,
median_neutrophils,
min_lymphocytes,
max_lymphocytes,
avg_lymphocytes,
median_lymphocytes,
min_hgb,
max_hgb,
avg_hgb,
median_hgb,
min_plt,
max_plt,
avg_plt,
median_plt,
min_na,
max_na,
avg_na,
median_na,
min_hco3,
max_hco3,
avg_hco3,
median_hco3,
min_bun,
max_bun,
avg_bun,
median_bun,
min_cr,
max_cr,
avg_cr,
median_cr,
min_lactate,
max_lactate,
avg_lactate,
median_lactate,
min_procalcitonin,
max_procalcitonin,
avg_procalcitonin,
median_procalcitonin,
gender,
race,
age
"""
refactor_icd = client.query(refactor_query).to_dataframe()
# test_refactor_icd = client.query(refactor_query).to_dataframe()

In [190]:
refactor_icd

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_proc_id_coded,blood_culture_order_datetime,order_year,ed_arrival_datetime,positive_blood_culture,positive_blood_culture_in_week,earliest_iv_antibiotic_datetime,min_heartrate,...,severe_cellulitis,cystitis,prostatitis,CAP,diabetic_foot_infection,colitis,aspiration_pneumonia,uncomplicated_cholecystitis,uncomplicated_diverticulitis,Uncomplicated_pancreatitis
0,JC2454055,131212270757,513864430,2017-01-26 20:57:00+00:00,2017,2017-01-26 20:36:00+00:00,0,0,NaT,101.0,...,0,0,0,0,0,0,0,0,0,0
1,JC1934604,131354709624,877324602,2023-04-15 07:55:00+00:00,2023,2023-04-15 06:01:00+00:00,0,0,2023-04-15 08:19:00+00:00,95.0,...,0,0,0,0,1,0,0,0,0,0
2,JC6282517,131334663826,800539843,2022-07-16 07:03:00+00:00,2022,2022-07-16 06:49:00+00:00,0,0,NaT,129.0,...,0,0,0,0,0,0,0,0,0,0
3,JC547494,131261815870,588786333,2018-12-14 22:01:00+00:00,2018,2018-12-14 19:49:00+00:00,0,0,NaT,51.0,...,0,0,0,0,0,0,0,0,0,0
4,JC691472,131239743022,539150780,2017-10-20 00:37:00+00:00,2017,2017-10-20 00:25:00+00:00,0,1,NaT,88.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159118,JC829698,131274659768,626160320,2019-08-11 05:39:00+00:00,2019,2019-08-11 03:08:00+00:00,0,0,2019-08-11 06:19:00+00:00,100.0,...,0,0,0,1,0,1,1,0,0,0
159119,JC1217926,131203564439,508107835,2016-10-19 08:51:00+00:00,2016,2016-10-18 23:39:00+00:00,0,0,2016-10-19 09:00:00+00:00,94.0,...,1,0,0,1,1,1,0,0,0,0
159120,JC2464239,131226361077,523690237,2017-04-30 03:34:00+00:00,2017,2017-04-30 01:41:00+00:00,0,0,NaT,102.0,...,0,0,0,0,0,0,0,0,0,0
159121,JC6539475,131364496370,912857426,2023-10-31 02:31:00+00:00,2023,2023-10-30 17:11:00+00:00,0,0,NaT,69.0,...,0,0,0,0,0,0,0,0,0,0


In [184]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_icd_temp = f"{project_id}.blood_culture_stewardship.refactor_icd_temp"

# Upload the DataFrame to BigQuery
refactor_icd.to_gbq(
    destination_table=table_id_refactor_icd_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_icd to {table_id_refactor_icd_temp}")

  refactor_icd.to_gbq(


Uploaded refactor_icd to som-nero-phi-jonc101.blood_culture_stewardship.refactor_icd_temp


# VANC + OZYSN 

In [None]:
%%bigquery df_ed_labels
# CREATE OR REPLACE TABLE som-nero-phi-jonc101.blood_culture_stewardship.cohort AS
With ED_ABX as(
SELECT
    c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    o.med_description
  FROM
    `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c
  INNER JOIN (
      SELECT
        anon_id,
        pat_enc_csn_id_coded,
        med_description,
        order_start_time_jittered_utc
      FROM
        `som-nero-phi-jonc101.shc_core_2023.order_med`
      WHERE
        thera_class_name IN ('ANTIBIOTICS')
        AND LOWER(med_route) = 'intravenous'
  ) o ON c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
  WHERE
    TIMESTAMP_DIFF(o.order_start_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
),
ED_ABX_rxcui_str AS (
 SELECT 
    m.anon_id,
    m.pat_enc_csn_id_coded,
    m.order_proc_id_coded,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE 'vancomycin' THEN 1 ELSE 0 END) AS vanc,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE ANY ('tazobactam', 'piperacillin', 'glucose') THEN 1 ELSE 0 END) AS zosyn
  FROM 
    ED_ABX m 
  INNER JOIN 
    `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm 
  ON 
    m.med_description = mm.name
  WHERE 
    rxcui != '0'
    AND (LOWER(rxcui_str) LIKE 'vancomycin' OR 
         UPPER(name) LIKE '%ZOSYN%' OR 
         UPPER(name) LIKE '%PIPERACILLIN-TAZOBACTAM%')
  GROUP BY 
    m.anon_id, m.pat_enc_csn_id_coded,m.order_proc_id_coded
)
SELECT 
  c.*,
  CASE WHEN ea.vanc = 1 THEN 1 ELSE 0 END AS vanc,
  CASE WHEN ea.zosyn = 1 THEN 1 ELSE 0 END AS zosyn,
  CASE WHEN ea.vanc = 1 AND ea.zosyn = 1 THEN 1 ELSE 0 END AS vanc_zosyn,
  CASE WHEN (ea.vanc = 0  or ea.vanc is null or ea.zosyn = 0 or  ea.zosyn is null) AND c.earliest_iv_antibiotic_datetime IS NOT NULL THEN 1 ELSE 0 END AS other_ABX
FROM 
  `som-nero-phi-jonc101.blood_culture_stewardship.cohort_test` c 
LEFT JOIN 
  ED_ABX_rxcui_str  ea
USING (anon_id, pat_enc_csn_id_coded,order_proc_id_coded)

In [185]:
refactor_query = f"""
With ED_ABX as(
SELECT
    c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    o.med_description
  FROM
    `{table_id_refactor_icd_temp}` c
  INNER JOIN (
      SELECT
        anon_id,
        pat_enc_csn_id_coded,
        med_description,
        order_start_time_jittered_utc
      FROM
        `som-nero-phi-jonc101.shc_core_2023.order_med`
      WHERE
        thera_class_name IN ('ANTIBIOTICS')
        AND LOWER(med_route) = 'intravenous'
  ) o ON c.anon_id = o.anon_id AND c.pat_enc_csn_id_coded = o.pat_enc_csn_id_coded
  WHERE
    TIMESTAMP_DIFF(o.order_start_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -4 AND 24
),
ED_ABX_rxcui_str AS (
 SELECT 
    m.anon_id,
    m.pat_enc_csn_id_coded,
    m.order_proc_id_coded,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE 'vancomycin' THEN 1 ELSE 0 END) AS vanc,
    MAX(CASE WHEN LOWER(rxcui_str) LIKE ANY ('tazobactam', 'piperacillin', 'glucose') THEN 1 ELSE 0 END) AS zosyn
  FROM 
    ED_ABX m 
  INNER JOIN 
    `som-nero-phi-jonc101.shc_core_2023.mapped_meds` mm 
  ON 
    m.med_description = mm.name
  WHERE 
    rxcui != '0'
    AND (LOWER(rxcui_str) LIKE 'vancomycin' OR 
         UPPER(name) LIKE '%ZOSYN%' OR 
         UPPER(name) LIKE '%PIPERACILLIN-TAZOBACTAM%')
  GROUP BY 
    m.anon_id, m.pat_enc_csn_id_coded,m.order_proc_id_coded
)
SELECT 
  c.*,
  CASE WHEN ea.vanc = 1 THEN 1 ELSE 0 END AS vanc,
  CASE WHEN ea.zosyn = 1 THEN 1 ELSE 0 END AS zosyn,
  CASE WHEN ea.vanc = 1 AND ea.zosyn = 1 THEN 1 ELSE 0 END AS vanc_zosyn,
  CASE WHEN (ea.vanc = 0  or ea.vanc is null or ea.zosyn = 0 or  ea.zosyn is null) AND c.earliest_iv_antibiotic_datetime IS NOT NULL THEN 1 ELSE 0 END AS other_ABX
FROM 
  `{table_id_refactor_icd_temp}` c 
LEFT JOIN 
  ED_ABX_rxcui_str  ea
USING (anon_id, pat_enc_csn_id_coded,order_proc_id_coded)"""

refactor_vanc_ozysn = client.query(refactor_query).to_dataframe()


In [205]:
# Upload refactor_base_cohort to BigQuery as a temporary table
table_id_refactor_vanc_ozysn_temp = f"{project_id}.blood_culture_stewardship.refactor_final_cohort_temp"

# Upload the DataFrame to BigQuery
refactor_vanc_ozysn.to_gbq(
    destination_table=table_id_refactor_vanc_ozysn_temp,
    project_id=project_id,
    if_exists='replace'  # This will replace the table if it exists
)

print(f"Uploaded refactor_vanc_ozysn to {table_id_refactor_vanc_ozysn_temp}")

  refactor_vanc_ozysn.to_gbq(


Uploaded refactor_vanc_ozysn to som-nero-phi-jonc101.blood_culture_stewardship.refactor_final_cohort_temp


## LAB Reports

Im not sure what this section is doing yet

In [None]:
%%bigquery df_ed_labs
SELECT 
       c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    c.blood_culture_order_datetime,
    c.order_year,
    c.ed_arrival_datetime,
    c.positive_blood_culture,
    c.positive_blood_culture_in_week,
    c.earliest_iv_antibiotic_datetime,
    c.earliest_iv_antibiotic,
        CASE 
            WHEN (LOWER(lr.base_name) = 'wbc' AND LOWER(lr.reference_unit) IN ('thousand/ul','k/ul','10x3/ul','10*3/ul','x10e3/ul')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'wbc' AND lr.reference_unit = '/uL' THEN SAFE_CAST(lr.ord_value AS FLOAT64)/1000
        END AS wbc,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS neutrophils,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lymphocytes,
        -- CASE WHEN LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE 
            WHEN LOWER(lr.base_name) = 'hgb' AND lr.reference_unit = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'hgb' AND LOWER(lr.reference_unit) = 'g/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64) * 1000
        END AS hgb,
        CASE 
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) IN ('x10e3/ul','10x3/ul','k/ul','10*3/ul','thousand/ul') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) = 'ul' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 1000
        END AS plt,
        CASE 
            WHEN LOWER(lr.base_name) = 'na' AND LOWER(lr.reference_unit) = 'mmol/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS na,
        CASE 
            WHEN LOWER(lr.base_name) = 'hco3' AND LOWER(lr.reference_unit) = 'meq/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS hco3,      
        CASE 
            WHEN LOWER(lr.base_name) = 'bun' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS bun,
        CASE 
            WHEN LOWER(lr.base_name) = 'cr' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS cr,
        -- CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE 
            WHEN LOWER(lr.base_name) = 'lac' AND LOWER(lr.reference_unit) IN ('mmol/l', 'mmole/l') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lactate,               
        CASE 
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 10
        END AS crp,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' AND LOWER(lr.reference_unit) = 'ng/ml' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS procalcitonin
    FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    LEFT JOIN `som-nero-phi-jonc101.shc_core_2023.lab_result` lr 
    USING (anon_id, pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(lr.order_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 2
    and c.order_year>=2022

group by anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    earliest_iv_antibiotic,
    lab_name,
    base_name,
    ord_value,
    reference_unit


In [193]:
refactor_query = f"""
SELECT 
       c.anon_id,
    c.pat_enc_csn_id_coded,
    c.order_proc_id_coded,
    c.blood_culture_order_datetime,
    c.order_year,
    c.ed_arrival_datetime,
    c.positive_blood_culture,
    c.positive_blood_culture_in_week,
    c.earliest_iv_antibiotic_datetime,
    # c.earliest_iv_antibiotic,
        CASE 
            WHEN (LOWER(lr.base_name) = 'wbc' AND LOWER(lr.reference_unit) IN ('thousand/ul','k/ul','10x3/ul','10*3/ul','x10e3/ul')) THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'wbc' AND lr.reference_unit = '/uL' THEN SAFE_CAST(lr.ord_value AS FLOAT64)/1000
        END AS wbc,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%neutrophils%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS neutrophils,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE '%lymphocytes%' AND lr.reference_unit = '%' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lymphocytes,
        -- CASE WHEN LOWER(lr.base_name) LIKE '%bands%' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS bands,
        CASE 
            WHEN LOWER(lr.base_name) = 'hgb' AND lr.reference_unit = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'hgb' AND LOWER(lr.reference_unit) = 'g/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64) * 1000
        END AS hgb,
        CASE 
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) IN ('x10e3/ul','10x3/ul','k/ul','10*3/ul','thousand/ul') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'plt' AND LOWER(lr.reference_unit) = 'ul' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 1000
        END AS plt,
        CASE 
            WHEN LOWER(lr.base_name) = 'na' AND LOWER(lr.reference_unit) = 'mmol/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS na,
        CASE 
            WHEN LOWER(lr.base_name) = 'hco3' AND LOWER(lr.reference_unit) = 'meq/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS hco3,      
        CASE 
            WHEN LOWER(lr.base_name) = 'bun' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS bun,
        CASE 
            WHEN LOWER(lr.base_name) = 'cr' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS cr,
        -- CASE WHEN LOWER(lr.base_name) LIKE 'glucose' THEN SAFE_CAST(lr.ord_value AS FLOAT64) END AS glucose,
        CASE 
            WHEN LOWER(lr.base_name) = 'lac' AND LOWER(lr.reference_unit) IN ('mmol/l', 'mmole/l') THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS lactate,               
        CASE 
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/dl' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
            WHEN LOWER(lr.base_name) = 'crp' AND LOWER(lr.reference_unit) = 'mg/l' THEN SAFE_CAST(lr.ord_value AS FLOAT64) / 10
        END AS crp,
        CASE 
            WHEN LOWER(lr.lab_name) LIKE 'procalcitonin' AND LOWER(lr.reference_unit) = 'ng/ml' THEN SAFE_CAST(lr.ord_value AS FLOAT64)
        END AS procalcitonin
    # FROM `som-nero-phi-jonc101.blood_culture_stewardship.cohort` c
    FROM `{table_id_refactor_vanc_ozysn_temp}` c
    LEFT JOIN `som-nero-phi-jonc101.shc_core_2023.lab_result` lr 
    USING (anon_id, pat_enc_csn_id_coded)
    WHERE TIMESTAMP_DIFF(lr.order_time_jittered_utc, c.blood_culture_order_datetime, HOUR) BETWEEN -24 AND 2
    and c.order_year>=2022

group by anon_id,
    pat_enc_csn_id_coded,
    order_proc_id_coded,
    blood_culture_order_datetime,
    order_year,
    ed_arrival_datetime,
    positive_blood_culture,
    positive_blood_culture_in_week,
    earliest_iv_antibiotic_datetime,
    # earliest_iv_antibiotic,
    lab_name,
    base_name,
    ord_value,
    reference_unit
"""
refactor_final = client.query(refactor_query).to_dataframe()

In [195]:
df_ed_labs = refactor_final.copy()

In [196]:
pos=df_ed_labs[(df_ed_labs.positive_blood_culture==1)|(df_ed_labs.positive_blood_culture_in_week==1)]
neg=df_ed_labs[(df_ed_labs.positive_blood_culture==0)&(df_ed_labs.positive_blood_culture_in_week==0)]


In [197]:
for lab in [ 'wbc',
       'neutrophils', 'lymphocytes', 'hgb', 'plt', 'na', 'hco3', 'bun', 'cr',
       'lactate', 'crp', 'procalcitonin']:
    print('LAB:',lab,pos[pos[lab].notna()][lab].mean(),pos[pos[lab].notna()][lab].median(),pos[pos[lab].notna()][lab].min(),pos[pos[lab].notna()][lab].max())
    print('*******')

LAB: wbc 13.036633663366336 11.3 0.1 333.2
*******
LAB: neutrophils 40.98939688715953 37.2 0.0 97.0
*******
LAB: lymphocytes 9.490713101160864 4.3 0.0 93.0
*******
LAB: hgb 11076.38173704085 11100.0 3800.0 19000.0
*******
LAB: plt 227.71981776765375 207.5 2.0 1429.0
*******
LAB: na 134.37278688524592 135.0 107.0 168.0
*******
LAB: hco3 30.8 30.8 30.8 30.8
*******
LAB: bun 29.799493304379297 23.0 4.0 222.0
*******
LAB: cr 1.6931485971596816 1.1 0.07 18.6
*******
LAB: lactate 2.505842620736238 1.95 0.31 11.32
*******
LAB: crp 14.373015873015873 12.1 0.4 46.9
*******
LAB: procalcitonin 6.844814814814816 0.945 0.06 100.0
*******


In [198]:
for lab in [ 'wbc',
       'neutrophils', 'lymphocytes', 'hgb', 'plt', 'na', 'hco3', 'bun', 'cr',
       'lactate', 'crp', 'procalcitonin']:
    print('LAB:',lab,neg[neg[lab].notna()][lab].mean(),neg[neg[lab].notna()][lab].median(),neg[neg[lab].notna()][lab].min(),neg[neg[lab].notna()][lab].max())
    print('*******')

LAB: wbc 11.345682184383572 9.6 0.1 848.8
*******
LAB: neutrophils 34.67838203212231 25.2 0.0 100.0
*******
LAB: lymphocytes 15.638361408882083 8.7 0.0 100.0
*******
LAB: hgb 11732.911237785016 11800.0 2300.0 25000.0
*******
LAB: plt 261.23118573019235 243.0 1.0 1775.0
*******
LAB: na 135.83593447447913 136.0 100.0 177.0
*******
LAB: hco3 24.936363636363634 24.5 17.5 31.9
*******
LAB: bun 24.009995418766398 18.0 2.0 250.0
*******
LAB: cr 1.3762549156602453 0.91 0.06 24.16
*******
LAB: lactate 2.084963507131613 1.56 0.3 20.7
*******
LAB: crp 7.457696151924038 4.6 0.1 56.4
*******
LAB: procalcitonin 1.3352757221293865 0.14 0.02 124.0
*******


# Refactor note by Sandy:
- I've fully reran this notebook and be able to run it all the way through.
- I've made one major change to the cohort creation. in **Earliest Iv Antibiotic** section, I found the window first and then ranked
- ***The final refactored cohort is here: som-nero-phi-jonc101.blood_culture_stewardship.refactor_final_cohort_temp***

## Todo: make this notebook an executable script so the same code can work for test cohort