In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pulp import *
import pandas as pd
import os, glob
import seaborn as sns
from scipy.stats import kruskal
import scikit_posthocs as sp
from scipy.stats import mannwhitneyu
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] ='/Users/fa/.config/gcloud/application_default_credentials.json' #(path to you keep in step 5 )
os.environ['GCLOUD_PROJECT'] = 'som-nero-phi-jonc101' 
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()

## Number of ICU Transfer Events within 52 hrs of alert

In [2]:
%%bigquery df
WITH random_flag AS (
  SELECT 
    anon_id, 
    pat_enc_csn_id_coded, 
    smrtdta_elem_value AS random_flag 
  FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta` 
  WHERE concept_id = 'SHC#6051'
),

discharges_selected AS (
  SELECT 
    anon_id, 
    pat_enc_csn_id_coded, 
    effective_time_jittered, 
    department_name 
  FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  INNER JOIN `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map` USING(department_id)
  INNER JOIN `som-nero-phi-jonc101-secure.starr_map.shc_map_2025-04-15` USING (anon_id)
  WHERE 
    event_type = 'Discharge' 
    AND (
      UPPER(department_name) LIKE 'B3' OR
      UPPER(department_name) LIKE 'C3' OR
      UPPER(department_name) LIKE 'M7' OR
      UPPER(department_name) LIKE 'L7' OR
      UPPER(department_name) LIKE '1%WEST%' OR
      UPPER(department_name) LIKE '2%NORTH%' OR
      UPPER(department_name) LIKE '2%WEST%' OR
      UPPER(department_name) LIKE '3%WEST%'
    )
    AND effective_time_jittered - INTERVAL jitter DAY BETWEEN '2024-08-15' AND '2025-03-15'
),

admissions AS (
  SELECT 
    anon_id, 
    pat_enc_csn_id_coded, 
    effective_time_jittered 
  FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt` 
  WHERE event_type = 'Admission'
),

hospital_stays_selected AS (
  SELECT 
    adm.anon_id, 
    adm.pat_enc_csn_id_coded, 
    adm.effective_time_jittered AS adm_time, 
    dis.effective_time_jittered AS dis_time, 
    dis.department_name,
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, DAY) + 1 AS duration,
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, MINUTE) / 60 AS los_hours
  FROM admissions adm 
  INNER JOIN discharges_selected dis USING (anon_id, pat_enc_csn_id_coded)
),

alerts_cbc AS (
  SELECT DISTINCT 
    alt.anon_id, 
    alt.pat_enc_csn_id_coded,
    alt.alt_id_coded, 
    alt.alert_desc, 
    his.update_date_jittered AS alt_time
  FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_alert` alt 
  INNER JOIN `som-nero-phi-jonc101-secure.shc_core_updates.shc_alert_history` his 
    USING (anon_id, alt_id_coded)
  INNER JOIN hospital_stays_selected USING (anon_id, pat_enc_csn_id_coded)
  WHERE alert_desc IN (
    'SHC AIML LAB CBC STABILITY BASE - LOUD PILOT', 
    'SHC AIML LAB CBC STABILITY BASE - SILENT'
  )
),

cohort AS (
  SELECT 
    alt.*, 
    rf.random_flag  
  FROM alerts_cbc alt 
  INNER JOIN random_flag rf USING (anon_id, pat_enc_csn_id_coded)
  WHERE NOT (
    alt.alert_desc = 'SHC AIML LAB CBC STABILITY BASE - SILENT' 
    AND rf.random_flag = '1'
  )
),

transfer_icu AS (
  SELECT 
    anon_id, 
    pat_enc_csn_id_coded, 
    effective_time_jittered AS icutime
  FROM `som-nero-phi-jonc101.shc_core_2024.adt`
  WHERE 
    pat_lv_of_care = 'Critical Care'
    AND event_type = 'Transfer In'
),
randomized_transfer_icu AS (
  SELECT 
    c.*, 
    t.icutime
  FROM cohort c 
  LEFT JOIN transfer_icu t USING (anon_id, pat_enc_csn_id_coded)
  WHERE t.icutime BETWEEN c.alt_time  and  c.alt_time  + INTERVAL 52 HOUR 
  
),
num_icu_trans AS (
  SELECT 
    anon_id,
    pat_enc_csn_id_coded,
    random_flag,
    COUNT(icutime) AS icu_num 
  FROM randomized_transfer_icu 
  GROUP BY anon_id, pat_enc_csn_id_coded,random_flag
)
SELECT *
FROM num_icu_trans

Query is running:   0%|          |

Downloading:   0%|          |

In [3]:
print(f"Treatment:{df[df.random_flag=='1'].icu_num.mean()}")
print(f"Control:{df[df.random_flag=='2'].icu_num.mean()}")


Treatment:1.6666666666666667
Control:1.7142857142857142


### Pvalue

In [4]:
from scipy import stats
treatment = df[df['random_flag'] == '1']
control = df[df['random_flag'] == '2']
statistic, p_value = stats.ttest_ind(list(treatment['icu_num']), list(control['icu_num']))
print(f't-statistic = {statistic:.2f}, p_value = {p_value:.3f}')

t-statistic = -0.08, p_value = 0.939


## Randomized patient who have been admitted to ICU (Encounter Level)

In [5]:
%%bigquery df
WITH random_flag AS (
  SELECT anon_id,
   pat_enc_csn_id_coded, 
   smrtdta_elem_value AS random_flag 
   FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta` 
   WHERE concept_id = 'SHC#6051'
),
 
discharges_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered, department_name FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
    INNER JOIN `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map` USING(department_id)
    INNER JOIN `som-nero-phi-jonc101-secure.starr_map.shc_map_2025-04-15` USING (anon_id)
    WHERE event_type = 'Discharge' AND UPPER(department_name) LIKE ANY ('B3', 'C3', 'M7', 'L7', '1%WEST%', '2%NORTH%', '2%WEST%', '3%WEST%')
    AND effective_time_jittered - INTERVAL jitter DAY BETWEEN '2024-08-15' AND '2025-03-15'
),
 
admissions AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt` WHERE event_type = 'Admission'
),
 
hospital_stays_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, adm.effective_time_jittered AS adm_time, dis.effective_time_jittered AS dis_time, department_name,
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, DAY)+1 AS duration, --for normalization
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, MINUTE)/60 AS los_hours --for secondary outcome
  FROM admissions adm INNER JOIN discharges_selected dis USING (anon_id, pat_enc_csn_id_coded)
),
study as (
select random_flag.*,
    hospital_stays_selected.adm_time,
    hospital_stays_selected.dis_time,
    hospital_stays_selected.duration,
    hospital_stays_selected.los_hours 
from random_flag 
inner join hospital_stays_selected 
using(anon_id, pat_enc_csn_id_coded)
),
admission_icu as (
SELECT *,
1 as ICUvisit
FROM `som-nero-phi-jonc101.shc_core_2024.adt`
WHERE event_type = 'Admission' 
     AND pat_lv_of_care = 'Critical Care'
)
select c.anon_id,
c.pat_enc_csn_id_coded,
a.effective_time_jittered,
a.ICUvisit,  
c.random_flag,
from study c 
left join admission_icu a using (anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [6]:
print(f"Treatment:{df[(df.ICUvisit==1)&(df.random_flag=='1')].pat_enc_csn_id_coded.nunique()/df[(df.random_flag=='1')].pat_enc_csn_id_coded.nunique()*100}")
print(f"Control:{df[(df.ICUvisit==1)&(df.random_flag=='2')].pat_enc_csn_id_coded.nunique()/df[(df.random_flag=='2')].pat_enc_csn_id_coded.nunique()*100}")

Treatment:0.9364111498257839
Control:1.0047028644719966


In [7]:
import pandas as pd
from scipy.stats import chi2_contingency

# Create a 2x2 contingency table
contingency_table = pd.crosstab(df['random_flag'], df['ICUvisit'].fillna(0))

# Run Chi-Square test
chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-Square Test")
print(f"Chi2 statistic: {chi2:.3f}")
print(f"Degrees of freedom: {dof}")
print(f"P-value: {p:.4f}")
print("Expected frequencies:")
print(expected)

Chi-Square Test
Chi2 statistic: 0.053
Degrees of freedom: 1
P-value: 0.8186
Expected frequencies:
[[4547.41747573   44.58252427]
 [4632.58252427   45.41747573]]


## Randomized patient who have been transfered to ICU (Encounter Level)

In [8]:
%%bigquery df
WITH random_flag AS (
  SELECT anon_id,
   pat_enc_csn_id_coded, 
   smrtdta_elem_value AS random_flag 
   FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta` 
   WHERE concept_id = 'SHC#6051'
),
 
discharges_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered, department_name FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
    INNER JOIN `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map` USING(department_id)
    INNER JOIN `som-nero-phi-jonc101-secure.starr_map.shc_map_2025-04-15` USING (anon_id)
    WHERE event_type = 'Discharge' AND UPPER(department_name) LIKE ANY ('B3', 'C3', 'M7', 'L7', '1%WEST%', '2%NORTH%', '2%WEST%', '3%WEST%')
    AND effective_time_jittered - INTERVAL jitter DAY BETWEEN '2024-08-15' AND '2025-03-15'
),
 
admissions AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt` WHERE event_type = 'Admission'
),
 
hospital_stays_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, adm.effective_time_jittered AS adm_time, dis.effective_time_jittered AS dis_time, department_name,
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, DAY)+1 AS duration, --for normalization
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, MINUTE)/60 AS los_hours --for secondary outcome
  FROM admissions adm INNER JOIN discharges_selected dis USING (anon_id, pat_enc_csn_id_coded)
),
study as (
select random_flag.*,
    hospital_stays_selected.adm_time,
    hospital_stays_selected.dis_time,
    hospital_stays_selected.duration,
    hospital_stays_selected.los_hours 
from random_flag 
inner join hospital_stays_selected 
using(anon_id, pat_enc_csn_id_coded)
),
transfer_icu as (
SELECT *,
1 as ICUvisit
FROM `som-nero-phi-jonc101.shc_core_2024.adt`
WHERE event_type = 'Transfer In' 
     AND pat_lv_of_care = 'Critical Care'
)
select c.anon_id,
c.pat_enc_csn_id_coded,
t.effective_time_jittered,
t.ICUvisit,  
c.random_flag,
from study c 
left join transfer_icu t using (anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [9]:
print(f"Treatment:{df[(df.ICUvisit==1)&(df.random_flag=='1')].pat_enc_csn_id_coded.nunique()/df[(df.random_flag=='1')].pat_enc_csn_id_coded.nunique()*100}")
print(f"Control:{df[(df.ICUvisit==1)&(df.random_flag=='2')].pat_enc_csn_id_coded.nunique()/df[(df.random_flag=='2')].pat_enc_csn_id_coded.nunique()*100}")

Treatment:7.556620209059234
Control:8.123129542539546


###  Chi-Square test

In [10]:
import pandas as pd
from scipy.stats import chi2_contingency

# Create a 2x2 contingency table
contingency_table = pd.crosstab(df['random_flag'], df['ICUvisit'].fillna(0))

# Run Chi-Square test
chi2, p, dof, expected = chi2_contingency(contingency_table)

print("Chi-Square Test")
print(f"Chi2 statistic: {chi2:.3f}")
print(f"Degrees of freedom: {dof}")
print(f"P-value: {p:.4f}")
print("Expected frequencies:")
print(expected)

Chi-Square Test
Chi2 statistic: 3.722
Degrees of freedom: 1
P-value: 0.0537
Expected frequencies:
[[4208.88775209  799.11224791]
 [4334.11224791  822.88775209]]


## Demographic of randomized patient

In [11]:
%%bigquery df
WITH random_flag AS (
  SELECT anon_id,
   pat_enc_csn_id_coded, 
   smrtdta_elem_value AS random_flag 
   FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta` 
   WHERE concept_id = 'SHC#6051'
),
 
discharges_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered, department_name FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
    INNER JOIN `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map` USING(department_id)
    INNER JOIN `som-nero-phi-jonc101-secure.starr_map.shc_map_2025-04-15` USING (anon_id)
    WHERE event_type = 'Discharge' AND UPPER(department_name) LIKE ANY ('B3', 'C3', 'M7', 'L7', '1%WEST%', '2%NORTH%', '2%WEST%', '3%WEST%')
    AND effective_time_jittered - INTERVAL jitter DAY BETWEEN '2024-08-15' AND '2025-03-15'
),
 
admissions AS (
  SELECT anon_id, pat_enc_csn_id_coded, effective_time_jittered FROM `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt` WHERE event_type = 'Admission'
),
 
hospital_stays_selected AS (
  SELECT anon_id, pat_enc_csn_id_coded, adm.effective_time_jittered AS adm_time, dis.effective_time_jittered AS dis_time, department_name,
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, DAY)+1 AS duration, --for normalization
    DATETIME_DIFF(dis.effective_time_jittered, adm.effective_time_jittered, MINUTE)/60 AS los_hours --for secondary outcome
  FROM admissions adm INNER JOIN discharges_selected dis USING (anon_id, pat_enc_csn_id_coded)
),
study as (
select random_flag.*,hospital_stays_selected.adm_time,hospital_stays_selected.duration,hospital_stays_selected.los_hours 
from random_flag 
inner join hospital_stays_selected 
using(anon_id, pat_enc_csn_id_coded)
),
demo as (
  select c.* ,
  DATETIME_DIFF(c.adm_time,demo.birth_date_jittered,YEAR) as Age,
  demo.gender,
  demo.canonical_race
  FROM study  c inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_demographic` demo
  using(anon_id)
) 
    select * from demo

Query is running:   0%|          |

Downloading:   0%|          |

In [12]:
df

Unnamed: 0,anon_id,pat_enc_csn_id_coded,random_flag,adm_time,duration,los_hours,Age,gender,canonical_race
0,JC3560080,131385906082.000000000,2,2024-08-10 05:34:00,2,35.383333,27,Male,Other
1,JC1802035,131397392048.000000000,1,2025-01-24 11:28:00,3,48.866667,101,Female,Asian
2,JC700993,131392217186.000000000,1,2024-11-04 06:15:00,6,124.533333,98,Female,Asian
3,JC2410777,131394602663.000000000,1,2024-12-30 15:41:00,2,30.066667,19,Male,Asian
4,JC6309220,131394961303.000000000,1,2024-12-29 11:07:00,3,52.883333,98,Female,Unknown
...,...,...,...,...,...,...,...,...,...
9790,JC620913,131395741901.000000000,2,2025-02-06 15:59:00,6,117.533333,95,Female,White
9791,JC864044,131397764649.000000000,2,2025-02-10 23:15:00,8,158.266667,95,Female,White
9792,JC969912,131397230180.000000000,1,2025-02-01 16:15:00,2,7.833333,95,Male,White
9793,JC969912,131395038445.000000000,1,2025-01-05 11:15:00,4,77.666667,95,Male,White


In [13]:
df['Age']=df['Age'].astype(int)
df[df.random_flag=='2'].Age.quantile(0.5)
df[(df.canonical_race=='Unknown')&(df.random_flag=='2')][['anon_id','pat_enc_csn_id_coded']].drop_duplicates().shape[0]/df[df.random_flag=='2'][['anon_id','pat_enc_csn_id_coded']].drop_duplicates().shape[0]*100

2.3728088926891835

In [3]:
%%bigquery df
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
)
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [6]:
df[df.smrtdta_elem_value=='1'][['anon_id','pat_enc_csn_id_coded']].drop_duplicates().shape

(2059, 2)

## in units

In [20]:
%%bigquery lab_orders_in_units
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
),
lab_taken as (
select lr.anon_id,
lr.pat_enc_csn_id_coded,
lr.order_id_coded,
lr.taken_time_jittered,
#lr.base_name,
lr.group_lab_name,
op.department_id
from  `som-nero-phi-jonc101-secure.shc_core_updates.shc_lab_result` lr
inner join  `som-nero-phi-jonc101-secure.shc_core_updates.shc_order_proc` op
on lr.anon_id=op.anon_id and lr.pat_enc_csn_id_coded=op.pat_enc_csn_id_coded and lr.order_id_coded=op.order_proc_id_coded
#where lower(base_name) in ('wbc','hgb','plt')
where lower(group_lab_name) like '%cbc%dif%'
),
lab_taken_in_unit as (
select lr.anon_id,
lr.pat_enc_csn_id_coded,
lr.order_id_coded,
lr.taken_time_jittered,
#lr.base_name,
lr.group_lab_name
from lab_taken lr
inner join  `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map` using(department_id)
where department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
group by anon_id,pat_enc_csn_id_coded,order_id_coded,taken_time_jittered,group_lab_name
order by anon_id,pat_enc_csn_id_coded,order_id_coded,taken_time_jittered,group_lab_name
)
select c.*,
lr.taken_time_jittered,
lr.order_id_coded,
#lr.base_name,
lr.group_lab_name,
from study_cohort c
inner join lab_taken_in_unit lr using (anon_id,pat_enc_csn_id_coded)


Query is running:   0%|          |

Downloading:   0%|          |

In [37]:
lab_orders_in_units['taken_time_jittered'] = pd.to_datetime(lab_orders_in_units['taken_time_jittered'])
lab_orders_in_units['taken_date_jittered'] = lab_orders_in_units['taken_time_jittered'].dt.date
group1 = lab_orders_in_units[lab_orders_in_units['smrtdta_elem_value'] == '2']
te=group1.groupby(['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value', 'taken_date_jittered'],as_index=False).count()
te.taken_time_jittered.mean()

1.0513100436681222

In [48]:
# Corrected datetime conversion
lab_orders_in_units['taken_time_jittered'] = pd.to_datetime(lab_orders_in_units['taken_time_jittered'])
lab_orders_in_units['taken_date_jittered'] = lab_orders_in_units['taken_time_jittered'].dt.date

# Separate the groups
group1 = lab_orders_in_units[lab_orders_in_units['smrtdta_elem_value'] == '1']
group2 = lab_orders_in_units[lab_orders_in_units['smrtdta_elem_value'] == '2']

# Group by multiple columns (must be a list)
te = group1.groupby(['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value', 'taken_date_jittered'], as_index=False).count()
te2 = group2.groupby(['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value', 'taken_date_jittered'], as_index=False).count()

# Calculate the average number of 'base_name' occurrences
avg_base_name = te['taken_time_jittered'].mean()#.quantile(0.25)
avg_base_name2 = te2['taken_time_jittered'].mean()#quantile(0.25)
print(f'Group 1, average number of base_name: {avg_base_name}')
print(f'Group 2, average number of base_name: {avg_base_name2}')

Group 1, average number of base_name: 1.062948779320249
Group 2, average number of base_name: 1.0513100436681222


## all units

In [63]:
%%bigquery lab_orders
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
),
lab_taken as (
select lr.anon_id,
lr.pat_enc_csn_id_coded,
lr.order_id_coded,
lr.taken_time_jittered,
#r.base_name,
'cbc' as group_lab_name,
#op.department_id
from  `som-nero-phi-jonc101-secure.shc_core_updates.shc_lab_result` lr
inner join  `som-nero-phi-jonc101-secure.shc_core_updates.shc_order_proc` op
on lr.anon_id=op.anon_id and lr.pat_enc_csn_id_coded=op.pat_enc_csn_id_coded and lr.order_id_coded=op.order_proc_id_coded
where 
#lower(base_name) in ('wbc','hgb','plt')
#and
lower(group_lab_name) like '%cbc%dif%'
group by anon_id,pat_enc_csn_id_coded,order_id_coded,taken_time_jittered,group_lab_name
order by anon_id,pat_enc_csn_id_coded,order_id_coded,taken_time_jittered,group_lab_name

)
select c.*,
lr.taken_time_jittered,
#lr.base_name,
lr.group_lab_name,
from study_cohort c
left join lab_taken lr using (anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [71]:
lab_orders[(lab_orders.taken_time_jittered.isna())&(lab_orders.smrtdta_elem_value=='2')][['anon_id','pat_enc_csn_id_coded']].drop_duplicates().shape[0]/lab_orders[(lab_orders.smrtdta_elem_value=='2')][['anon_id','pat_enc_csn_id_coded']].drop_duplicates().shape[0]*100

27.886497064579256

In [76]:
group1[group1.taken_date_jittered.isna()]

Unnamed: 0,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,taken_time_jittered,group_lab_name,taken_date_jittered
0,JC6517739,131381240520.000000000,1,NaT,,NaT
1,JC2717494,131392677712.000000000,1,NaT,,NaT
6,JC2463780,131391067126.000000000,1,NaT,,NaT
41,JC3797487,131398733799.000000000,1,NaT,,NaT
42,JC1833229,131378654763.000000000,1,NaT,,NaT
...,...,...,...,...,...,...
15274,JC3564132,131374079282.000000000,1,NaT,,NaT
15326,JC1524567,131389306557.000000000,1,NaT,,NaT
15366,JC2893351,131370520665.000000000,1,NaT,,NaT
15389,JC1165911,131384926348.000000000,1,NaT,,NaT


In [89]:
group1[(group1.anon_id=='JC6345733')&(group1.pat_enc_csn_id_coded==131379875842)&(group1.taken_date_jittered==pd.Date('2024-06-18'))]
#te[te.taken_time_jittered>6]

AttributeError: module 'pandas' has no attribute 'date'

In [73]:
# Corrected datetime conversion
lab_orders['taken_time_jittered'] = pd.to_datetime(lab_orders['taken_time_jittered'])
lab_orders['taken_date_jittered'] = lab_orders['taken_time_jittered'].dt.date

# Separate the groups
group1 = lab_orders[lab_orders['smrtdta_elem_value'] == '1']
group2 = lab_orders[lab_orders['smrtdta_elem_value'] == '2']

# Group by multiple columns (must be a list)
te = group1.groupby(['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value', 'taken_date_jittered'], as_index=False).count()
te2 = group2.groupby(['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value', 'taken_date_jittered'], as_index=False).count()

# Calculate the average number of 'base_name' occurrences
avg_base_name = te['taken_time_jittered'].max()
avg_base_name2 = te2['taken_time_jittered'].max()
print(f'Group 1, average number of base_name: {avg_base_name}')
print(f'Group 2, average number of base_name: {avg_base_name2}')

Group 1, average number of base_name: 7
Group 2, average number of base_name: 6


In [54]:
%%bigquery df
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
),
alert_element as (
select anon_id,
pat_enc_csn_id_coded,
alt_id_coded,
alert_desc,
ah.alt_action_inst,
patient_dep_id,
department_name,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_alert_history` ah
inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_alert` a
using(anon_id,alt_id_coded)
where 
department_name like any ('B3','C3','M7','L7','1%West', 
'2%North', '2%West', '3%West')
and 
alert_desc like '%CBC%STABILITY%SILENT'
)
select c.*,
ae.alt_action_inst as alert_time,
from study_cohort c 
inner join alert_element ae using(anon_id,pat_enc_csn_id_coded)
group by anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time
order by anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time


Query is running:   0%|          |

Downloading:   0%|          |

In [58]:
df

Unnamed: 0,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time
0,JC1049406,131396004990.000000000,1,2025-01-24 02:56:56
1,JC6541357,131386383884.000000000,2,2024-09-20 01:14:26
2,JC2846363,131392271948.000000000,1,2025-01-04 23:01:16
3,JC1170843,131396679041.000000000,2,2025-02-02 01:53:56
4,JC1636828,131392756925.000000000,1,2024-12-08 23:28:41
...,...,...,...,...
509,JC1970898,131392684919.000000000,1,2024-12-10 23:29:09
510,JC1365543,131388274061.000000000,2,2024-10-07 00:07:40
511,JC595595,131390646482.000000000,1,2024-11-11 09:11:00
512,JC545328,131382802150.000000000,1,2024-07-24 00:18:52


In [59]:
%%bigquery lab_orders
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
),
lab_taken as (
select lr.anon_id,
lr.pat_enc_csn_id_coded,
lr.order_id_coded,
lr.taken_time_jittered,
lr.base_name,
lr.group_lab_name,
op.department_id
from  `som-nero-phi-jonc101-secure.shc_core_updates.shc_lab_result` lr
inner join  `som-nero-phi-jonc101-secure.shc_core_updates.shc_order_proc` op
on lr.anon_id=op.anon_id and lr.pat_enc_csn_id_coded=op.pat_enc_csn_id_coded and lr.order_id_coded=op.order_proc_id_coded
where lower(base_name) in ('wbc','hgb','plt')
and lower(group_lab_name) like '%cbc%dif%'
)
select c.*,
lr.taken_time_jittered,
lr.base_name,
from study_cohort c
inner join lab_taken lr using (anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [60]:
df

Unnamed: 0,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time
0,JC1049406,131396004990.000000000,1,2025-01-24 02:56:56
1,JC6541357,131386383884.000000000,2,2024-09-20 01:14:26
2,JC2846363,131392271948.000000000,1,2025-01-04 23:01:16
3,JC1170843,131396679041.000000000,2,2025-02-02 01:53:56
4,JC1636828,131392756925.000000000,1,2024-12-08 23:28:41
...,...,...,...,...
509,JC1970898,131392684919.000000000,1,2024-12-10 23:29:09
510,JC1365543,131388274061.000000000,2,2024-10-07 00:07:40
511,JC595595,131390646482.000000000,1,2024-11-11 09:11:00
512,JC545328,131382802150.000000000,1,2024-07-24 00:18:52


In [61]:
# Perform the merge operation
df2 = pd.merge(df, lab_orders, how='inner', on=['anon_id', 'pat_enc_csn_id_coded', 'smrtdta_elem_value'])

# Calculate the time difference in hours and take the absolute value
# Fixed the parentheses and method chaining
df2['sub_diff'] = (df2['alert_time'] - df2['taken_time_jittered']).dt.total_seconds().abs() / 3600

# Filter the DataFrame based on the time difference
df2 = df2[df2['sub_diff'] <= 28]

df2

Unnamed: 0,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,taken_time_jittered,base_name,sub_diff
660,JC570841,131398209598.000000000,1,2025-02-26 09:52:34,2025-02-26 04:57:00,PLT,4.926111
661,JC570841,131398209598.000000000,1,2025-02-26 09:52:34,2025-02-26 04:57:00,HGB,4.926111
662,JC570841,131398209598.000000000,1,2025-02-26 09:52:34,2025-02-26 04:57:00,WBC,4.926111
942,JC1427444,131389983481.000000000,2,2024-10-31 00:24:00,2024-10-30 05:35:00,HGB,18.816667
943,JC1427444,131389983481.000000000,2,2024-10-31 00:24:00,2024-10-30 05:35:00,WBC,18.816667
...,...,...,...,...,...,...,...
13048,JC1970898,131392684919.000000000,1,2024-12-10 23:29:09,2024-12-11 06:43:00,PLT,7.230833
13049,JC1970898,131392684919.000000000,1,2024-12-10 23:29:09,2024-12-11 06:43:00,WBC,7.230833
13107,JC595595,131390646482.000000000,1,2024-11-11 09:11:00,2024-11-11 19:26:00,HGB,10.250000
13108,JC595595,131390646482.000000000,1,2024-11-11 09:11:00,2024-11-11 19:26:00,PLT,10.250000


In [63]:
group1=df2[df2.smrtdta_elem_value=='1'][['anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time','base_name']]
group2=df2[df2.smrtdta_elem_value=='2'][['anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time','base_name']]

te=group1.groupby(['anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time']).count()
te2=group2.groupby(['anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time']).count()


# Calculate the average number of 'base_name' occurrences
avg_base_name = te['base_name'].max()#.quantile(0.75)
avg_base_name2 = te2['base_name'].max()#.quantile(0.75)
print(f'Group 1, average number of base_name: {avg_base_name}')
print(f'Group 2, average number of base_name: {avg_base_name2}')

Group 1, average number of base_name: 12
Group 2, average number of base_name: 12


In [52]:
%%bigquery los
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
)
select c.*,
 TIMESTAMP_DIFF(en.hosp_disch_time_jittered, en.hosp_admsn_time_jittered, HOUR) AS los
from study_cohort c 
inner join  `som-nero-phi-jonc101-secure.shc_core_updates.shc_encounter` en using(anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [65]:
print(los[los.smrtdta_elem_value=='1'].los.quantile(0.75))
print(los[los.smrtdta_elem_value=='2'].los.quantile(0.75))


151.0
147.0


In [67]:
%%bigquery mortality
with base as (
select anon_id,
pat_enc_csn_id_coded,
smrtdta_elem_value,
from `som-nero-phi-jonc101-secure.shc_core_updates.shc_smrtdta`
where concept_id like 'SHC#6051'
and smrtdta_elem_value in ('1','2')
),
department as (
  select anon_id,
  pat_enc_csn_id_coded,
  department_name,
  effective_time_jittered,
  from `som-nero-phi-jonc101-secure.shc_core_updates.shc_adt`
  inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_dep_map`
  using(department_id)
  where event_type='Discharge'
and department_name like any ('B3','C3','M7','L7','1%West',
'2%North', '2%West', '3%West')
and effective_time_jittered>='2024-08-15'
and effective_time_jittered<='2025-03-15'
),
study_cohort as (
select c.*,
from base  c
inner join department d using (anon_id,pat_enc_csn_id_coded)
),
en_mort as (
    select en.anon_id,
    en.pat_enc_csn_id_coded,
case when death_date_jittered<=hosp_disch_time_jittered and death_date_jittered>=hosp_admsn_time_jittered then 1
             else 0 end as mortality 
from   `som-nero-phi-jonc101-secure.shc_core_updates.shc_encounter` en 
    inner join `som-nero-phi-jonc101-secure.shc_core_updates.shc_demographic` d using(anon_id) 
)
select c.*,
en.mortality
from study_cohort c 
inner join  en_mort en using(anon_id,pat_enc_csn_id_coded)

Query is running:   0%|          |

Downloading:   0%|          |

In [70]:
print(mortality[(mortality.smrtdta_elem_value=='1')&(mortality.mortality==1)].pat_enc_csn_id_coded.nunique()/mortality[(mortality.smrtdta_elem_value=='1')].pat_enc_csn_id_coded.nunique()*100)
print(mortality[(mortality.smrtdta_elem_value=='2')&(mortality.mortality==1)].pat_enc_csn_id_coded.nunique()/mortality[(mortality.smrtdta_elem_value=='2')].pat_enc_csn_id_coded.nunique()*100)


1.0199125789218066
0.8806262230919765


In [44]:
df=df[['mrn','anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time','department_name']]
df.head(5)

Unnamed: 0,mrn,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,department_name
0,3220282,JC539488,131399412769.0,1,2025-04-10 08:17:09,C3
1,3220282,JC539488,131399412769.0,1,2025-04-14 00:14:38,C3
2,3220282,JC539488,131399412769.0,1,2025-04-10 03:34:45,C3
3,36585099,JC2063302,131401217746.0,1,2025-04-05 02:36:14,M7
4,81611352,JC3173401,131388844291.0,1,2024-10-13 06:24:18,M7


In [45]:
df['alert_time'] = pd.to_datetime(df['alert_time'])
df = df[(df['alert_time'] >= '2024-08-15') & (df['alert_time'] <= '2025-03-15')]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['alert_time'] = pd.to_datetime(df['alert_time'])


Unnamed: 0,mrn,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,department_name
4,81611352,JC3173401,131388844291.000000000,1,2024-10-13 06:24:18,M7
8,10216729,JC641256,131390196578.000000000,1,2024-11-02 06:07:53,B3
9,5001286599,JC3564684,131384623685.000000000,1,2024-09-14 05:40:07,B3
10,5001286599,JC3564684,131384623685.000000000,1,2024-09-15 08:06:37,B3
11,5001286599,JC3564684,131384623685.000000000,1,2024-09-13 08:30:04,B3
...,...,...,...,...,...,...
785,33762014,JC1897240,131387507861.000000000,2,2024-09-26 00:52:18,B3
786,85254654,JC6467835,131396874397.000000000,2,2025-02-11 00:45:37,B3
787,20590634,JC1173656,131397226880.000000000,2,2025-03-06 23:46:20,B3
788,20590634,JC1173656,131397226880.000000000,2,2025-03-12 08:31:52,B3


In [46]:
df.drop_duplicates(inplace=True)
df.shape
df[df.smrtdta_elem_value=='2'][['anon_id','pat_enc_csn_id_coded','alert_time']].drop_duplicates().shape#/df[df.smrtdta_elem_value=='2'].pat_enc_csn_id_coded.nunique()*100

(284, 3)

In [47]:
df.anon_id.unique()

array(['JC3173401', 'JC641256', 'JC3564684', 'JC656429', 'JC3664145',
       'JC870685', 'JC3643491', 'JC554504', 'JC858009', 'JC2719245',
       'JC999072', 'JC848609', 'JC3697951', 'JC3710958', 'JC1885542',
       'JC3632652', 'JC3610044', 'JC6480651', 'JC3786153', 'JC877494',
       'JC1317535', 'JC679459', 'JC3650731', 'JC1161228', 'JC1672859',
       'JC1066257', 'JC1365543', 'JC2926412', 'JC3722499', 'JC2074310',
       'JC3195191', 'JC6226995', 'JC545301', 'JC6106787', 'JC1107638',
       'JC624359', 'JC3708434', 'JC3620694', 'JC6554298', 'JC3553238',
       'JC756184', 'JC746892', 'JC570841', 'JC6105474', 'JC3646778',
       'JC3238674', 'JC2060417', 'JC3245561', 'JC1568202', 'JC1482310',
       'JC6479568', 'JC807395', 'JC958549', 'JC3572505', 'JC6538821',
       'JC852802', 'JC3664349', 'JC3784724', 'JC6260941', 'JC2060226',
       'JC748547', 'JC2755380', 'JC2910160', 'JC3662453', 'JC785271',
       'JC2123447', 'JC2530539', 'JC1213747', 'JC592941', 'JC1747236',
       'JC65

In [48]:
%%bigquery labs
select anon_id,
pat_enc_csn_id_coded,
order_id_coded,
order_time_jittered, # we need to use the taken time 
base_name,
group_lab_name,
from  `som-nero-phi-jonc101-secure.shc_core_updates.shc_lab_result`
where
lower(base_name) in ('wbc','hgb','plt')
and
lower(group_lab_name) like '%cbc%dif%'
and
anon_id in ('JC3173401', 'JC3559122', 'JC3691270', 'JC3245561', 'JC1482310',
       'JC520284', 'JC1970898', 'JC524167', 'JC667988', 'JC3658862',
       'JC6558657', 'JC1961517', 'JC3555388', 'JC725496', 'JC2896215',
       'JC1672859', 'JC1567931', 'JC3644249', 'JC1262131', 'JC574759',
       'JC1412667', 'JC1897240', 'JC803640', 'JC1931437', 'JC3599577',
       'JC1619001', 'JC1711851', 'JC3595425', 'JC1747236', 'JC1724637',
       'JC516527', 'JC679459', 'JC543042', 'JC3078802', 'JC2982165',
       'JC6486367', 'JC3660174', 'JC655449', 'JC3598000', 'JC1664143',
       'JC646789', 'JC2006363', 'JC1540069', 'JC2635045', 'JC656429',
       'JC3564684', 'JC597651', 'JC6210522', 'JC1274209', 'JC6226995',
       'JC3238674', 'JC6291801', 'JC641256', 'JC6484773', 'JC1600950',
       'JC3568893', 'JC1427444', 'JC987062', 'JC567780', 'JC518883',
       'JC3583880', 'JC1418456', 'JC1450514', 'JC979234', 'JC848609',
       'JC6476303', 'JC3643491', 'JC6489499', 'JC1154903', 'JC3663906',
       'JC1195984', 'JC1766810', 'JC2123447', 'JC6479568', 'JC1475996',
       'JC3111816', 'JC6534289', 'JC2129962', 'JC726224', 'JC3195191',
       'JC6508776', 'JC6458543', 'JC539907', 'JC3196177', 'JC6538821',
       'JC3694080', 'JC3234600', 'JC6505871', 'JC6106787', 'JC3664145',
       'JC807395', 'JC6538318', 'JC1103032', 'JC3678933', 'JC6110261',
       'JC527906', 'JC634629', 'JC1365543', 'JC6526533', 'JC2899954',
       'JC3610717', 'JC3691998', 'JC6541357', 'JC3702738', 'JC748250',
       'JC3679089', 'JC870685', 'JC1219588', 'JC1231437', 'JC6105474',
       'JC1331512', 'JC6339124', 'JC3554543', 'JC3665877', 'JC3620694',
       'JC3670164', 'JC3026207', 'JC1625411', 'JC1324839', 'JC1178538',
       'JC1109557', 'JC1628117', 'JC3552389', 'JC1902468', 'JC586327',
       'JC695466', 'JC6137588', 'JC3664349', 'JC3608191', 'JC858959',
       'JC640050', 'JC3664682', 'JC2527137', 'JC2846363', 'JC689001',
       'JC3662453', 'JC1695251', 'JC6480651', 'JC3702828', 'JC688808',
       'JC3597358', 'JC2060417', 'JC3606672', 'JC3562090', 'JC532196',
       'JC592051', 'JC2175036', 'JC6262922', 'JC1637573', 'JC591683',
       'JC3687493', 'JC646469', 'JC1283221', 'JC660131', 'JC6543754',
       'JC598430', 'JC547355', 'JC723993', 'JC3610044', 'JC641358',
       'JC3712527', 'JC1636828', 'JC746892', 'JC2530539', 'JC2009749',
       'JC745712', 'JC2079457', 'JC537941', 'JC522895', 'JC6412217',
       'JC1632294', 'JC3572505', 'JC2370953', 'JC2121278', 'JC2724713',
       'JC595595', 'JC1725213', 'JC3681601', 'JC3595595', 'JC538209',
       'JC3563783', 'JC1052591', 'JC3628670', 'JC573186', 'JC537284',
       'JC3648264', 'JC748547', 'JC6242747', 'JC624359', 'JC3625264',
       'JC2145092', 'JC572949', 'JC1107638', 'JC1707982', 'JC3618547',
       'JC1994540', 'JC1625743', 'JC620337', 'JC3592214', 'JC1612184',
       'JC785271', 'JC1188873', 'JC543434', 'JC1753328', 'JC814564',
       'JC6534069', 'JC2378354', 'JC655795', 'JC3083794', 'JC2060226',
       'JC585582', 'JC2456368', 'JC824883', 'JC6414389', 'JC2205980',
       'JC1320025', 'JC693902', 'JC2274255', 'JC794589', 'JC6230989',
       'JC2995767', 'JC858009', 'JC6428767', 'JC2183138', 'JC3646778',
       'JC1149379', 'JC3700139', 'JC2910160', 'JC1213747', 'JC522517',
       'JC1527891', 'JC3702479', 'JC1854776', 'JC3613966', 'JC999072',
       'JC3120525', 'JC566825', 'JC2932686', 'JC566696', 'JC3708434',
       'JC628238', 'JC3584505', 'JC2755380', 'JC3565760', 'JC6449829',
       'JC6287877', 'JC1524523', 'JC1052387')

Query is running:   0%|          |

Downloading:   0%|          |

In [49]:
labs=labs[(labs.pat_enc_csn_id_coded.isin(df.pat_enc_csn_id_coded.unique()))]
labs.head()

Unnamed: 0,anon_id,pat_enc_csn_id_coded,order_id_coded,order_time_jittered,base_name,group_lab_name
8,JC3564684,131384623685.0,993053218.0,2024-09-30 17:56:00,WBC,CBC with Differential
16,JC1320025,131390310034.0,1003508763.0,2024-10-10 05:50:00,WBC,CBC with Differential
18,JC693902,131389843033.0,1004317080.0,2024-10-16 04:02:00,WBC,CBC with Differential
22,JC6458543,131382077741.0,975099882.0,2024-07-09 02:32:00,HGB,CBC with Differential
30,JC3606672,131385276755.0,986596370.0,2024-08-03 06:07:00,HGB,CBC with Differential


In [50]:
labs=labs[['anon_id','pat_enc_csn_id_coded','order_id_coded','order_time_jittered']]
labs.drop_duplicates(inplace=True)
labs=pd.merge(Jitter,labs,how='inner',on=['anon_id'])

In [51]:
labs['order_time_jittered'] = pd.to_datetime(labs['order_time_jittered'])
labs['order_time'] = labs['order_time_jittered'] - pd.to_timedelta(labs['jitter'], unit='D')
labs=labs[['mrn','anon_id','pat_enc_csn_id_coded','order_id_coded','order_time']]

In [57]:
df2=pd.merge(df,labs,how='inner',on=['mrn','anon_id','pat_enc_csn_id_coded'])
df2

Unnamed: 0,mrn,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,department_name,order_id_coded,order_time
0,81611352,JC3173401,131388844291.000000000,1,2024-10-13 06:24:18,M7,997072188.000000000,2024-10-14 05:43:00
1,81611352,JC3173401,131388844291.000000000,1,2024-10-13 06:24:18,M7,997002292.000000000,2024-10-13 14:09:00
2,10216729,JC641256,131390196578.000000000,1,2024-11-02 06:07:53,B3,1002867598.000000000,2024-11-06 11:46:00
3,10216729,JC641256,131390196578.000000000,1,2024-11-02 06:07:53,B3,1001580857.000000000,2024-10-31 16:50:00
4,5001286599,JC3564684,131384623685.000000000,1,2024-09-14 05:40:07,B3,993053218.000000000,2024-09-26 17:56:00
...,...,...,...,...,...,...,...,...
4689,28754018,JC1628117,131394029233.000000000,2,2024-12-27 07:32:11,B3,1015653958.000000000,2024-12-28 07:30:00
4690,28754018,JC1628117,131394029233.000000000,2,2024-12-27 07:32:11,B3,1015760764.000000000,2024-12-29 06:56:00
4691,28754018,JC1628117,131394029233.000000000,2,2024-12-27 07:32:11,B3,1015417749.000000000,2024-12-27 05:23:00
4692,28754018,JC1628117,131394029233.000000000,2,2024-12-27 07:32:11,B3,1015310738.000000000,2024-12-26 14:16:00


In [58]:
#df2['order_alert_diff']=(df2['order_time']-df2['alert_time']).dt.total_seconds()/3600
#df2=df2[((df2.order_alert_diff<=28)&((df2.order_alert_diff>=0)))]
df2.head()

Unnamed: 0,mrn,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,department_name,order_id_coded,order_time
0,81611352,JC3173401,131388844291.0,1,2024-10-13 06:24:18,M7,997072188.0,2024-10-14 05:43:00
1,81611352,JC3173401,131388844291.0,1,2024-10-13 06:24:18,M7,997002292.0,2024-10-13 14:09:00
2,10216729,JC641256,131390196578.0,1,2024-11-02 06:07:53,B3,1002867598.0,2024-11-06 11:46:00
3,10216729,JC641256,131390196578.0,1,2024-11-02 06:07:53,B3,1001580857.0,2024-10-31 16:50:00
4,5001286599,JC3564684,131384623685.0,1,2024-09-14 05:40:07,B3,993053218.0,2024-09-26 17:56:00


In [59]:
df2=df2[['mrn','anon_id','pat_enc_csn_id_coded','smrtdta_elem_value','order_id_coded','alert_time']]
df2.drop_duplicates(inplace=True)
#df=pd.merge(df,df2,how='left',on=['mrn','anon_id','pat_enc_csn_id_coded','smrtdta_elem_value'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.drop_duplicates(inplace=True)


In [60]:
df2

Unnamed: 0,mrn,anon_id,pat_enc_csn_id_coded,smrtdta_elem_value,order_id_coded,alert_time
0,81611352,JC3173401,131388844291.000000000,1,997072188.000000000,2024-10-13 06:24:18
1,81611352,JC3173401,131388844291.000000000,1,997002292.000000000,2024-10-13 06:24:18
2,10216729,JC641256,131390196578.000000000,1,1002867598.000000000,2024-11-02 06:07:53
3,10216729,JC641256,131390196578.000000000,1,1001580857.000000000,2024-11-02 06:07:53
4,5001286599,JC3564684,131384623685.000000000,1,993053218.000000000,2024-09-14 05:40:07
...,...,...,...,...,...,...
4689,28754018,JC1628117,131394029233.000000000,2,1015653958.000000000,2024-12-27 07:32:11
4690,28754018,JC1628117,131394029233.000000000,2,1015760764.000000000,2024-12-27 07:32:11
4691,28754018,JC1628117,131394029233.000000000,2,1015417749.000000000,2024-12-27 07:32:11
4692,28754018,JC1628117,131394029233.000000000,2,1015310738.000000000,2024-12-27 07:32:11


In [62]:
df2[(df2.smrtdta_elem_value=='2')&(df2.order_id_coded.notna())][['alert_time']].nunique()/df2[(df2.smrtdta_elem_value=='2')][['alert_time']].nunique()*100

alert_time    100.0
dtype: float64

In [66]:
lab_counts = df2.groupby(['smrtdta_elem_value', 'anon_id','pat_enc_csn_id_coded']).size().reset_index(name='lab_count')
lab_counts

Unnamed: 0,smrtdta_elem_value,anon_id,pat_enc_csn_id_coded,lab_count
0,1,JC1052591,131391244567.000000000,2
1,1,JC1103032,131388951758.000000000,70
2,1,JC1149379,131389209082.000000000,12
3,1,JC1154903,131384661704.000000000,2
4,1,JC1178538,131384174375.000000000,15
...,...,...,...,...
260,2,JC794589,131393234156.000000000,27
261,2,JC803640,131388770432.000000000,12
262,2,JC824883,131392955851.000000000,5
263,2,JC858959,131386145428.000000000,1


In [68]:
import pandas as pd
from scipy.stats import mannwhitneyu , ttest_ind

# Step 1: Count labs per patient-encounter
lab_counts = df2.groupby(['smrtdta_elem_value', 'anon_id','pat_enc_csn_id_coded']).size().reset_index(name='lab_count')

# Step 2: Split by group
group1_counts = lab_counts[lab_counts['smrtdta_elem_value'] == '1']['lab_count']
group2_counts = lab_counts[lab_counts['smrtdta_elem_value'] == '2']['lab_count']

# Step 3: Compare distributions (non-parametric test)
stat, p_value = ttest_ind(group1_counts, group2_counts, alternative='two-sided')

print(f"Group 1 mean: {group1_counts.mean():.2f}")
print(f"Group 2 mean: {group2_counts.mean():.2f}")
print(f"Mann-Whitney U test p-value: {p_value:.4f}")

Group 1 mean: 17.55
Group 2 mean: 17.90
Mann-Whitney U test p-value: 0.9458


In [61]:
test=df[['anon_id','mrn','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time','order_id_coded']]
test['next_order']=0
test.loc[test.order_id_coded.notna(),'next_order']=1
test=test[['anon_id','mrn','pat_enc_csn_id_coded','alert_time','smrtdta_elem_value','next_order']]
test=test.groupby(['anon_id','mrn','pat_enc_csn_id_coded','smrtdta_elem_value','alert_time'],as_index=False).max()
test

Unnamed: 0,anon_id,mrn,pat_enc_csn_id_coded,smrtdta_elem_value,alert_time,next_order
0,JC1037152,18562793,131395740786.000000000,2,2025-02-22 02:57:06,1
1,JC1037152,18562793,131395740786.000000000,2,2025-03-01 07:23:32,1
2,JC1049406,18766782,131396004990.000000000,1,2025-01-24 02:56:56,1
3,JC1049406,18766782,131396004990.000000000,1,2025-01-24 02:57:16,1
4,JC1052387,18819615,131384607974.000000000,2,2024-08-25 00:10:07,1
...,...,...,...,...,...,...
737,JC971667,17468091,131398117592.000000000,1,2025-03-04 00:32:49,0
738,JC979234,17588930,131394109026.000000000,1,2025-01-02 01:16:51,0
739,JC987062,17720590,131387341249.000000000,2,2024-10-12 02:03:00,0
740,JC999072,17918517,131388980022.000000000,1,2024-10-27 00:45:25,0


In [66]:
test[(test.smrtdta_elem_value=='2')].shape

(325, 6)

In [68]:
272/417*100

65.22781774580336

In [71]:
%%bigquery LOS
select anon_id,
pat_enc_csn_id_coded,
hosp_admsn_time_jittered,
hosp_disch_time_jittered, # we need to use the taken time 
from  `som-nero-phi-jonc101-secure.shc_core_updates.shc_encounter`
where
anon_id in ('JC3173401', 'JC3559122', 'JC3691270', 'JC3245561', 'JC1482310',
       'JC520284', 'JC1970898', 'JC524167', 'JC667988', 'JC3658862',
       'JC6558657', 'JC1961517', 'JC3555388', 'JC725496', 'JC2896215',
       'JC1672859', 'JC1567931', 'JC3644249', 'JC1262131', 'JC574759',
       'JC1412667', 'JC1897240', 'JC803640', 'JC1931437', 'JC3599577',
       'JC1619001', 'JC1711851', 'JC3595425', 'JC1747236', 'JC1724637',
       'JC516527', 'JC679459', 'JC543042', 'JC3078802', 'JC2982165',
       'JC6486367', 'JC3660174', 'JC655449', 'JC3598000', 'JC1664143',
       'JC646789', 'JC2006363', 'JC1540069', 'JC2635045', 'JC656429',
       'JC3564684', 'JC597651', 'JC6210522', 'JC1274209', 'JC6226995',
       'JC3238674', 'JC6291801', 'JC641256', 'JC6484773', 'JC1600950',
       'JC3568893', 'JC1427444', 'JC987062', 'JC567780', 'JC518883',
       'JC3583880', 'JC1418456', 'JC1450514', 'JC979234', 'JC848609',
       'JC6476303', 'JC3643491', 'JC6489499', 'JC1154903', 'JC3663906',
       'JC1195984', 'JC1766810', 'JC2123447', 'JC6479568', 'JC1475996',
       'JC3111816', 'JC6534289', 'JC2129962', 'JC726224', 'JC3195191',
       'JC6508776', 'JC6458543', 'JC539907', 'JC3196177', 'JC6538821',
       'JC3694080', 'JC3234600', 'JC6505871', 'JC6106787', 'JC3664145',
       'JC807395', 'JC6538318', 'JC1103032', 'JC3678933', 'JC6110261',
       'JC527906', 'JC634629', 'JC1365543', 'JC6526533', 'JC2899954',
       'JC3610717', 'JC3691998', 'JC6541357', 'JC3702738', 'JC748250',
       'JC3679089', 'JC870685', 'JC1219588', 'JC1231437', 'JC6105474',
       'JC1331512', 'JC6339124', 'JC3554543', 'JC3665877', 'JC3620694',
       'JC3670164', 'JC3026207', 'JC1625411', 'JC1324839', 'JC1178538',
       'JC1109557', 'JC1628117', 'JC3552389', 'JC1902468', 'JC586327',
       'JC695466', 'JC6137588', 'JC3664349', 'JC3608191', 'JC858959',
       'JC640050', 'JC3664682', 'JC2527137', 'JC2846363', 'JC689001',
       'JC3662453', 'JC1695251', 'JC6480651', 'JC3702828', 'JC688808',
       'JC3597358', 'JC2060417', 'JC3606672', 'JC3562090', 'JC532196',
       'JC592051', 'JC2175036', 'JC6262922', 'JC1637573', 'JC591683',
       'JC3687493', 'JC646469', 'JC1283221', 'JC660131', 'JC6543754',
       'JC598430', 'JC547355', 'JC723993', 'JC3610044', 'JC641358',
       'JC3712527', 'JC1636828', 'JC746892', 'JC2530539', 'JC2009749',
       'JC745712', 'JC2079457', 'JC537941', 'JC522895', 'JC6412217',
       'JC1632294', 'JC3572505', 'JC2370953', 'JC2121278', 'JC2724713',
       'JC595595', 'JC1725213', 'JC3681601', 'JC3595595', 'JC538209',
       'JC3563783', 'JC1052591', 'JC3628670', 'JC573186', 'JC537284',
       'JC3648264', 'JC748547', 'JC6242747', 'JC624359', 'JC3625264',
       'JC2145092', 'JC572949', 'JC1107638', 'JC1707982', 'JC3618547',
       'JC1994540', 'JC1625743', 'JC620337', 'JC3592214', 'JC1612184',
       'JC785271', 'JC1188873', 'JC543434', 'JC1753328', 'JC814564',
       'JC6534069', 'JC2378354', 'JC655795', 'JC3083794', 'JC2060226',
       'JC585582', 'JC2456368', 'JC824883', 'JC6414389', 'JC2205980',
       'JC1320025', 'JC693902', 'JC2274255', 'JC794589', 'JC6230989',
       'JC2995767', 'JC858009', 'JC6428767', 'JC2183138', 'JC3646778',
       'JC1149379', 'JC3700139', 'JC2910160', 'JC1213747', 'JC522517',
       'JC1527891', 'JC3702479', 'JC1854776', 'JC3613966', 'JC999072',
       'JC3120525', 'JC566825', 'JC2932686', 'JC566696', 'JC3708434',
       'JC628238', 'JC3584505', 'JC2755380', 'JC3565760', 'JC6449829',
       'JC6287877', 'JC1524523', 'JC1052387')

Query is running:   0%|          |

Downloading:   0%|          |

In [74]:
LOS['hosp_admsn_time_jittered'] = pd.to_datetime(LOS['hosp_admsn_time_jittered'])
LOS['hosp_disch_time_jittered'] = pd.to_datetime(LOS['hosp_disch_time_jittered'])
LOS['los']=(LOS['hosp_disch_time_jittered']-LOS['hosp_admsn_time_jittered']).dt.total_seconds()/3600
LOS=LOS[LOS.los.notna()]
LOS

Unnamed: 0,anon_id,pat_enc_csn_id_coded,hosp_admsn_time_jittered,hosp_disch_time_jittered,los
0,JC824883,131386532499,2024-10-08 01:50:00,2024-10-08 07:08:00,5.300000
31,JC1178538,131244168055,2018-01-11 11:35:00,2018-01-11 23:59:00,12.400000
32,JC539907,131390261712,2024-11-05 13:06:00,2024-11-05 23:59:00,10.883333
35,JC591683,131092753396,2015-08-02 10:02:00,2015-08-02 10:02:00,0.000000
90,JC3568893,131388853695,2024-09-18 00:00:00,2024-09-18 23:59:00,23.983333
...,...,...,...,...,...
174765,JC979234,131295597757,2020-11-23 08:18:00,2020-11-23 23:59:00,15.683333
174772,JC979234,131012497653,2011-03-17 10:12:00,2011-03-17 23:59:00,13.783333
174775,JC979234,131345849415,2023-03-09 05:54:00,2023-03-09 08:38:00,2.733333
174803,JC979234,131020396961,2012-09-25 14:22:00,2012-09-25 23:59:00,9.616667


In [75]:
LOS=LOS[['anon_id','pat_enc_csn_id_coded','los']]
df3=pd.merge(df2,LOS,how='left',on=['anon_id','pat_enc_csn_id_coded'])

In [80]:
df3[(df3.smrtdta_elem_value=='2')][['los']].median()/24

los    32.634028
dtype: float64

In [None]:
df2[(df2.smrtdta_elem_value=='2')&(df2.order_id_coded.notna())][['alert_time']].nunique()/df2[(df2.smrtdta_elem_value=='2')][['alert_time']].nunique()*100