### Description: NEW
Refine new cohort4 to only include CSNs that are not already in the original cohort
- New cohort: shc_core_2021, from 04/2020 - 2021
- Original old cohort: shc_core, 2015 - 03/2020

However, some CSNs in the new cohort are also in the old cohort, but with different anon_id and admit time
- Remove these overlapping CSNs. The result is clean when cohort4 is used for both the old and the new data
- When using Demographics/HW data later in the 6.8 notebook, be aware that there are a few overlapping CSNs. This is because cohort2 was used to get the demo/HW data. These overlapping CSNs were actually removed from cohort3 already. They are removed from the new cohort4 again when merging with the old demo/HW data to prevent further issues. This `cohort4` itself remains intact.

**Input:**
- `1_4_cohort.csv` (original cohort4)
- `6_7_0_cohort4` (new cohort4 with labels)


**Output:**
- `6_7_cohort4` size 60,464. This is the *final cohort* combining 2015 - 03/2020 (43,980) and 04/2020 - 2021 (16,484) data


In [1]:
library(data.table)
library(tidyverse)
library(lubridate)
# library(Matrix)
# library(slam)
# library(bit64)
# library(mtools) for one hot coder, not available on Nero or use caret or tidyr
# Let the notebook render tables with up to 200 rows and 30 columns.
options(
  repr.matrix.max.rows = 200,
  repr.matrix.max.cols = 30
)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.2     [32m✔[39m [34mdplyr  [39m 1.0.6
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mbetween()[39m   masks [34mdata.table[39m::between()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m    masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mfirst()[39m     masks [34mdata.table[39m::first()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m       masks [34mstats[39m::lag()
[31m✖[39m [34mdplyr[39m::[32mlast()[39m      masks [34mdata.table[39m::last()
[31m✖[39m [34mpurrr[39m::[32mtranspose()[39m masks [34mdata.table[39m::transpose(

In [2]:
# Base folders used throughout this notebook: source extracts are read from
# `datadir`; derived cohort/label files live under `outdir`.
datadir <- "../../DataTD/shc2021"
outdir <- "../../OutputTD/shc2021"

### Combine all labels across different times

In [6]:
# Load the cohort4 label files, one per observation window (3/6/9/12/24 h).
# The 3-hour table is the main cohort: it is the left-hand side of every join
# when the windows are combined.
# Paths are built from `outdir` rather than repeating the directory literal,
# so moving the output folder only requires changing `outdir`.
cohort3hr <- read_csv(file.path(outdir, "7_cohort4_3hr_labels.csv"))
cohort6hr <- read_csv(file.path(outdir, "7_cohort4_6hr_labels.csv"))
cohort9hr <- read_csv(file.path(outdir, "7_cohort4_9hr_labels.csv"))
cohort12hr <- read_csv(file.path(outdir, "7_cohort4_12hr_labels.csv"))
cohort24hr <- read_csv(file.path(outdir, "7_cohort4_24hr_labels.csv"))


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  anon_id = [31mcol_character()[39m,
  pat_enc_csn_id_coded = [32mcol_double()[39m,
  admit_time = [34mcol_datetime(format = "")[39m,
  label_max3 = [32mcol_double()[39m,
  label_3hr_recent = [32mcol_double()[39m,
  admit_label = [32mcol_double()[39m,
  has_admit_label = [32mcol_double()[39m,
  died_within_24hrs = [32mcol_double()[39m,
  death_3hr_max_label = [32mcol_double()[39m,
  death_3hr_recent_label = [32mcol_double()[39m,
  first_label = [32mcol_double()[39m,
  first_label_minutes_since_admit = [32mcol_double()[39m,
  acute_to_critical_label_recent_3hr = [32mcol_double()[39m,
  critical_to_acute_label_recent_3hr = [32mcol_double()[39m,
  acute_to_critical_label_max_3hr = [32mcol_double()[39m,
  critical_to_acute_label_max_3hr = [32mcol_double()[39m
)



[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────

In [7]:
# Report the dimensions (rows, columns) of each per-window label table,
# in window order, before they are joined.
dim(cohort3hr)
dim(cohort6hr)
dim(cohort9hr)
dim(cohort12hr)
dim(cohort24hr)

In [8]:
# Combine the per-window label tables into one wide table. Left joins keep
# every row of the 3-hour cohort; windows with no matching row contribute NA.
#
# The join keys are listed explicitly (they are the columns shared by all
# window files) instead of relying on dplyr's implicit natural join, so an
# unexpected extra shared column cannot silently become a join key.
label_join_keys <- c(
  "anon_id", "pat_enc_csn_id_coded", "admit_time", "admit_label",
  "has_admit_label", "died_within_24hrs", "first_label",
  "first_label_minutes_since_admit"
)
cohort_labels <- cohort3hr %>%
  left_join(cohort6hr, by = label_join_keys) %>%
  left_join(cohort9hr, by = label_join_keys) %>%
  left_join(cohort12hr, by = label_join_keys) %>%
  left_join(cohort24hr, by = label_join_keys)
dim(cohort_labels)

Joining, by = c("anon_id", "pat_enc_csn_id_coded", "admit_time", "admit_label", "has_admit_label", "died_within_24hrs", "first_label", "first_label_minutes_since_admit")

Joining, by = c("anon_id", "pat_enc_csn_id_coded", "admit_time", "admit_label", "has_admit_label", "died_within_24hrs", "first_label", "first_label_minutes_since_admit")

Joining, by = c("anon_id", "pat_enc_csn_id_coded", "admit_time", "admit_label", "has_admit_label", "died_within_24hrs", "first_label", "first_label_minutes_since_admit")

Joining, by = c("anon_id", "pat_enc_csn_id_coded", "admit_time", "admit_label", "has_admit_label", "died_within_24hrs", "first_label", "first_label_minutes_since_admit")



In [9]:
# Peek at the first combined row and list every column of the wide table.
head(cohort_labels, n = 1)
names(cohort_labels)

anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,first_label,first_label_minutes_since_admit,acute_to_critical_label_recent_3hr,critical_to_acute_label_recent_3hr,acute_to_critical_label_max_3hr,⋯,label_12hr_recent,death_12hr_max_label,death_12hr_recent_label,acute_to_critical_label_recent_12hr,critical_to_acute_label_recent_12hr,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
<chr>,<dbl>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JC1000116,131066472308,2015-01-28 00:46:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [34]:
# Save the combined label table (all observation windows, before any
# exclusion is applied) to the output folder.
write.csv(
  cohort_labels,
  file = file.path(outdir, "7_cohort4_labels.csv"),
  row.names = FALSE
)

# 7_cohort4_labels is the file used to query diagnosis, department, and
# treatment team.

### Get a cohort_op: 

Remove surg patients 

These are patients who had an OR-related `department_id` within 24 hours after admission, or prior to admission within the same visit.
To find them, pull up the department names for our cohort up to hour 24.

That is, encounters that, within 24 hours of inpatient admission, were in a department whose `dept_name` contains any of:

"PRE-OP", "INTRA-OP", "PACU", "MAIN OPERATING ROOM", "SURGERY", "CARDIAC CATH LAB" (departments from shc_core_2021)

In [35]:
# Department table with columns dept_id and dept_name.
dept_path <- file.path(datadir, "dept_2021.csv")
dept <- read_csv(dept_path)
nrow(dept) # 1171180


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  dept_id = [32mcol_double()[39m,
  dept_name = [31mcol_character()[39m
)




In [11]:
# ADT (admission/discharge/transfer) events for the cohort encounters.
# The path is built from `datadir`, like the other extracts read in this
# notebook, instead of repeating the directory literal.
adt <- read_csv(file.path(datadir, "cohort3_adt_2021.csv"))
dim(adt)


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  anon_id = [31mcol_character()[39m,
  pat_enc_csn_id_coded = [32mcol_double()[39m,
  effective_time_jittered_utc = [34mcol_datetime(format = "")[39m,
  seq_num_in_enc = [32mcol_double()[39m,
  pat_class = [31mcol_character()[39m,
  base_pat_class_c = [32mcol_double()[39m,
  pat_lvl_of_care_c = [32mcol_double()[39m,
  pat_lv_of_care = [31mcol_character()[39m,
  event_type = [31mcol_character()[39m,
  pat_service = [31mcol_character()[39m,
  department_id = [32mcol_double()[39m
)




In [36]:
# One example row from each table, to check column names and types.
head(adt, n = 1)
head(dept, n = 1)

anon_id,pat_enc_csn_id_coded,effective_time_jittered_utc,seq_num_in_enc,pat_class,base_pat_class_c,pat_lvl_of_care_c,pat_lv_of_care,event_type,pat_service,department_id
<chr>,<dbl>,<dttm>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>
JC681028,131313000000.0,2021-08-06 06:41:00,49,Inpatient,,8,Critical Care,Patient Update,Pulmonary,120201006


dept_id,dept_name
<dbl>,<chr>
110100017,M7


In [37]:
# Department-name fragments that flag an operative / procedural area; they are
# OR-ed together into a single regular expression.
op_keywords <- c(
  "PRE-OP", "INTRA-OP", "PACU", "MAIN OPERATING ROOM", "SURGERY",
  "CARDIAC CATH LAB"
)
op <- paste(op_keywords, collapse = "|")

# Unique (dept_id, dept_name) pairs whose name matches any fragment,
# highest dept_id first.
op_dept <- dept %>%
  filter(str_detect(dept_name, op)) %>%
  distinct() %>%
  arrange(desc(dept_id))
op_dept

# Cross-check: list every (dept_id, dept_name) pair for the matched ids, so an
# id that also carries a non-matching name would show up here.
dept %>%
  filter(dept_id %in% op_dept$dept_id) %>%
  distinct() %>%
  arrange(desc(dept_id))

dept_id,dept_name
<dbl>,<chr>
120201070,VCP CATH INTRA-OP
120201061,VCP INTRA-OP
120201028,VCP CARDIAC CATH LAB
120201023,VCP PRE-OP/DAY CARE
120201022,VCP PACU - RECOVERY ROOM
120201021,VCP SURGERY
117301008,CATH AMC PACU
117301007,CATH AMC PRE-OP
7301010,CATH AMC INTRA-OP
7301008,ZZCATH ASC PACU


dept_id,dept_name
<dbl>,<chr>
120201070,VCP CATH INTRA-OP
120201061,VCP INTRA-OP
120201028,VCP CARDIAC CATH LAB
120201023,VCP PRE-OP/DAY CARE
120201022,VCP PACU - RECOVERY ROOM
120201021,VCP SURGERY
117301008,CATH AMC PACU
117301007,CATH AMC PRE-OP
7301010,CATH AMC INTRA-OP
7301008,ZZCATH ASC PACU


In [38]:
# Encounters to exclude as surgical/procedural: any ADT event in an operative
# department (op_dept) that happens no later than 24 hours after admit_time.
# Events before admission have a negative delta and are therefore included.
#
# The delta is compared unrounded. Rounding to whole hours first (as was done
# previously) lets events up to ~24.5 h after admission pass the `<= 24` test.
#
# NOTE(review): admit_time and effective_time_jittered_utc are subtracted
# directly; confirm both are in the same time zone.
# NOTE(review): the adt preview prints a CSN as 131313000000.0; confirm the
# extract keeps full-precision CSNs, otherwise this join misses encounters.
op_window_hours <- 24
cohort_op <- cohort_labels %>%
  select(anon_id, pat_enc_csn_id_coded, admit_time) %>%
  left_join(adt, by = c("anon_id", "pat_enc_csn_id_coded")) %>%
  mutate(
    event_admit_delta = as.numeric(
      difftime(effective_time_jittered_utc, admit_time, units = "hours")
    )
  ) %>%
  filter(
    department_id %in% op_dept$dept_id,
    event_admit_delta <= op_window_hours
  ) %>%
  distinct(pat_enc_csn_id_coded)
nrow(cohort_op) # was 8116 when the delta was rounded before the cutoff

Joining, by = c("anon_id", "pat_enc_csn_id_coded")



In [39]:
# Drop the operative encounters from the labelled cohort. cohort_op has a
# single column, so the anti-join matches on pat_enc_csn_id_coded.
n_distinct(cohort_labels$pat_enc_csn_id_coded) # 60648
cohort_no_op <- cohort_labels %>%
  anti_join(cohort_op) # 52532
n_distinct(cohort_no_op$pat_enc_csn_id_coded)

Joining, by = "pat_enc_csn_id_coded"



In [40]:
# First row of the cohort after removing operative encounters.
head(cohort_no_op, n = 1)

anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,first_label,first_label_minutes_since_admit,acute_to_critical_label_recent_3hr,critical_to_acute_label_recent_3hr,acute_to_critical_label_max_3hr,⋯,label_12hr_recent,death_12hr_max_label,death_12hr_recent_label,acute_to_critical_label_recent_12hr,critical_to_acute_label_recent_12hr,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
<chr>,<dbl>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JC1000116,131066472308,2015-01-28 00:46:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
# Distinct patients, distinct encounters, and distinct (patient, encounter)
# pairs in the non-operative cohort.
n_distinct(cohort_no_op$anon_id) # 35144
n_distinct(cohort_no_op$pat_enc_csn_id_coded) # 52532
n_distinct(cohort_no_op$anon_id, cohort_no_op$pat_enc_csn_id_coded) # 52532
head(cohort_no_op)

anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,first_label,first_label_minutes_since_admit,acute_to_critical_label_recent_3hr,critical_to_acute_label_recent_3hr,acute_to_critical_label_max_3hr,⋯,label_12hr_recent,death_12hr_max_label,death_12hr_recent_label,acute_to_critical_label_recent_12hr,critical_to_acute_label_recent_12hr,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
<chr>,<dbl>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JC1000116,131066472308,2015-01-28 00:46:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
JC1000116,131295313275,2020-09-29 22:45:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
JC1000296,131100574537,2015-07-03 04:51:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
JC1000441,131074233004,2015-02-23 06:51:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
JC1000527,131084989828,2015-05-26 23:19:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
JC1000904,131080383001,2015-03-03 11:02:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
# cohort_no_op is already a row subset of cohort_labels with every label
# column present, so it is the final table as-is. Joining it back to
# cohort_labels on all shared columns (as was done previously) adds no
# columns and would duplicate rows if any row were repeated.
cohort_final <- cohort_no_op
dim(cohort_final) # 52532, 48
head(cohort_final, 1)
colnames(cohort_final)

Joining, by = c("anon_id", "pat_enc_csn_id_coded", "admit_time", "label_max3", "label_3hr_recent", "admit_label", "has_admit_label", "died_within_24hrs", "death_3hr_max_label", "death_3hr_recent_label", "first_label", "first_label_minutes_since_admit", "acute_to_critical_label_recent_3hr", "critical_to_acute_label_recent_3hr", "acute_to_critical_label_max_3hr", "critical_to_acute_label_max_3hr", "label_max6", "label_6hr_recent", "death_6hr_max_label", "death_6hr_recent_label", "acute_to_critical_label_recent_6hr", "critical_to_acute_label_recent_6hr", "acute_to_critical_label_max_6hr", "critical_to_acute_label_max_6hr", "label_max9", "label_9hr_recent", "death_9hr_max_label", "death_9hr_recent_label", "acute_to_critical_label_recent_9hr", "critical_to_acute_label_recent_9hr", "acute_to_critical_label_max_9hr", "critical_to_acute_label_max_9hr", "label_max12", "label_12hr_recent", "death_12hr_max_label", "death_12hr_recent_label", "acute_to_critical_label_recent_12hr", "critical_to_acut

anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,first_label,first_label_minutes_since_admit,acute_to_critical_label_recent_3hr,critical_to_acute_label_recent_3hr,acute_to_critical_label_max_3hr,⋯,label_12hr_recent,death_12hr_max_label,death_12hr_recent_label,acute_to_critical_label_recent_12hr,critical_to_acute_label_recent_12hr,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
<chr>,<dbl>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JC1000116,131066472308,2015-01-28 00:46:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [43]:
# Total number of missing cells in the final cohort, followed by a
# per-column summary of every variable.
sum(is.na(cohort_final))
summary(cohort_final)

   anon_id          pat_enc_csn_id_coded   admit_time                 
 Length:52532       Min.   :1.311e+11    Min.   :2015-01-01 06:30:00  
 Class :character   1st Qu.:1.312e+11    1st Qu.:2016-12-19 23:49:45  
 Mode  :character   Median :1.313e+11    Median :2018-11-14 01:22:00  
                    Mean   :1.312e+11    Mean   :2018-08-31 11:06:06  
                    3rd Qu.:1.313e+11    3rd Qu.:2020-05-28 22:04:00  
                    Max.   :1.313e+11    Max.   :2021-09-30 07:00:00  
                                                                      
   label_max3     label_3hr_recent  admit_label     has_admit_label 
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
 Median :0.0000   Median :0.0000   Median :0.0000   Median :1.0000  
 Mean   :0.1142   Mean   :0.1114   Mean   :0.1078   Mean   :0.9927  
 3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
 Max.   :1.0000   

In [20]:
# Save the labelled cohort with operative (OR) encounters removed.
# The file name says "3hr", but the table carries the label columns of every
# window that was joined into cohort_labels.
write.csv(
  cohort_final,
  file = file.path(outdir, "7_cohort4_3hr_labels_noOR.csv"),
  row.names = FALSE
)

### DKA patients

In [44]:
# Diagnosis table with columns dx_name, icd10 and pat_enc_csn_id_jittered.
dx_path <- file.path(datadir, "diagnosis_2021.csv")
dx <- read_csv(dx_path)
nrow(dx) # 3640536


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  dx_name = [31mcol_character()[39m,
  icd10 = [31mcol_character()[39m,
  pat_enc_csn_id_jittered = [32mcol_double()[39m
)




In [45]:
# One example diagnosis row.
head(dx, n = 1)

dx_name,icd10,pat_enc_csn_id_jittered
<chr>,<chr>,<dbl>
"Hodgkin's disease, unspecified type, of lymph nodes of multiple sites",C81.98,131066472308


In [46]:
# Diagnosis names treated as DKA (case-insensitive): the name either mentions
# both "diabet" and "ketoacidos", or contains the abbreviation "DKA".
dka <- dx %>%
  filter(
    (grepl("diabet", dx_name, ignore.case = TRUE) &
      grepl("ketoacidos", dx_name, ignore.case = TRUE)) |
      grepl("DKA", dx_name, ignore.case = TRUE)
  ) %>%
  distinct(dx_name)
dka

dx_name
<chr>
Diabetic ketoacidosis without coma associated with type 2 diabetes mellitus (CMS-HCC)
Type 2 diabetes mellitus with ketoacidosis without coma
Other specified diabetes mellitus with ketoacidosis without coma
Diabetic ketoacidosis without coma associated with type 1 diabetes mellitus (CMS-HCC)
Type 1 diabetes mellitus with ketoacidosis without coma
Type 1 diabetes mellitus with ketoacidosis without coma (CMS-HCC)
Diabetic ketoacidosis with coma associated with other specified diabetes mellitus (CMS-HCC)
Other specified diabetes mellitus with ketoacidosis with coma
Diabetic ketoacidosis without coma associated with other specified diabetes mellitus (CMS-HCC)
Diabetes mellitus due to underlying condition with ketoacidosis without coma


In [47]:
# Distinct diagnosis names that contain the abbreviation "DKA".
dx %>%
  distinct(dx_name) %>%
  filter(grepl("DKA", dx_name, ignore.case = TRUE))

dx_name
<chr>
DKA (diabetic ketoacidosis) (CMS-HCC)
DKA (diabetic ketoacidoses)
"DKA, type 1 (CMS-HCC)"


In [48]:
# Encounters in the labelled cohort that carry at least one DKA diagnosis.
# Diagnoses are matched to the cohort by encounter id
# (pat_enc_csn_id_jittered in dx, pat_enc_csn_id_coded in the cohort).
# NOTE(review): presumably both columns hold the same identifier; confirm.
dka_csn <- dx %>%
  filter(dx_name %in% dka$dx_name) %>%
  pull(pat_enc_csn_id_jittered)

cohort_dka <- cohort_labels %>%
  filter(pat_enc_csn_id_coded %in% dka_csn) %>%
  distinct(pat_enc_csn_id_coded)
nrow(cohort_dka) # 726
head(cohort_dka)

pat_enc_csn_id_coded
<dbl>
131243371751
131203847107
131277704987
131254806278
131126251939
131242696174


In [49]:
# Labelled cohort without DKA encounters. Expected size: 60648 - 726 = 59922.
n_distinct(cohort_labels$pat_enc_csn_id_coded) # 60648
cohort_no_dka <- cohort_labels %>%
  anti_join(cohort_dka) # 59922
n_distinct(cohort_no_dka$pat_enc_csn_id_coded)

Joining, by = "pat_enc_csn_id_coded"



In [50]:
# Apply both exclusions: start from the non-operative cohort and also drop
# the DKA encounters (686 of them were still present after the OR exclusion).
n_distinct(cohort_no_op$pat_enc_csn_id_coded) # 52532
cohort_no_op_no_dka <- cohort_no_op %>%
  anti_join(cohort_dka)
n_distinct(cohort_no_op_no_dka$pat_enc_csn_id_coded) # 51846

Joining, by = "pat_enc_csn_id_coded"



In [29]:
# First row of the cohort after both exclusions.
head(cohort_no_op_no_dka, n = 1)

anon_id,pat_enc_csn_id_coded,admit_time,label_max3,label_3hr_recent,admit_label,has_admit_label,died_within_24hrs,death_3hr_max_label,death_3hr_recent_label,first_label,first_label_minutes_since_admit,acute_to_critical_label_recent_3hr,critical_to_acute_label_recent_3hr,acute_to_critical_label_max_3hr,⋯,label_12hr_recent,death_12hr_max_label,death_12hr_recent_label,acute_to_critical_label_recent_12hr,critical_to_acute_label_recent_12hr,acute_to_critical_label_max_12hr,critical_to_acute_label_max_12hr,label_max24,label_24hr_recent,death_24hr_max_label,death_24hr_recent_label,acute_to_critical_label_recent_24hr,critical_to_acute_label_recent_24hr,acute_to_critical_label_max_24hr,critical_to_acute_label_max_24hr
<chr>,<dbl>,<dttm>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JC1000116,131066472308,2015-01-28 00:46:00,0,0,0,1,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [51]:
# Sanity check: re-attach diagnoses to the filtered cohort and look for any
# remaining DKA-like diagnosis name. The result should have zero rows.
cohort_no_op_no_dka %>%
  left_join(dx, by = c("pat_enc_csn_id_coded" = "pat_enc_csn_id_jittered")) %>%
  filter(
    grepl("diabet|DKA", dx_name, ignore.case = TRUE),
    grepl("ketoacidos|DKA", dx_name, ignore.case = TRUE)
  ) %>%
  distinct(dx_name)

dx_name
<chr>


In [31]:
# Save the labelled cohort with both operative (OR) and DKA encounters removed.
write.csv(
  cohort_no_op_no_dka,
  file = file.path(outdir, "7_cohort4_3hr_labels_noOR_noDKA.csv"),
  row.names = FALSE
)