In [None]:
########################
# set working directory
# NOTE(review): hard-coded, machine-specific path. The relative data paths
# below resolve against it, so it is kept as-is; consider running the script
# from the project root instead of calling setwd().
setwd("/Users/luca/Projects/rl_sepsis/SEPSIS")

########################
# load libraries
library(tidyverse) # install.packages("tidyverse")

# Path to directory with data files
data_path <- 'ICV_data/'

# Convert a timestamp column to POSIXct by parsing its "YYYY-MM-DD HH:MM:SS"
# text with strptime() (no tz argument, so the session time zone is used).
# Columns that are already date-times are first rendered with an explicit
# format: as.character() on a date-time may omit zero time components
# (e.g. midnight), which strptime() would then fail to parse and turn into NA.
to_local_datetime <- function(x) {
  stamp_format <- "%Y-%m-%d %H:%M:%S"
  if (inherits(x, "POSIXt")) {
    x <- format(x, format = stamp_format)
  }
  as.POSIXct(strptime(x = as.character(x), format = stamp_format))
}

########################################################################################################################
### Import all ICU admissions with Inclusion SQL criteria applied

# FULL DATASET
D <- read_csv(paste0(data_path, "admissions.csv"))

# cleanup: the first column is renamed by position
names(D)[1] <- "PatientID"

# class cleanup
D$start_time <- to_local_datetime(D$Real_AdmissionDateTime)
D$DischargeDate <- to_local_datetime(D$DischargeDate)
# placeholder column, filled in once the observation window is defined
D$end_time <- as.POSIXct(NA)

# visual check
print(names(D))
head(D)

In [None]:
########################################################################################################################
### Calculate SIRS components + final score

# SIRS TEMP
# (value - 32) / 1.8 is the Fahrenheit-to-Celsius conversion, so
# first_Temp_value is presumably recorded in Fahrenheit - TODO confirm.
D$temp <- (D$first_Temp_value - 32) / 1.8
# NOTE(review): the SIRS temperature criterion is usually stated as > 38 or
# < 36 degrees C; the upper cut-off of 39 is kept unchanged - confirm intent.
# The levels are fixed up front: as.factor() followed by `levels<-` would
# relabel an all-1 column as "0", because the single level "1" is renamed to
# the first new level.
D$sirs_temp <- factor(as.integer(D$temp > 39 | D$temp < 36), levels = c(0, 1))

In [None]:
# SIRS heart frequency: flag is 1 when first_hf_value is above 90, else 0
# (NA when the value is missing).
# The levels are fixed up front: as.factor() followed by `levels<-` would
# relabel an all-1 column as "0", because the single level "1" is renamed to
# the first new level.
D$sirs_hf <- factor(as.integer(D$first_hf_value > 90), levels = c(0, 1))

In [None]:
# SIRS respiratory: flag is 1 when first_AF_value is above 20, else 0
# (NA when the value is missing).
# The levels are fixed up front for both flags below: as.factor() followed by
# `levels<-` would relabel an all-1 column as "0", because the single level
# "1" is renamed to the first new level.
D$sirs_resp <- factor(as.integer(D$first_AF_value > 20), levels = c(0, 1))

# SIRS leucocytes: flag is 1 when the rescaled count is above 12 or below 4.
# NOTE(review): the division by 1e6 rescales the raw count so that the 4 / 12
# cut-offs apply; the raw unit is not visible here - confirm.
D$leuco <- D$first_leuco_value / 1000000
D$sirs_leuco <- factor(as.integer(D$leuco > 12 | D$leuco < 4), levels = c(0, 1))

# final SIRS score: sum of the four 0/1 flags.
# as.numeric(as.character()) recovers the 0/1 labels rather than the internal
# factor codes. The sum is NA as soon as any single component is NA.
D$SIRS <- as.numeric(as.character(D$sirs_temp)) +
  as.numeric(as.character(D$sirs_hf)) +
  as.numeric(as.character(D$sirs_resp)) +
  as.numeric(as.character(D$sirs_leuco))

########################################################################################################################
### ADD FILTERS

# Length (in hours) of the early window after admission used by both flags
early_window_hours <- 8

# ADD COLUMN lactate_8h: TRUE when the first elevated lactate was recorded
# less than 8 hours after Real_AdmissionDateTime, FALSE otherwise (including
# when no elevated lactate time is available).
# The difference is taken as (event - admission) with explicit units:
# subtracting date-times with `-` picks its units automatically, and
# (admission - event) is negative for any event after admission, so the
# previous "< 8*60" comparison was always TRUE for such events.
hours_to_lactate <- as.numeric(
  difftime(D$First_elevated_lactate_time_24h, D$Real_AdmissionDateTime, units = "hours")
)
D$lactate_8h <- !is.na(hours_to_lactate) & hours_to_lactate < early_window_hours

# ADD COLUMN nor_8h: TRUE when noradrenaline was started less than 8 hours
# after Real_AdmissionDateTime, FALSE otherwise (including when no start time
# is available). Not used by the selection below, which accepts any
# noradrenaline start time.
hours_to_nor <- as.numeric(
  difftime(D$nor_start_time_24h, D$Real_AdmissionDateTime, units = "hours")
)
D$nor_8h <- !is.na(hours_to_nor) & hours_to_nor < early_window_hours


########################################################################################################################
### APPLY FILTER SELECTION
### (author's note: rows must have at least a real admission datetime; the first 4 columns can't have NULLs)

# MUST HAVE ANY AB STARTED WITHIN 24H OF ADMISSION AND A SIRS SCORE ABOVE 1
# A missing SIRS score counts as "not met"; an NA in the row index would
# otherwise insert all-NA rows into DD.
has_antibiotic <- !is.na(D$First_AB_Name)
meets_sirs <- !is.na(D$SIRS) & D$SIRS > 1
DD <- D[has_antibiotic & meets_sirs, ]

# MUST HAVE NORADRENALINE STARTED WITHIN 24H OR ANY ELEVATED LACTATE IN FIRST 8H
has_noradrenaline <- !is.na(DD$nor_start_time_24h)
DDD <- DD[has_noradrenaline | (DD$lactate_8h %in% TRUE), ]

# final FILTERED dataset
final_df <- DDD

# visually inspect
head(final_df)

In [None]:
########################################################################################################################
### Create start and endtimes dataframe AND save to CSV

# DEFINE WINDOW PERIOD
hours_since_admission <- 72 # let's also try: 7*24=168

# CREATE END_TIME: the window normally ends hours_since_admission after
# start_time (adding a number to a POSIXct adds seconds). If the patient was
# discharged before that, end_time is cut short to DischargeDate.
# This is vectorised: the earlier row-by-row loop failed on an empty cohort
# (1:0 iterates over 1 and 0) and on a missing DischargeDate (if (NA) errors).
# which() drops NA comparisons, so rows with a missing DischargeDate keep the
# full window.
window_end <- final_df$start_time + hours_since_admission * 60 * 60
discharged_early <- which(final_df$DischargeDate < window_end)
final_df$end_time <- window_end
final_df$end_time[discharged_early] <- final_df$DischargeDate[discharged_early]

# keep selection of columns
interval_times_columns <- c("PatientID", "start_time", "end_time")
final_df <- final_df[interval_times_columns]

# save to new CSV (same directory as the input data)
write.csv(final_df, file = paste0(data_path, "admissions_df.csv"), row.names = FALSE)