In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

import consts
# Limits on how many rows / columns pandas renders when a frame is displayed.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 100)

# Length of the look-back window; preceding_events turns it into a
# pd.Timedelta with unit="hours".
INTERVAL = 12

In [2]:
# Load the pre-filtered extracts and the ICU item dictionary.
# Paths are assembled with pathlib instead of hard-coded "\\" separators:
# on macOS/Linux a backslash is an ordinary filename character, so the
# original literals pointed at non-existent files there.
FILTERED_DIR = Path("filtered")
ICU_DATA_DIR = Path("data") / "icu"

# NOTE: the name "inputevets_filtered" (sic) is kept because later cells
# reference it under this spelling.
inputevets_filtered = pd.read_csv(FILTERED_DIR / "input_events_filtered_by_subject_id_and_medicine.csv")
chartevents_filtered = pd.read_csv(FILTERED_DIR / "filtered_chartevents.csv")
icustays_filtered = pd.read_csv(FILTERED_DIR / "filtered_icustays.csv")
patients_filtered = pd.read_csv(FILTERED_DIR / "filtered_patients.csv")
procedureevents_filtered = pd.read_csv(FILTERED_DIR / "filtered_procedureevents.csv")
labevents_filtered = pd.read_csv(FILTERED_DIR / "filtered_labevents.csv")
transfers_filtered = pd.read_csv(FILTERED_DIR / "filtered_transfers.csv")
itemid = pd.read_csv(ICU_DATA_DIR / "d_items.csv")

  labevents_filtered = pd.read_csv("filtered\\filtered_labevents.csv")


In [3]:
## preceding_events: select a subject's events inside the look-back window before a given event
def preceding_events(event_time, interval, subject_id, table, time_field, is_procedure=False):
    """Return one subject's rows of ``table`` that fall in the look-back window.

    The window is the closed interval ``[event_time - interval, event_time]``.

    Parameters
    ----------
    event_time : anything accepted by ``pd.Timestamp``
        Reference time that closes the window.
    interval : int or float
        Window length in hours.
    subject_id :
        Value matched against ``table["subject_id"]``.
    table : pd.DataFrame
        Source events. It is NOT modified: timestamps are parsed on a private
        copy of the subject's rows, so the caller's frame keeps its original
        dtypes and the full column is not re-converted on every call.
    time_field : str or (str, str)
        Name of the timestamp column, or a ``(start, end)`` pair of column
        names when ``is_procedure`` is true.
    is_procedure : bool, default False
        Selects interval-based matching on ``(start, end)`` instead of
        point-in-time matching on a single column.

    Returns
    -------
    pd.DataFrame
        Matching rows, with the time column(s) converted to datetimes.
        For point events: rows whose timestamp lies in the window.
        For procedures: rows that end inside the window, or that start at or
        before ``event_time`` and end at or after it.
    """
    event_time = pd.Timestamp(event_time)
    window_start = event_time - pd.Timedelta(value=interval, unit="hours")

    # Narrow to the subject first, then copy so the datetime conversion below
    # never writes into the caller's table.
    events = table[table["subject_id"] == subject_id].copy()

    if is_procedure:  # interval events described by a (start, end) column pair
        start, end = time_field[0], time_field[1]
        events[start] = pd.to_datetime(events[start])
        events[end] = pd.to_datetime(events[end])
        # finishes inside the window
        ended_in_window = (events[end] >= window_start) & (events[end] <= event_time)
        # starts before the event and is still running at the event
        running_at_event = (events[end] >= event_time) & (events[start] <= event_time)
        return events[ended_in_window | running_at_event]

    # point-in-time events described by a single timestamp column
    events[time_field] = pd.to_datetime(events[time_field])
    in_window = (events[time_field] >= window_start) & (events[time_field] <= event_time)
    return events[in_window]


In [4]:
def window_statistics(events, type):
    """Summarise the ``value`` column of the events found in one window.

    Parameters
    ----------
    events : pd.DataFrame
        Rows selected for the window. Must contain ``value`` plus
        ``starttime``/``endtime`` when ``type`` is ``"procedure"`` and
        ``charttime`` otherwise.
    type : str
        Event kind; only the literal ``"procedure"`` changes which time
        columns are listed.

    Returns
    -------
    list
        ``[max, min, std, mean, count, listing]`` where ``listing`` is the
        string form of a list of per-row tuples (time column(s) followed by
        the value), or ``"nan"`` when ``events`` is empty.
    """
    if events.empty:
        listing = np.nan
    else:
        if type == "procedure":
            columns = ["starttime", "endtime", "value"]
        else:
            columns = ["charttime", "value"]
        # One tuple per row, in the column order chosen above.
        listing = events[columns].apply(
            lambda record: tuple(record[name] for name in columns), axis=1
        ).to_list()

    values = events["value"]
    return [
        values.max(),
        values.min(),
        values.std(),
        values.mean(),
        values.count(),
        str(listing),
    ]


In [5]:
# Per-signal lookup settings for the windowing step. Each entry gives:
#   time_field - the timestamp column, or a (start, end) pair of columns
#   table      - the frame the events are read from
#   itemid     - the item ids (from consts) that belong to the signal
WINDOWING_CONFIG = {
    "hr": {
        "time_field": "charttime",
        "table": chartevents_filtered,
        "itemid": consts.HR,
    },
    "bp": {
        "time_field": "charttime",
        "table": chartevents_filtered,
        "itemid": consts.BP,
    },
    "lactate": {
        "time_field": "charttime",
        "table": labevents_filtered,
        "itemid": consts.LABEVENTS_CODES,
    },
    "procedure": {
        "time_field": ("starttime", "endtime"),
        "table": procedureevents_filtered,
        "itemid": consts.PROCEDURE_CODES,
    },
}

def windowing_for_row(input_events_row, interval, type):
    """Compute window statistics of one signal for a single input-event row.

    Parameters
    ----------
    input_events_row : mapping / pd.Series
        Must provide ``subject_id`` and ``starttime``; ``starttime`` is the
        reference time that closes the look-back window.
    interval : int or float
        Window length, forwarded to ``preceding_events``.
    type : str
        Key into ``WINDOWING_CONFIG`` selecting table, time column(s) and
        item ids.

    Returns
    -------
    list
        The result of ``window_statistics`` for the matching events.
    """
    subject = input_events_row["subject_id"]
    anchor_time = input_events_row["starttime"]
    settings = WINDOWING_CONFIG[type]

    # All of the subject's rows from the configured table inside the window...
    candidates = preceding_events(
        event_time=anchor_time,
        interval=interval,
        subject_id=subject,
        table=settings["table"],
        time_field=settings["time_field"],
        is_procedure=(type == "procedure"),
    )
    # ...restricted to the item ids that make up this signal.
    wanted_items = candidates["itemid"].isin(settings["itemid"])
    return window_statistics(candidates[wanted_items], type)




In [6]:
# First 100 input-event rows; presumably a small sample for quick experiments
# (no later visible cell uses it).
input_small = inputevets_filtered.iloc[:100]

In [7]:
def filter_short_stays_and_different_unit(inputevents, icustays_filtered):
    """Keep input events that belong to eligible ICU stays.

    A stay is eligible when its ``first_careunit`` equals its
    ``last_careunit`` and its ``los`` is at least ``consts.MINIMAL_LOS``.

    Parameters
    ----------
    inputevents : pd.DataFrame
        Input events carrying a ``stay_id`` column.
    icustays_filtered : pd.DataFrame
        ICU stays with ``stay_id``, ``first_careunit``, ``last_careunit``
        and ``los`` columns.

    Returns
    -------
    pd.DataFrame
        ``inputevents`` joined (default inner merge on ``stay_id``) with the
        eligible stays, which adds their ``first_careunit`` column.
    """
    stayed_in_one_unit = icustays_filtered["first_careunit"] == icustays_filtered["last_careunit"]
    long_enough = icustays_filtered["los"] >= consts.MINIMAL_LOS
    eligible_stays = icustays_filtered.loc[
        stayed_in_one_unit & long_enough, ["stay_id", "first_careunit"]
    ]
    return inputevents.merge(eligible_stays, on="stay_id")

In [8]:
def add_window_statistics(input_events, event_type):
    """Append the window statistics of one signal as new columns.

    Parameters
    ----------
    input_events : pd.DataFrame
        One row per input event; each row is passed to ``windowing_for_row``
        together with the module-level ``INTERVAL``.
    event_type : str
        Signal name; used both for the lookup and as the column prefix.

    Returns
    -------
    pd.DataFrame
        ``input_events`` with six extra columns named
        ``<event_type>_max``, ``_min``, ``_std``, ``_mean``, ``_count`` and
        ``_events_at_interval``.
    """
    suffixes = ("max", "min", "std", "mean", "count", "events_at_interval")
    # Positional column labels produced by result_type="expand" -> final names.
    column_names = {
        position: f"{event_type}_{suffix}" for position, suffix in enumerate(suffixes)
    }

    def summarise(row):
        return windowing_for_row(row, INTERVAL, event_type)

    window_columns = input_events.apply(summarise, axis=1, result_type="expand")
    window_columns = window_columns.rename(columns=column_names)
    return pd.concat([input_events, window_columns], axis=1)

In [9]:
def add_all_statistics(inputevents):
    """Attach window statistics for every configured signal.

    Parameters
    ----------
    inputevents : pd.DataFrame
        Input events; must contain all columns listed in ``kept_columns``.

    Returns
    -------
    pd.DataFrame
        The selected input-event columns followed by the statistics columns
        for ``bp``, ``hr``, ``lactate`` and ``procedure``, in that order.
    """
    kept_columns = [
        "subject_id", "hadm_id", "stay_id",
        "starttime", "endtime", "storetime",
        "itemid", "itemid_label",
        "amount", "amountuom", "rate", "rateuom",
        "statusdescription", "patientweight", "first_careunit",
    ]
    enriched = inputevents[kept_columns]
    for info_type in ("bp", "hr", "lactate", "procedure"):
        enriched = add_window_statistics(enriched, info_type)
    return enriched

In [10]:
# Restrict the input events to eligible ICU stays, attach the window
# statistics for every signal, and write the combined table to disk.
inputevents_filtered_by_los = filter_short_stays_and_different_unit(
    inputevents=inputevets_filtered,
    icustays_filtered=icustays_filtered,
)
combined_df = add_all_statistics(inputevents=inputevents_filtered_by_los)
combined_df.to_csv("combined_df_example.csv")