This notebook is the illustration of how to extract Time Series features from the chartevents data for one single patient that has both icu and cxr data.

To obtain the list of all patients who have icu and cxr data, please run the following notebook: ```icu_cxr_patients.ipynb```

Imports

In [1]:
import os
os.chdir('../')

from src.data import constants
import pandas as pd
from pandas import read_csv
import datetime as dt
import numpy as np

import tsfresh
from tsfresh import extract_features


#### Chart events example

Read all data from chartevents and d_items

chartevents dataframe

In [14]:
df_chartevents =read_csv(constants.chartevents,low_memory=False, dtype={'value': 'object', 'valueuom': 'object'}, nrows=20000000)


d_items dataframe

In [15]:
df_d_items = pd.read_csv(constants.d_items)


In [16]:
df_d_items.head()

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
2,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
3,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,
4,220048,Heart Rhythm,Heart Rhythm,chartevents,Routine Vital Signs,,Text,,


Select chart data for subject_id_example

In [17]:
df_chartevents_subject_id_example = df_chartevents[df_chartevents["subject_id"] == constants.subject_id_example]

In [19]:
df_chartevents_subject_id_example

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
184093,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:05:00,220045,57,57.0,bpm,0
184094,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:05:00,220046,130,130.0,bpm,0
184095,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:05:00,220047,50,50.0,bpm,0
184096,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:06:00,220059,61,61.0,mmHg,0
184097,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:06:00,220060,23,23.0,mmHg,0
...,...,...,...,...,...,...,...,...,...,...
13255480,10216097,23709960,38761080,2189-07-10 16:25:00,2189-07-10 16:25:00,224011,Soft,,,0
13255481,10216097,23709960,38761080,2189-07-10 16:25:00,2189-07-10 16:25:00,224012,Commode,,,0
13255482,10216097,23709960,38761080,2189-07-10 16:25:00,2189-07-10 16:25:00,224794,Brown,,,0
13255483,10216097,23709960,38761080,2189-07-10 16:25:00,2189-07-10 16:25:00,228013,Grade 0,,,0


In [20]:
df_chartevents_subject_id_example[df_chartevents_subject_id_example["itemid"].isin\
                                  (df_d_items[df_d_items["label"] == constants.chart_event_list[0]]["itemid"])]

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
184093,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:05:00,220045,57,57.0,bpm,0
184128,10216097,23709960,38761080,2189-07-03 21:00:00,2189-07-03 21:06:00,220045,54,54.0,bpm,0
184146,10216097,23709960,38761080,2189-07-03 22:00:00,2189-07-03 22:33:00,220045,77,77.0,bpm,0
184152,10216097,23709960,38761080,2189-07-03 23:00:00,2189-07-03 23:13:00,220045,71,71.0,bpm,0
184158,10216097,23709960,38761080,2189-07-04 00:00:00,2189-07-04 00:01:00,220045,83,83.0,bpm,0
...,...,...,...,...,...,...,...,...,...,...
186120,10216097,23709960,38761080,2189-07-10 12:02:00,2189-07-10 12:02:00,220045,81,81.0,bpm,0
186124,10216097,23709960,38761080,2189-07-10 13:00:00,2189-07-10 14:01:00,220045,86,86.0,bpm,0
186131,10216097,23709960,38761080,2189-07-10 14:00:00,2189-07-10 14:01:00,220045,80,80.0,bpm,0
186137,10216097,23709960,38761080,2189-07-10 15:00:00,2189-07-10 15:29:00,220045,87,87.0,bpm,0


In [21]:
df_chartevents_subject_id_example = df_chartevents_subject_id_example[df_chartevents_subject_id_example["itemid"].isin\
                                  (df_d_items[df_d_items["label"] == constants.chart_event_list[0]]["itemid"])]

In [26]:
df_chartevents_subject_id_example

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,Heart Rate
184093,10216097,23709960,38761080,2189-07-03 20:00:00,2189-07-03 21:05:00,220045,57,57.0,bpm,0,57.0
184128,10216097,23709960,38761080,2189-07-03 21:00:00,2189-07-03 21:06:00,220045,54,54.0,bpm,0,54.0
184146,10216097,23709960,38761080,2189-07-03 22:00:00,2189-07-03 22:33:00,220045,77,77.0,bpm,0,77.0
184152,10216097,23709960,38761080,2189-07-03 23:00:00,2189-07-03 23:13:00,220045,71,71.0,bpm,0,71.0
184158,10216097,23709960,38761080,2189-07-04 00:00:00,2189-07-04 00:01:00,220045,83,83.0,bpm,0,83.0
...,...,...,...,...,...,...,...,...,...,...,...
186120,10216097,23709960,38761080,2189-07-10 12:02:00,2189-07-10 12:02:00,220045,81,81.0,bpm,0,81.0
186124,10216097,23709960,38761080,2189-07-10 13:00:00,2189-07-10 14:01:00,220045,86,86.0,bpm,0,86.0
186131,10216097,23709960,38761080,2189-07-10 14:00:00,2189-07-10 14:01:00,220045,80,80.0,bpm,0,80.0
186137,10216097,23709960,38761080,2189-07-10 15:00:00,2189-07-10 15:29:00,220045,87,87.0,bpm,0,87.0


In [27]:
df_chartevents_subject_id_example.dtypes

subject_id      int64
hadm_id         int64
stay_id         int64
charttime      object
storetime      object
itemid          int64
value          object
valuenum      float64
valueuom       object
Heart Rate    float64
dtype: object

In [23]:
df_chartevents_subject_id_example[constants.chart_event_list[0]] = df_chartevents_subject_id_example["valuenum"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chartevents_subject_id_example[constants.chart_event_list[0]] = df_chartevents_subject_id_example["valuenum"]


In [28]:
chart_series = df_chartevents_subject_id_example[["stay_id","charttime",constants.chart_event_list[0]]]

In [29]:
 fc_parameters = {"length": None,
                    "absolute_sum_of_changes": None, 
                    "maximum": None, 
                    "mean": None,
                    "mean_abs_change": None,
                    "mean_change": None,
                    "median": None,
                    "minimum": None,
                    "standard_deviation": None,
                    "variance": None,
                    "large_standard_deviation": [{"r": r * 0.2} for r in range(1, 5)],
                    "quantile": [{"q": q} for q in [.25, .5, .75, 1]],
                    "linear_trend": [{"attr": "pvalue"}, {"attr": "rvalue"}, {"attr": "intercept"},{"attr": "slope"}, {"attr": "stderr"}]}

In [34]:
chart_extracted_features = extract_features(chart_series, column_id="stay_id", column_sort="charttime", default_fc_parameters=fc_parameters)


Feature Extraction: 100%|███████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.78s/it]


In [35]:
chart_extracted_features

Unnamed: 0,Heart Rate__length,Heart Rate__absolute_sum_of_changes,Heart Rate__maximum,Heart Rate__mean,Heart Rate__mean_abs_change,Heart Rate__mean_change,Heart Rate__median,Heart Rate__minimum,Heart Rate__standard_deviation,Heart Rate__variance,...,Heart Rate__large_standard_deviation__r_0.8,Heart Rate__quantile__q_0.25,Heart Rate__quantile__q_0.5,Heart Rate__quantile__q_0.75,Heart Rate__quantile__q_1,"Heart Rate__linear_trend__attr_""pvalue""","Heart Rate__linear_trend__attr_""rvalue""","Heart Rate__linear_trend__attr_""intercept""","Heart Rate__linear_trend__attr_""slope""","Heart Rate__linear_trend__attr_""stderr"""
38761080,169.0,1400.0,118.0,86.863905,8.333333,0.166667,88.0,54.0,12.433461,154.590946,...,0.0,79.0,88.0,96.0,118.0,0.210892,0.096732,84.793039,0.024653,0.019629


#### Lab events example

Read all data from chartevents and d_items

labevents dataframe

In [2]:
df_labevents =read_csv(constants.labevents,low_memory=False, dtype={'storetime': 'object', 'value': 'object', 'valueuom': 'object', 'flag': 'object', 'priority': 'object', 'comments': 'object'}, nrows=20000000)


d_labitems dataframe

In [3]:
df_d_labitems = pd.read_csv(constants.d_labitems)


In [5]:
df_d_labitems

Unnamed: 0,itemid,label,fluid,category,loinc_code
0,51905,,Other Body Fluid,Chemistry,
1,51532,11-Deoxycorticosterone,Blood,Chemistry,
2,51957,17-Hydroxycorticosteroids,Urine,Chemistry,
3,51958,"17-Ketosteroids, Urine",Urine,Chemistry,
4,52068,24 Hr,Blood,Hematology,
...,...,...,...,...,...
1625,52017,"Zinc, Urine",Urine,Chemistry,
1626,52420,ZZDUMMY,Urine,Hematology,
1627,51771,,Blood,Chemistry,
1628,51955,,Stool,Chemistry,


Select lab data for subject_id_example

In [6]:
df_labevents_subject_id_example = df_labevents[df_labevents["subject_id"] == constants.subject_id_example]

In [8]:
df_labevents_subject_id_example

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
257948,2603420,10216097,,42503221,50861,2189-06-10 17:49:00,2189-06-10 23:10:00,22,22.0,IU/L,0.0,40.0,,STAT,
257949,2603421,10216097,,42503221,50863,2189-06-10 17:49:00,2189-06-10 23:10:00,259,259.0,IU/L,40.0,130.0,abnormal,STAT,
257950,2603422,10216097,,42503221,50868,2189-06-10 17:49:00,2189-06-10 18:52:00,19,19.0,mEq/L,8.0,20.0,,STAT,
257951,2603423,10216097,,42503221,50878,2189-06-10 17:49:00,2189-06-10 23:10:00,31,31.0,IU/L,0.0,40.0,,STAT,
257952,2603424,10216097,,42503221,50882,2189-06-10 17:49:00,2189-06-10 18:52:00,27,27.0,mEq/L,22.0,32.0,,STAT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12743854,2604751,10216097,23709960.0,32602348,51275,2189-07-10 04:31:00,2189-07-10 05:30:00,100.9,100.9,sec,25.0,36.5,abnormal,ROUTINE,___
12748010,2603658,10216097,23709960.0,4901999,51275,2189-06-16 02:47:00,2189-06-16 03:37:00,142.4,142.4,sec,25.0,36.5,abnormal,ROUTINE,___
12751434,2604508,10216097,23709960.0,69110349,51275,2189-07-04 15:40:00,2189-07-04 16:21:00,131.9,131.9,sec,25.0,36.5,abnormal,ROUTINE,___
12752772,2604863,10216097,23709960.0,69505888,50893,2189-07-13 07:05:00,2189-07-13 10:07:00,0,0.0,mg/dL,8.4,10.3,abnormal,ROUTINE,___


In [9]:
df_labevents_subject_id_example = df_labevents_subject_id_example[df_labevents_subject_id_example["itemid"].isin\
                                  (df_d_labitems[df_d_labitems["label"] == constants.lab_event_list[0]]["itemid"])]

In [10]:
df_labevents_subject_id_example

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments
258239,2603739,10216097,23709960.0,7802983,50809,2189-06-17 13:08:00,2189-06-17 13:11:00,154,154.0,mg/dL,70.0,105.0,abnormal,,
258881,2604482,10216097,23709960.0,33443350,50809,2189-07-04 12:38:00,2189-07-04 12:40:00,155,155.0,mg/dL,70.0,105.0,abnormal,,


In [11]:
df_labevents_subject_id_example.dtypes

labevent_id          int64
subject_id           int64
hadm_id            float64
specimen_id          int64
itemid               int64
charttime           object
storetime           object
value               object
valuenum           float64
valueuom            object
ref_range_lower    float64
ref_range_upper    float64
flag                object
priority            object
comments            object
dtype: object

In [12]:
df_labevents_subject_id_example[constants.lab_event_list[0]] = df_labevents_subject_id_example["valuenum"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_labevents_subject_id_example[constants.lab_event_list[0]] = df_labevents_subject_id_example["valuenum"]


In [13]:
df_labevents_subject_id_example

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments,Glucose
258239,2603739,10216097,23709960.0,7802983,50809,2189-06-17 13:08:00,2189-06-17 13:11:00,154,154.0,mg/dL,70.0,105.0,abnormal,,,154.0
258881,2604482,10216097,23709960.0,33443350,50809,2189-07-04 12:38:00,2189-07-04 12:40:00,155,155.0,mg/dL,70.0,105.0,abnormal,,,155.0


In [33]:
lab_series = df_labevents_subject_id_example[["hadm_id","charttime",constants.lab_event_list[0]]]

In [36]:
lab_extracted_features = extract_features(lab_series, column_id="hadm_id", column_sort="charttime", default_fc_parameters=fc_parameters)


Feature Extraction: 100%|███████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.76s/it]


In [37]:
lab_extracted_features

Unnamed: 0,Glucose__length,Glucose__absolute_sum_of_changes,Glucose__maximum,Glucose__mean,Glucose__mean_abs_change,Glucose__mean_change,Glucose__median,Glucose__minimum,Glucose__standard_deviation,Glucose__variance,...,Glucose__large_standard_deviation__r_0.8,Glucose__quantile__q_0.25,Glucose__quantile__q_0.5,Glucose__quantile__q_0.75,Glucose__quantile__q_1,"Glucose__linear_trend__attr_""pvalue""","Glucose__linear_trend__attr_""rvalue""","Glucose__linear_trend__attr_""intercept""","Glucose__linear_trend__attr_""slope""","Glucose__linear_trend__attr_""stderr"""
23709960.0,2.0,1.0,155.0,154.5,1.0,1.0,154.5,154.0,0.5,0.25,...,0.0,154.25,154.5,154.75,155.0,0.0,1.0,154.0,1.0,0.0
