### Imports


In [1]:
import os
os.chdir('../')

from src.data import constants
import pandas as pd
from pandas import read_csv
import datetime as dt
import numpy as np


### Read all data from local source

MIMIC-IV Dataset

In [2]:
## CORE
df_admissions = pd.read_csv(constants.admissions, dtype={'admission_location': 'object','deathtime': 'object','edouttime': 'object','edregtime': 'object'})
df_patients = pd.read_csv(constants.patients, dtype={'dod': 'object'})  
df_transfers = pd.read_csv(constants.transfers, dtype={'careunit': 'object'})


In [3]:
## ICU
df_d_items = pd.read_csv(constants.d_items)

df_chartevents = pd.read_csv(constants.chartevents, low_memory=False, dtype={'value': 'object', 'valueuom': 'object'}, nrows=20000000)


### Create dataframe for patient  10003700

Core

In [5]:
df_admissions = df_admissions[df_admissions["subject_id"]==11658675]
df_patients = df_patients[df_patients["subject_id"]==11658675]
df_transfers = df_transfers[df_transfers["subject_id"]==11658675]


ICU

In [6]:
df_chartevents = df_chartevents[df_chartevents["subject_id"]==11658675]

In [7]:
### Fix data type issues to allow for merging


df_admissions['admittime'] = pd.to_datetime(df_admissions['admittime'])
df_admissions['dischtime'] = pd.to_datetime(df_admissions['dischtime'])
df_admissions['deathtime'] = pd.to_datetime(df_admissions['deathtime'])
df_admissions['edregtime'] = pd.to_datetime(df_admissions['edregtime'])
df_admissions['edouttime'] = pd.to_datetime(df_admissions['edouttime'])

df_transfers['intime'] = pd.to_datetime(df_transfers['intime'])
df_transfers['outtime'] = pd.to_datetime(df_transfers['outtime'])

df_chartevents['charttime'] = pd.to_datetime(df_chartevents['charttime'])
df_chartevents['storetime'] = pd.to_datetime(df_chartevents['storetime'])

In [8]:
df_admissions.dtypes, df_transfers.dtypes, df_chartevents.dtypes


(subject_id                       int64
 hadm_id                          int64
 admittime               datetime64[ns]
 dischtime               datetime64[ns]
 deathtime               datetime64[ns]
 admission_type                  object
 admission_location              object
 discharge_location              object
 insurance                       object
 language                        object
 marital_status                  object
 ethnicity                       object
 edregtime               datetime64[ns]
 edouttime               datetime64[ns]
 hospital_expire_flag             int64
 dtype: object,
 subject_id              int64
 hadm_id               float64
 transfer_id             int64
 eventtype              object
 careunit               object
 intime         datetime64[ns]
 outtime        datetime64[ns]
 dtype: object,
 subject_id             int64
 hadm_id                int64
 stay_id                int64
 charttime     datetime64[ns]
 storetime     datetime64[ns]
 

In [10]:
event_list = ['Heart Rate','Non Invasive Blood Pressure systolic',
                    'Non Invasive Blood Pressure diastolic', 'Non Invasive Blood Pressure mean', 
                    'Respiratory Rate','O2 saturation pulseoxymetry', 
                    'GCS - Verbal Response', 'GCS - Eye Opening', 'GCS - Motor Response'] 

In [9]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
2230149,11658675,23997967,39582364,2152-09-13 11:19:00,2152-09-13 11:27:00,228236,0,0.0,,0
2230150,11658675,23997967,39582364,2152-09-13 12:00:00,2152-09-13 12:57:00,223761,98.9,98.9,°F,0
2230151,11658675,23997967,39582364,2152-09-13 12:30:00,2152-09-13 14:01:00,220046,120,120.0,bpm,0
2230152,11658675,23997967,39582364,2152-09-13 12:30:00,2152-09-13 14:01:00,220047,60,60.0,bpm,0
2230153,11658675,23997967,39582364,2152-09-13 12:30:00,2152-09-13 14:01:00,223751,160,160.0,mmHg,0
...,...,...,...,...,...,...,...,...,...,...
17700831,11658675,25483847,39768846,2157-10-22 16:00:00,2157-10-22 15:53:00,229381,Name,1.0,,0
17700832,11658675,25483847,39768846,2157-10-22 17:00:00,2157-10-22 17:36:00,220048,SR (Sinus Rhythm),,,0
17700833,11658675,25483847,39768846,2157-10-22 17:00:00,2157-10-22 17:36:00,224650,,,,0
17700834,11658675,25483847,39768846,2157-10-22 18:00:00,2157-10-22 17:43:00,227570,Albuterol 0.083% unit,,,0


In [11]:
len(df_chartevents["itemid"].unique())

522

In [12]:
df_d_items

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
2,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
3,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,
4,220048,Heart Rhythm,Heart Rhythm,chartevents,Routine Vital Signs,,Text,,
...,...,...,...,...,...,...,...,...,...
3856,229355,Absolute Neutrophil Count,Absolute Neutrophil Count,chartevents,Labs,,Numeric,,
3857,229453,Exam-GI/GU,Exam-GI/GU,chartevents,MD Progress Note,,Text,,
3858,229604,Therapeutic Bed,Therapeutic Bed,chartevents,Treatments,,Text,,
3859,229709,Angiotensin II (Giapreza),Angiotensin II (Giapreza),inputevents,Medications,mg,Solution,,


In [13]:
df_d_items[df_d_items["label"].isin(event_list)]

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
22,220179,Non Invasive Blood Pressure systolic,NBPs,chartevents,Routine Vital Signs,mmHg,Numeric,,
23,220180,Non Invasive Blood Pressure diastolic,NBPd,chartevents,Routine Vital Signs,mmHg,Numeric,,
24,220181,Non Invasive Blood Pressure mean,NBPm,chartevents,Routine Vital Signs,mmHg,Numeric,,
26,220210,Respiratory Rate,RR,chartevents,Respiratory,insp/min,Numeric,,
33,220277,O2 saturation pulseoxymetry,SpO2,chartevents,Respiratory,%,Numeric,,
63,220739,GCS - Eye Opening,Eye Opening,chartevents,Neurological,,Text,,
285,223900,GCS - Verbal Response,Verbal Response,chartevents,Neurological,,Text,,
286,223901,GCS - Motor Response,Motor Response,chartevents,Neurological,,Text,,


In [15]:
df_d_items_chart = df_d_items[df_d_items['label'].isin(event_list)]

df_d_items_chart = df_d_items_chart[["category","label","itemid"]]

df_d_items_chart

Unnamed: 0,category,label,itemid
1,Routine Vital Signs,Heart Rate,220045
22,Routine Vital Signs,Non Invasive Blood Pressure systolic,220179
23,Routine Vital Signs,Non Invasive Blood Pressure diastolic,220180
24,Routine Vital Signs,Non Invasive Blood Pressure mean,220181
26,Respiratory,Respiratory Rate,220210
33,Respiratory,O2 saturation pulseoxymetry,220277
63,Neurological,GCS - Eye Opening,220739
285,Neurological,GCS - Verbal Response,223900
286,Neurological,GCS - Motor Response,223901


In [19]:
df_d_items_chart['itemid']

1      220045
22     220179
23     220180
24     220181
26     220210
33     220277
63     220739
285    223900
286    223901
Name: itemid, dtype: int64

In [17]:
df_chartevents = df_chartevents[df_chartevents["itemid"].isin(df_d_items_chart['itemid'])]

In [18]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
2230163,11658675,23997967,39582364,2152-09-13 12:37:00,2152-09-13 12:57:00,220045,87,87.0,bpm,0
2230164,11658675,23997967,39582364,2152-09-13 12:37:00,2152-09-13 12:57:00,220210,15,15.0,insp/min,0
2230165,11658675,23997967,39582364,2152-09-13 12:38:00,2152-09-13 12:57:00,220277,92,92.0,%,0
2230166,11658675,23997967,39582364,2152-09-13 12:41:00,2152-09-13 12:57:00,220179,108,108.0,mmHg,0
2230167,11658675,23997967,39582364,2152-09-13 12:41:00,2152-09-13 12:57:00,220180,54,54.0,mmHg,0
...,...,...,...,...,...,...,...,...,...,...
17700625,11658675,25483847,39768846,2157-10-22 12:00:00,2157-10-22 13:43:00,223900,Confused,4.0,,0
17700626,11658675,25483847,39768846,2157-10-22 12:00:00,2157-10-22 13:43:00,223901,Obeys Commands,6.0,,0
17700754,11658675,25483847,39768846,2157-10-22 16:00:00,2157-10-22 15:53:00,220739,To Speech,3.0,,0
17700757,11658675,25483847,39768846,2157-10-22 16:00:00,2157-10-22 15:53:00,223900,Confused,4.0,,0


In [20]:
df_chartevents["itemid"].unique()

array([220045, 220210, 220277, 220179, 220180, 220181, 220739, 223900,
       223901])

In [21]:
df_chartevents["valueuom"].fillna('', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chartevents["valueuom"].fillna('', inplace=True)


In [22]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
2230163,11658675,23997967,39582364,2152-09-13 12:37:00,2152-09-13 12:57:00,220045,87,87.0,bpm,0
2230164,11658675,23997967,39582364,2152-09-13 12:37:00,2152-09-13 12:57:00,220210,15,15.0,insp/min,0
2230165,11658675,23997967,39582364,2152-09-13 12:38:00,2152-09-13 12:57:00,220277,92,92.0,%,0
2230166,11658675,23997967,39582364,2152-09-13 12:41:00,2152-09-13 12:57:00,220179,108,108.0,mmHg,0
2230167,11658675,23997967,39582364,2152-09-13 12:41:00,2152-09-13 12:57:00,220180,54,54.0,mmHg,0
...,...,...,...,...,...,...,...,...,...,...
17700625,11658675,25483847,39768846,2157-10-22 12:00:00,2157-10-22 13:43:00,223900,Confused,4.0,,0
17700626,11658675,25483847,39768846,2157-10-22 12:00:00,2157-10-22 13:43:00,223901,Obeys Commands,6.0,,0
17700754,11658675,25483847,39768846,2157-10-22 16:00:00,2157-10-22 15:53:00,220739,To Speech,3.0,,0
17700757,11658675,25483847,39768846,2157-10-22 16:00:00,2157-10-22 15:53:00,223900,Confused,4.0,,0


In [23]:
df_chartevents_d_items_chart = df_chartevents.merge(df_d_items_chart, on=("itemid"))

In [24]:
df_chartevents_d_items_chart

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,category,label
0,11658675,23997967,39582364,2152-09-13 12:37:00,2152-09-13 12:57:00,220045,87,87.0,bpm,0,Routine Vital Signs,Heart Rate
1,11658675,23997967,39582364,2152-09-13 13:00:00,2152-09-13 14:01:00,220045,79,79.0,bpm,0,Routine Vital Signs,Heart Rate
2,11658675,23997967,39582364,2152-09-13 14:00:00,2152-09-13 14:01:00,220045,79,79.0,bpm,0,Routine Vital Signs,Heart Rate
3,11658675,23997967,39582364,2152-09-13 15:00:00,2152-09-13 15:02:00,220045,74,74.0,bpm,0,Routine Vital Signs,Heart Rate
4,11658675,23997967,39582364,2152-09-13 16:00:00,2152-09-13 16:17:00,220045,79,79.0,bpm,0,Routine Vital Signs,Heart Rate
...,...,...,...,...,...,...,...,...,...,...,...,...
4404,11658675,25483847,39768846,2157-10-21 23:00:00,2157-10-21 23:35:00,223901,Localizes Pain,5.0,,0,Neurological,GCS - Motor Response
4405,11658675,25483847,39768846,2157-10-22 04:00:00,2157-10-22 05:54:00,223901,Localizes Pain,5.0,,0,Neurological,GCS - Motor Response
4406,11658675,25483847,39768846,2157-10-22 08:00:00,2157-10-22 11:50:00,223901,Obeys Commands,6.0,,0,Neurological,GCS - Motor Response
4407,11658675,25483847,39768846,2157-10-22 12:00:00,2157-10-22 13:43:00,223901,Obeys Commands,6.0,,0,Neurological,GCS - Motor Response


In [46]:
df_chartevents_d_items_chart['valuenum']
df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']=='Heart Rate']['valuenum']

#.isin(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']]=='Heart Rate')


0      87.0
1      79.0
2      79.0
3      74.0
4      79.0
       ... 
682    68.0
683    66.0
684    78.0
685    81.0
686    81.0
Name: valuenum, Length: 687, dtype: float64

In [72]:
# create a new table with additional columns with label list  
df1 = df_chartevents[['subject_id', 'hadm_id', 'stay_id', 'charttime']] 
for event in event_list:    
    df1[event] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation

In [73]:
df1

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,Heart Rate,Non Invasive Blood Pressure systolic,Non Invasive Blood Pressure diastolic,Non Invasive Blood Pressure mean,Respiratory Rate,O2 saturation pulseoxymetry,GCS - Verbal Response,GCS - Eye Opening,GCS - Motor Response
2230163,11658675,23997967,39582364,2152-09-13 12:37:00,0,0,0,0,0,0,0,0,0
2230164,11658675,23997967,39582364,2152-09-13 12:37:00,0,0,0,0,0,0,0,0,0
2230165,11658675,23997967,39582364,2152-09-13 12:38:00,0,0,0,0,0,0,0,0,0
2230166,11658675,23997967,39582364,2152-09-13 12:41:00,0,0,0,0,0,0,0,0,0
2230167,11658675,23997967,39582364,2152-09-13 12:41:00,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17700625,11658675,25483847,39768846,2157-10-22 12:00:00,0,0,0,0,0,0,0,0,0
17700626,11658675,25483847,39768846,2157-10-22 12:00:00,0,0,0,0,0,0,0,0,0
17700754,11658675,25483847,39768846,2157-10-22 16:00:00,0,0,0,0,0,0,0,0,0
17700757,11658675,25483847,39768846,2157-10-22 16:00:00,0,0,0,0,0,0,0,0,0


In [74]:
df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']=='Heart Rate']['valuenum']


0      87.0
1      79.0
2      79.0
3      74.0
4      79.0
       ... 
682    68.0
683    66.0
684    78.0
685    81.0
686    81.0
Name: valuenum, Length: 687, dtype: float64

In [75]:
for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum']
)):
    df1[event_list[0]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

    
'''for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[1]]['valuenum']
)):
    df1[event_list[1]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[2]]['valuenum']
)):
    df1[event_list[2]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[3]]['valuenum']
)):
    df1[event_list[3]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[4]]['valuenum']
)):
    df1[event_list[4]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[5]]['valuenum']
)):
    df1[event_list[5]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[6]]['valuenum']
)):
    df1[event_list[6]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[7]]['valuenum']
)):
    df1[event_list[7]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[8]]['valuenum']
)):
    df1[event_list[8]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]

    '''
    


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[0]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]


"for i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[1]]['valuenum']\n)):\n    df1[event_list[1]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]\n\nfor i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[2]]['valuenum']\n)):\n    df1[event_list[2]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]\n\nfor i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[3]]['valuenum']\n)):\n    df1[event_list[3]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[0]]['valuenum'][i]\n\nfor i in range(len(df_chartevents_d_items_chart[df_chartevents_d_items_chart['label']==event_list[4]]['valuenum']\n)):\n    df1[event_list[4]][df1.index[i]] = df_chartevents_d_items_chart[df_chartevents_d_items_chart['l

In [109]:
'''df1[event_list[0]][df1.index[0]] = df_chartevents_d_items_chart["valuenum"][0]

df1[event_list[1]][df1.index[1]] = df_chartevents_d_items_chart["valuenum"][1]

df1[event_list[2]][df1.index[2]] = df_chartevents_d_items_chart["valuenum"][2]
df1[event_list[2]][df1.index[3]] = df_chartevents_d_items_chart["valuenum"][3]
df1[event_list[2]][df1.index[4]] = df_chartevents_d_items_chart["valuenum"][4]
'''

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[0]][df1.index[0]] = df_chartevents_d_items_chart["valuenum"][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[1]][df1.index[1]] = df_chartevents_d_items_chart["valuenum"][1]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[2]][df1.index[2]] = df_chartevents_d_items_chart["valuenum"][2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-d

In [76]:
df1

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,Heart Rate,Non Invasive Blood Pressure systolic,Non Invasive Blood Pressure diastolic,Non Invasive Blood Pressure mean,Respiratory Rate,O2 saturation pulseoxymetry,GCS - Verbal Response,GCS - Eye Opening,GCS - Motor Response
2230163,11658675,23997967,39582364,2152-09-13 12:37:00,87,0,0,0,0,0,0,0,0
2230164,11658675,23997967,39582364,2152-09-13 12:37:00,79,0,0,0,0,0,0,0,0
2230165,11658675,23997967,39582364,2152-09-13 12:38:00,79,0,0,0,0,0,0,0,0
2230166,11658675,23997967,39582364,2152-09-13 12:41:00,74,0,0,0,0,0,0,0,0
2230167,11658675,23997967,39582364,2152-09-13 12:41:00,79,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17700625,11658675,25483847,39768846,2157-10-22 12:00:00,0,0,0,0,0,0,0,0,0
17700626,11658675,25483847,39768846,2157-10-22 12:00:00,0,0,0,0,0,0,0,0,0
17700754,11658675,25483847,39768846,2157-10-22 16:00:00,0,0,0,0,0,0,0,0,0
17700757,11658675,25483847,39768846,2157-10-22 16:00:00,0,0,0,0,0,0,0,0,0


In [84]:
df1['stay_id'].unique()

array([39582364, 38325727, 31333062, 39305891, 38100706, 36219274,
       34670900, 34191025, 35495745, 33401284, 38378117, 34957756,
       37656629, 33662840, 33577025, 30327469, 38572232, 39768846])

In [77]:
fc_parameters = {"length": None,
                    "absolute_sum_of_changes": None, 
                    "maximum": None, 
                    "mean": None,
                    "mean_abs_change": None,
                    "mean_change": None,
                    "median": None,
                    "minimum": None,
                    "standard_deviation": None,
                    "variance": None,
                    "large_standard_deviation": [{"r": r * 0.2} for r in range(1, 5)],
                     
                     # Comment by Yu: don't think we need the 1 for quntile?
                    "quantile": [{"q": q} for q in [.25, .5, .75, 1]],
                    "linear_trend": [{"attr": "pvalue"}, {"attr": "rvalue"}, {"attr": "intercept"},{"attr": "slope"}, {"attr": "stderr"}]}

In [86]:
from tsfresh import extract_features

extracted_features = extract_features(df1[['subject_id','charttime','Heart Rate','Non Invasive Blood Pressure systolic','Non Invasive Blood Pressure diastolic','Non Invasive Blood Pressure mean','Respiratory Rate','O2 saturation pulseoxymetry','GCS - Verbal Response','GCS - Eye Opening','GCS - Motor Response']], column_id="subject_id", column_sort="charttime", default_fc_parameters=fc_parameters)


Feature Extraction: 100%|█████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 750.01it/s]


In [87]:
extracted_features

Unnamed: 0,Heart Rate__length,Heart Rate__absolute_sum_of_changes,Heart Rate__maximum,Heart Rate__mean,Heart Rate__mean_abs_change,Heart Rate__mean_change,Heart Rate__median,Heart Rate__minimum,Heart Rate__standard_deviation,Heart Rate__variance,...,Respiratory Rate__large_standard_deviation__r_0.8,Respiratory Rate__quantile__q_0.25,Respiratory Rate__quantile__q_0.5,Respiratory Rate__quantile__q_0.75,Respiratory Rate__quantile__q_1,"Respiratory Rate__linear_trend__attr_""pvalue""","Respiratory Rate__linear_trend__attr_""rvalue""","Respiratory Rate__linear_trend__attr_""intercept""","Respiratory Rate__linear_trend__attr_""slope""","Respiratory Rate__linear_trend__attr_""stderr"""
11658675,4409.0,8113.0,127.0,11.566115,1.840517,-0.019737,0.0,0.0,27.3777,749.538439,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [88]:
extracted_features.columns

Index(['Heart Rate__length', 'Heart Rate__absolute_sum_of_changes',
       'Heart Rate__maximum', 'Heart Rate__mean',
       'Heart Rate__mean_abs_change', 'Heart Rate__mean_change',
       'Heart Rate__median', 'Heart Rate__minimum',
       'Heart Rate__standard_deviation', 'Heart Rate__variance',
       ...
       'Respiratory Rate__large_standard_deviation__r_0.8',
       'Respiratory Rate__quantile__q_0.25',
       'Respiratory Rate__quantile__q_0.5',
       'Respiratory Rate__quantile__q_0.75', 'Respiratory Rate__quantile__q_1',
       'Respiratory Rate__linear_trend__attr_"pvalue"',
       'Respiratory Rate__linear_trend__attr_"rvalue"',
       'Respiratory Rate__linear_trend__attr_"intercept"',
       'Respiratory Rate__linear_trend__attr_"slope"',
       'Respiratory Rate__linear_trend__attr_"stderr"'],
      dtype='object', length=207)