### Imports


In [1]:
import os
os.chdir('../')

from src.data import constants
import pandas as pd
from pandas import read_csv
import datetime as dt
import numpy as np


### Read all data from local source

MIMIC-IV Dataset

In [2]:
## CORE
df_admissions = pd.read_csv(constants.admissions, dtype={'admission_location': 'object','deathtime': 'object','edouttime': 'object','edregtime': 'object'})
df_patients = pd.read_csv(constants.patients, dtype={'dod': 'object'})  
df_transfers = pd.read_csv(constants.transfers, dtype={'careunit': 'object'})


In [3]:
## ICU
df_d_items = pd.read_csv(constants.d_items)

df_chartevents = pd.read_csv(constants.chartevents, low_memory=False, dtype={'value': 'object', 'valueuom': 'object'}, nrows=2000)


### Create dataframe for patient  10003700

Core

In [4]:
df_admissions = df_admissions[df_admissions["subject_id"]==10003700]
df_patients = df_patients[df_patients["subject_id"]==10003700]
df_transfers = df_transfers[df_transfers["subject_id"]==10003700]


ICU

In [5]:
df_chartevents = df_chartevents[df_chartevents["subject_id"]==10003700]

In [6]:
### Fix data type issues to allow for merging


df_admissions['admittime'] = pd.to_datetime(df_admissions['admittime'])
df_admissions['dischtime'] = pd.to_datetime(df_admissions['dischtime'])
df_admissions['deathtime'] = pd.to_datetime(df_admissions['deathtime'])
df_admissions['edregtime'] = pd.to_datetime(df_admissions['edregtime'])
df_admissions['edouttime'] = pd.to_datetime(df_admissions['edouttime'])

df_transfers['intime'] = pd.to_datetime(df_transfers['intime'])
df_transfers['outtime'] = pd.to_datetime(df_transfers['outtime'])

df_chartevents['charttime'] = pd.to_datetime(df_chartevents['charttime'])
df_chartevents['storetime'] = pd.to_datetime(df_chartevents['storetime'])

In [7]:
df_admissions.dtypes, df_transfers.dtypes, df_chartevents.dtypes


(subject_id                       int64
 hadm_id                          int64
 admittime               datetime64[ns]
 dischtime               datetime64[ns]
 deathtime               datetime64[ns]
 admission_type                  object
 admission_location              object
 discharge_location              object
 insurance                       object
 language                        object
 marital_status                  object
 ethnicity                       object
 edregtime               datetime64[ns]
 edouttime               datetime64[ns]
 hospital_expire_flag             int64
 dtype: object,
 subject_id              int64
 hadm_id               float64
 transfer_id             int64
 eventtype              object
 careunit               object
 intime         datetime64[ns]
 outtime        datetime64[ns]
 dtype: object,
 subject_id             int64
 hadm_id                int64
 stay_id                int64
 charttime     datetime64[ns]
 storetime     datetime64[ns]
 

In [8]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10003700,28623837,30600691,2165-04-24 05:10:00,2165-04-24 05:11:00,228236,0,0.0,,0
1,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225067,0,0.0,,0
2,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225070,1,1.0,,0
3,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225076,1,1.0,,0
4,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225078,1,1.0,,0
...,...,...,...,...,...,...,...,...,...,...
131,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:25:00,225187,1,1.0,,0
132,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:26:00,226137,0,0.0,,0
133,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:25:00,226169,1,1.0,,0
134,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:26:00,227367,1,1.0,,0


In [None]:
len(df_chartevents["itemid"].unique())

In [9]:
df_d_items[df_d_items["itemid"].isin([225187,225078,225117])]

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
800,225078,Social work consult,Social work consult,chartevents,Adm History/FHPA,,Checkbox,,
819,225117,Unable to assess nutrition / education,Unable to assess nutrition / education,chartevents,Adm History/FHPA,,Checkbox,,
3527,225187,Back Care,Back Care,chartevents,Treatments,,Checkbox,,


In [10]:
type(df_d_items[df_d_items["itemid"]==225187]["itemid"])

pandas.core.series.Series

In [11]:
event_list = ['Social work consult','Unable to assess nutrition / education','Back Care'] 

df_d_items_chart = df_d_items[df_d_items['label'].isin(event_list)]

df_d_items_chart = df_d_items_chart[["category","label","itemid"]]

df_d_items_chart

Unnamed: 0,category,label,itemid
800,Adm History/FHPA,Social work consult,225078
819,Adm History/FHPA,Unable to assess nutrition / education,225117
3527,Treatments,Back Care,225187


In [12]:
df_chartevents = df_chartevents[df_chartevents["itemid"].isin([225187,225078,225117])]

In [13]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
4,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225078,1,1.0,,0
10,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225117,1,1.0,,0
33,10003700,28623837,30600691,2165-04-24 05:42:00,2165-04-24 05:42:00,225187,1,1.0,,0
67,10003700,28623837,30600691,2165-04-24 06:02:00,2165-04-24 06:02:00,225187,1,1.0,,0
131,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:25:00,225187,1,1.0,,0


In [15]:
df_chartevents["valueuom"].fillna('', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


In [16]:
df_chartevents

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
4,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225078,1,1.0,,0
10,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225117,1,1.0,,0
33,10003700,28623837,30600691,2165-04-24 05:42:00,2165-04-24 05:42:00,225187,1,1.0,,0
67,10003700,28623837,30600691,2165-04-24 06:02:00,2165-04-24 06:02:00,225187,1,1.0,,0
131,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:25:00,225187,1,1.0,,0


In [17]:
df_chartevents_d_items_chart = df_chartevents.merge(df_d_items_chart, on=("itemid"))

In [19]:
df_chartevents_d_items_chart

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,category,label
0,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225078,1,1.0,,0,Adm History/FHPA,Social work consult
1,10003700,28623837,30600691,2165-04-24 05:12:00,2165-04-24 05:14:00,225117,1,1.0,,0,Adm History/FHPA,Unable to assess nutrition / education
2,10003700,28623837,30600691,2165-04-24 05:42:00,2165-04-24 05:42:00,225187,1,1.0,,0,Treatments,Back Care
3,10003700,28623837,30600691,2165-04-24 06:02:00,2165-04-24 06:02:00,225187,1,1.0,,0,Treatments,Back Care
4,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:25:00,225187,1,1.0,,0,Treatments,Back Care


In [88]:
# create a new table with additional columns with label list  
df1 = df_chartevents[['subject_id', 'hadm_id', 'stay_id', 'charttime']] 
for event in event_list:    
    df1[event] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event] = 0


In [110]:
df1

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,Social work consult,Unable to assess nutrition / education,Back Care
4,10003700,28623837,30600691,2165-04-24 05:12:00,1,0,0
10,10003700,28623837,30600691,2165-04-24 05:12:00,0,1,0
33,10003700,28623837,30600691,2165-04-24 05:42:00,0,0,1
67,10003700,28623837,30600691,2165-04-24 06:02:00,0,0,1
131,10003700,28623837,30600691,2165-04-24 08:00:00,0,0,1


In [58]:
for i in df_chartevents_d_items_chart["label"].index:
    print (df_chartevents_d_items_chart["label"][i],df_chartevents_d_items_chart["valuenum"][i])
    df1 

Social work consult 1.0
Unable to assess nutrition / education 1.0
Back Care 1.0
Back Care 1.0
Back Care 1.0


In [101]:
df_chartevents_d_items_chart["valuenum"][0]

1.0

0

In [109]:
df1[event_list[0]][df1.index[0]] = df_chartevents_d_items_chart["valuenum"][0]

df1[event_list[1]][df1.index[1]] = df_chartevents_d_items_chart["valuenum"][1]

df1[event_list[2]][df1.index[2]] = df_chartevents_d_items_chart["valuenum"][2]
df1[event_list[2]][df1.index[3]] = df_chartevents_d_items_chart["valuenum"][3]
df1[event_list[2]][df1.index[4]] = df_chartevents_d_items_chart["valuenum"][4]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[0]][df1.index[0]] = df_chartevents_d_items_chart["valuenum"][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[1]][df1.index[1]] = df_chartevents_d_items_chart["valuenum"][1]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[event_list[2]][df1.index[2]] = df_chartevents_d_items_chart["valuenum"][2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-d

In [111]:
df1

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,Social work consult,Unable to assess nutrition / education,Back Care
4,10003700,28623837,30600691,2165-04-24 05:12:00,1,0,0
10,10003700,28623837,30600691,2165-04-24 05:12:00,0,1,0
33,10003700,28623837,30600691,2165-04-24 05:42:00,0,0,1
67,10003700,28623837,30600691,2165-04-24 06:02:00,0,0,1
131,10003700,28623837,30600691,2165-04-24 08:00:00,0,0,1


In [152]:
df1[["stay_id","charttime","Social work consult","Unable to assess nutrition / education","Back Care"]]

Unnamed: 0_level_0,stay_id,charttime,Social work consult,Unable to assess nutrition / education,Back Care
df1index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4,30600691,2165-04-24 05:12:00,1,0,0
10,30600691,2165-04-24 05:12:00,0,1,0
33,30600691,2165-04-24 05:42:00,0,0,1
67,30600691,2165-04-24 06:02:00,0,0,1
131,30600691,2165-04-24 08:00:00,0,0,1


In [147]:
fc_parameters = {"length": None,
                    "absolute_sum_of_changes": None, 
                    "maximum": None, 
                    "mean": None,
                    "mean_abs_change": None,
                    "mean_change": None,
                    "median": None,
                    "minimum": None,
                    "standard_deviation": None,
                    "variance": None,
                    "large_standard_deviation": [{"r": r * 0.2} for r in range(1, 5)],
                    "quantile": [{"q": q} for q in [.25, .5, .75, 1]],
                    "linear_trend": [{"attr": "pvalue"}, {"attr": "rvalue"}, {"attr": "intercept"},{"attr": "slope"}, {"attr": "stderr"}]}

In [153]:
from tsfresh import extract_features

extracted_features = extract_features(df1[["stay_id","charttime","Social work consult","Unable to assess nutrition / education","Back Care"]], column_id="stay_id", column_sort="charttime", default_fc_parameters=fc_parameters)

Feature Extraction: 100%|███████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.47it/s]


In [155]:
extracted_features.columns

Index(['Social work consult__length',
       'Social work consult__absolute_sum_of_changes',
       'Social work consult__maximum', 'Social work consult__mean',
       'Social work consult__mean_abs_change',
       'Social work consult__mean_change', 'Social work consult__median',
       'Social work consult__minimum',
       'Social work consult__standard_deviation',
       'Social work consult__variance',
       'Social work consult__large_standard_deviation__r_0.2',
       'Social work consult__large_standard_deviation__r_0.4',
       'Social work consult__large_standard_deviation__r_0.6000000000000001',
       'Social work consult__large_standard_deviation__r_0.8',
       'Social work consult__quantile__q_0.25',
       'Social work consult__quantile__q_0.5',
       'Social work consult__quantile__q_0.75',
       'Social work consult__quantile__q_1',
       'Social work consult__linear_trend__attr_"pvalue"',
       'Social work consult__linear_trend__attr_"rvalue"',
       'Social w