In [1]:
import pandas as pd
from datetime import datetime
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
def side_by_side(*objs, **kwds):
    """Print the reprs of several objects next to each other.

    Each positional argument is rendered with ``repr`` and split into lines;
    the resulting columns are joined horizontally by pandas' ``adjoin``.

    Keyword arguments:
        space: number of blank characters between adjacent columns (default 4).

    Returns None; the joined text and one trailing blank line go to stdout.
    """
    from pandas.io.formats.printing import adjoin

    gap = kwds.get('space', 4)
    columns = []
    for item in objs:
        columns.append(repr(item).split('\n'))
    rendered = adjoin(gap, *columns)
    print(rendered)
    print()

# Load Data

## 1.1 Heart Rate 

In [3]:
## Load the heart-rate CSV; the path is relative to the notebook's working directory
heart_rate_df = pd.read_csv("Data_3173_TS/heart_rate.csv")  

In [4]:
## Preview the first rows to check the columns and value formats
heart_rate_df.head()

Unnamed: 0,subject_id_x,HADM_ID,CHARTTIME,VALUENUM,subject_id_y,ADMITTIME,DISCHTIME,Dead
0,10824,143406,2126-07-01 06:00:00,145.0,10824,2126-06-12 22:08:00,2126-07-15 13:40:00,0
1,10824,143406,2126-07-01 07:00:00,136.0,10824,2126-06-12 22:08:00,2126-07-15 13:40:00,0
2,10824,143406,2126-07-03 16:00:00,147.0,10824,2126-06-12 22:08:00,2126-07-15 13:40:00,0
3,10824,143406,2126-06-18 16:00:00,153.0,10824,2126-06-12 22:08:00,2126-07-15 13:40:00,0
4,10824,143406,2126-06-18 17:00:00,151.0,10824,2126-06-12 22:08:00,2126-07-15 13:40:00,0


In [5]:
## Number of distinct HADM_ID values present in the heart-rate data
heart_rate_df['HADM_ID'].nunique()

3299

In [6]:
## Column dtypes and non-null counts (timestamp columns load as plain object/str)
heart_rate_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1542782 entries, 0 to 1542781
Data columns (total 8 columns):
 #   Column        Non-Null Count    Dtype  
---  ------        --------------    -----  
 0   subject_id_x  1542782 non-null  int64  
 1   HADM_ID       1542782 non-null  int64  
 2   CHARTTIME     1542782 non-null  object 
 3   VALUENUM      1542367 non-null  float64
 4   subject_id_y  1542782 non-null  int64  
 5   ADMITTIME     1542782 non-null  object 
 6   DISCHTIME     1542782 non-null  object 
 7   Dead          1542782 non-null  int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 94.2+ MB


In [7]:
## Convert the two timestamp columns used below to datetime64 and report how long it took
start = datetime.now()
for time_col in ('CHARTTIME', 'ADMITTIME'):
    heart_rate_df[time_col] = heart_rate_df[time_col].astype('datetime64[ns]')

elapsed = datetime.now() - start
print("Computing time takes %s" % elapsed)

Computing time takes 0:00:01.094173


In [8]:
## Sort dataframe so each admission's rows are in chart-time order
## (reassigned instead of sorting in place)
heart_rate_df = heart_rate_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
heart_rate_df['row'] = heart_rate_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes heart_rate_1 an independent frame so the
## columns added to it later do not write into a slice of heart_rate_df
heart_rate_1 = heart_rate_df[heart_rate_df.row == 1].copy()

In [9]:
## Repeats the first-row selection from the previous cell; .copy() keeps the
## later column assignments on heart_rate_1 from writing into a slice of heart_rate_df
heart_rate_1 = heart_rate_df[heart_rate_df.row == 1].copy()

In [12]:
## Earliest heart-rate row of each HADM_ID
heart_rate_1.head()

Unnamed: 0,subject_id_x,HADM_ID,CHARTTIME,VALUENUM,subject_id_y,ADMITTIME,DISCHTIME,Dead,row
196004,14509,100029,2185-04-17 11:00:00,154.0,14509,2185-04-17 11:50:00,2185-05-02 12:00:00,0,1
993739,677,100044,2195-08-12 15:13:00,154.0,677,2195-08-12 15:36:00,2195-09-14 10:50:00,0,1
1344058,7174,100062,2169-08-03 12:45:00,156.0,7174,2169-08-03 13:26:00,2169-08-10 12:30:00,0,1
960792,32597,100081,2156-11-17 15:00:00,165.0,32597,2156-11-17 15:25:00,2156-11-21 14:00:00,0,1
23930,11221,100096,2156-07-21 20:00:00,158.0,11221,2156-07-21 20:46:00,2156-09-03 16:45:00,0,1


In [13]:
## Columns on the first-row frame (includes the helper 'row' column)
heart_rate_1.columns 

Index(['subject_id_x', 'HADM_ID', 'CHARTTIME', 'VALUENUM', 'subject_id_y',
       'ADMITTIME', 'DISCHTIME', 'Dead', 'row'],
      dtype='object')

In [14]:
## Time from admission to the first charted heart rate; negative when CHARTTIME precedes ADMITTIME
heart_rate_1['DIFF'] = heart_rate_1['CHARTTIME'] - heart_rate_1['ADMITTIME']

In [15]:
## Summary statistics of the first-row frame, including the spread of DIFF
heart_rate_1.describe()

Unnamed: 0,subject_id_x,HADM_ID,VALUENUM,subject_id_y,Dead,row,DIFF
count,3299.0,3299.0,3294.0,3299.0,3299.0,3299.0,3299
mean,15437.648985,150056.039406,154.541591,15437.648985,0.015762,1.0,0 days 02:16:26.626250378
std,9141.38702,28901.234203,17.217494,9141.38702,0.124574,0.0,1 days 08:48:50.821757
min,10.0,100029.0,78.0,10.0,0.0,1.0,-1 days +14:15:00
25%,7541.0,125071.0,142.0,7541.0,0.0,1.0,-1 days +23:16:00
50%,15306.0,149769.0,156.0,15306.0,0.0,1.0,0 days 00:03:00
75%,23073.0,175310.0,166.0,23073.0,0.0,1.0,0 days 00:22:00
max,32806.0,199918.0,300.0,32806.0,1.0,1.0,55 days 07:13:00


In [16]:
## Same CHARTTIME - ADMITTIME difference as DIFF, stored under the name the window filters use
heart_rate_1['TIME_INTERVAL'] = heart_rate_1['CHARTTIME'] - heart_rate_1['ADMITTIME']

In [17]:
## Show the rows whose first reading lies within one day either side of admission
heart_rate_1[heart_rate_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

Unnamed: 0,subject_id_x,HADM_ID,CHARTTIME,VALUENUM,subject_id_y,ADMITTIME,DISCHTIME,Dead,row,DIFF,TIME_INTERVAL
196004,14509,100029,2185-04-17 11:00:00,154.0,14509,2185-04-17 11:50:00,2185-05-02 12:00:00,0,1,-1 days +23:10:00,-1 days +23:10:00
993739,677,100044,2195-08-12 15:13:00,154.0,677,2195-08-12 15:36:00,2195-09-14 10:50:00,0,1,-1 days +23:37:00,-1 days +23:37:00
1344058,7174,100062,2169-08-03 12:45:00,156.0,7174,2169-08-03 13:26:00,2169-08-10 12:30:00,0,1,-1 days +23:19:00,-1 days +23:19:00
960792,32597,100081,2156-11-17 15:00:00,165.0,32597,2156-11-17 15:25:00,2156-11-21 14:00:00,0,1,-1 days +23:35:00,-1 days +23:35:00
23930,11221,100096,2156-07-21 20:00:00,158.0,11221,2156-07-21 20:46:00,2156-09-03 16:45:00,0,1,-1 days +23:14:00,-1 days +23:14:00
...,...,...,...,...,...,...,...,...,...,...,...
1203958,4440,199901,2173-04-05 20:00:00,145.0,4440,2173-04-05 19:40:00,2173-04-16 14:40:00,0,1,0 days 00:20:00,0 days 00:20:00
555361,21938,199904,2184-03-28 13:45:00,138.0,21938,2184-03-28 14:11:00,2184-04-14 16:30:00,0,1,-1 days +23:34:00,-1 days +23:34:00
162904,13756,199913,2197-12-13 16:15:00,167.0,13756,2197-12-13 15:44:00,2198-01-06 18:45:00,0,1,0 days 00:31:00,0 days 00:31:00
1149878,3077,199917,2113-01-30 04:00:00,178.0,3077,2113-01-30 02:55:00,2113-02-08 14:15:00,0,1,0 days 01:05:00,0 days 01:05:00


In [18]:
## (rows, columns) of the first-row frame
heart_rate_1.shape 

(3299, 11)

In [19]:
## Start the per-admission flag table from the HADM_ID column of the first-row frame
admissions = heart_rate_1['HADM_ID'].to_frame()

In [20]:
## Confirm admissions is a DataFrame (not a Series) before adding columns to it
type(admissions)

pandas.core.frame.DataFrame

In [21]:
## Keep admissions whose first heart-rate reading is within one day either side of admission
heart_rate_24 = heart_rate_1[heart_rate_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

In [23]:
## Number of HADM_ID values whose first heart-rate reading falls inside the window
heart_rate_24['HADM_ID'].nunique()

3233

In [22]:
## Flag each admission: True when its HADM_ID is among those kept in heart_rate_24
admissions['HeartRate'] = admissions['HADM_ID'].isin(heart_rate_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate
196004,100029,True
993739,100044,True
1344058,100062,True
960792,100081,True
23930,100096,True


In [65]:
## Rows flagged as having a heart-rate reading in the window. The column is
## already boolean, so it is used directly as the mask. The recorded output
## shows 22 columns because this cell was last executed (In [65]) after all
## flag columns had been added.
admissions[admissions['HeartRate']].shape

(3233, 22)

## 1.2 Resp Rate 

In [24]:
## Load the respiratory-rate CSV and parse the two timestamps used below
resp_rate_df = pd.read_csv("Data_3173_TS/resp_rate.csv")

resp_rate_df['CHARTTIME'] = resp_rate_df['CHARTTIME'].astype('datetime64[ns]')
resp_rate_df['ADMITTIME'] = resp_rate_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
resp_rate_df = resp_rate_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
resp_rate_df['row'] = resp_rate_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of resp_rate_df
resp_rate_1 = resp_rate_df[resp_rate_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
resp_rate_1['TIME_INTERVAL'] = resp_rate_1['CHARTTIME'] - resp_rate_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
resp_rate_24 = resp_rate_1[resp_rate_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

resp_rate_24.shape

(3221, 10)

In [25]:
## Flag each admission: True when its HADM_ID is among those kept in resp_rate_24
admissions['RespRate'] = admissions['HADM_ID'].isin(resp_rate_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate
196004,100029,True,True
993739,100044,True,True
1344058,100062,True,True
960792,100081,True,True
23930,100096,True,True


## 1.3 SaO2 

In [26]:
## Load the SaO2 CSV and parse the two timestamps used below
sao2_df = pd.read_csv("Data_3173_TS/sao2.csv")

sao2_df['CHARTTIME'] = sao2_df['CHARTTIME'].astype('datetime64[ns]')
sao2_df['ADMITTIME'] = sao2_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
sao2_df = sao2_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
sao2_df['row'] = sao2_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of sao2_df
sao2_1 = sao2_df[sao2_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
sao2_1['TIME_INTERVAL'] = sao2_1['CHARTTIME'] - sao2_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
sao2_24 = sao2_1[sao2_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

sao2_24.shape

(3226, 10)

In [27]:
## Flag each admission: True when its HADM_ID is among those kept in sao2_24
admissions['SaO2'] = admissions['HADM_ID'].isin(sao2_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2
196004,100029,True,True,True
993739,100044,True,True,True
1344058,100062,True,True,True
960792,100081,True,True,True
23930,100096,True,True,True


## 1.4 HR Alarm High

In [28]:
## Load the HR-alarm-high CSV and parse the two timestamps used below
hr_alarm_high_df = pd.read_csv("Data_3173_TS/hr_alarm_high.csv")

hr_alarm_high_df['CHARTTIME'] = hr_alarm_high_df['CHARTTIME'].astype('datetime64[ns]')
hr_alarm_high_df['ADMITTIME'] = hr_alarm_high_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
hr_alarm_high_df = hr_alarm_high_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
hr_alarm_high_df['row'] = hr_alarm_high_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of hr_alarm_high_df
hr_alarm_high_1 = hr_alarm_high_df[hr_alarm_high_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
hr_alarm_high_1['TIME_INTERVAL'] = hr_alarm_high_1['CHARTTIME'] - hr_alarm_high_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
hr_alarm_high_24 = hr_alarm_high_1[hr_alarm_high_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

hr_alarm_high_24.shape

(3225, 10)

In [29]:
## Flag each admission: True when its HADM_ID is among those kept in hr_alarm_high_24
admissions['hrAlarmHigh'] = admissions['HADM_ID'].isin(hr_alarm_high_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh
196004,100029,True,True,True,True
993739,100044,True,True,True,True
1344058,100062,True,True,True,True
960792,100081,True,True,True,True
23930,100096,True,True,True,True


## 1.5 HR Alarm Low 

In [30]:
## Load the HR-alarm-low CSV and parse the two timestamps used below
hr_alarm_low_df = pd.read_csv("Data_3173_TS/hr_alarm_low.csv")

hr_alarm_low_df['CHARTTIME'] = hr_alarm_low_df['CHARTTIME'].astype('datetime64[ns]')
hr_alarm_low_df['ADMITTIME'] = hr_alarm_low_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
hr_alarm_low_df = hr_alarm_low_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
hr_alarm_low_df['row'] = hr_alarm_low_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of hr_alarm_low_df
hr_alarm_low_1 = hr_alarm_low_df[hr_alarm_low_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
hr_alarm_low_1['TIME_INTERVAL'] = hr_alarm_low_1['CHARTTIME'] - hr_alarm_low_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
hr_alarm_low_24 = hr_alarm_low_1[hr_alarm_low_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

hr_alarm_low_24.shape

(3225, 10)

In [31]:
## Flag each admission: True when its HADM_ID is among those kept in hr_alarm_low_24
admissions['hrAlarmLow'] = admissions['HADM_ID'].isin(hr_alarm_low_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow
196004,100029,True,True,True,True,True
993739,100044,True,True,True,True,True
1344058,100062,True,True,True,True,True
960792,100081,True,True,True,True,True
23930,100096,True,True,True,True,True


## 1.6 SaO2 Alarm High 

In [32]:
## Load the SaO2-alarm-high CSV and parse the two timestamps used below
sao2_alarm_high_df = pd.read_csv("Data_3173_TS/sao2_alarm_high.csv")

sao2_alarm_high_df['CHARTTIME'] = sao2_alarm_high_df['CHARTTIME'].astype('datetime64[ns]')
sao2_alarm_high_df['ADMITTIME'] = sao2_alarm_high_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
sao2_alarm_high_df = sao2_alarm_high_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
sao2_alarm_high_df['row'] = sao2_alarm_high_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of sao2_alarm_high_df
sao2_alarm_high_1 = sao2_alarm_high_df[sao2_alarm_high_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
sao2_alarm_high_1['TIME_INTERVAL'] = sao2_alarm_high_1['CHARTTIME'] - sao2_alarm_high_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
sao2_alarm_high_24 = sao2_alarm_high_1[sao2_alarm_high_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

sao2_alarm_high_24.shape

(3221, 10)

In [33]:
## Flag each admission: True when its HADM_ID is among those kept in sao2_alarm_high_24
admissions['sao2AlarmHigh'] = admissions['HADM_ID'].isin(sao2_alarm_high_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh
196004,100029,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True


## 1.7 SaO2 Alarm Low 

In [34]:
## Load the SaO2-alarm-low CSV and parse the two timestamps used below
sao2_alarm_low_df = pd.read_csv("Data_3173_TS/sao2_alarm_low.csv")

sao2_alarm_low_df['CHARTTIME'] = sao2_alarm_low_df['CHARTTIME'].astype('datetime64[ns]')
sao2_alarm_low_df['ADMITTIME'] = sao2_alarm_low_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
sao2_alarm_low_df = sao2_alarm_low_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
sao2_alarm_low_df['row'] = sao2_alarm_low_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of sao2_alarm_low_df
sao2_alarm_low_1 = sao2_alarm_low_df[sao2_alarm_low_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
sao2_alarm_low_1['TIME_INTERVAL'] = sao2_alarm_low_1['CHARTTIME'] - sao2_alarm_low_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
sao2_alarm_low_24 = sao2_alarm_low_1[sao2_alarm_low_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

sao2_alarm_low_24.shape

(3221, 10)

In [35]:
## Flag each admission: True when its HADM_ID is among those kept in sao2_alarm_low_24
admissions['sao2AlarmLow'] = admissions['HADM_ID'].isin(sao2_alarm_low_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow
196004,100029,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True


## 1.8 Temperature 

In [36]:
## Load the temperature CSV and parse the two timestamps used below
temperature_df = pd.read_csv("Data_3173_TS/temperature.csv")

temperature_df['CHARTTIME'] = temperature_df['CHARTTIME'].astype('datetime64[ns]')
temperature_df['ADMITTIME'] = temperature_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
temperature_df = temperature_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
temperature_df['row'] = temperature_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of temperature_df
temperature_1 = temperature_df[temperature_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
temperature_1['TIME_INTERVAL'] = temperature_1['CHARTTIME'] - temperature_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
temperature_24 = temperature_1[temperature_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

temperature_24.shape

(3225, 10)

In [37]:
## Flag each admission: True when its HADM_ID is among those kept in temperature_24
admissions['temperature'] = admissions['HADM_ID'].isin(temperature_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature
196004,100029,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True


## 1.9 Skin Temperature 

In [38]:
## Load the skin-temperature CSV and parse the two timestamps used below
skin_temperature_df = pd.read_csv("Data_3173_TS/skin_temperature.csv")

skin_temperature_df['CHARTTIME'] = skin_temperature_df['CHARTTIME'].astype('datetime64[ns]')
skin_temperature_df['ADMITTIME'] = skin_temperature_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
skin_temperature_df = skin_temperature_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
skin_temperature_df['row'] = skin_temperature_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of skin_temperature_df
skin_temperature_1 = skin_temperature_df[skin_temperature_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
skin_temperature_1['TIME_INTERVAL'] = skin_temperature_1['CHARTTIME'] - skin_temperature_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
skin_temperature_24 = skin_temperature_1[skin_temperature_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

skin_temperature_24.shape

(3218, 10)

In [39]:
## Flag each admission: True when its HADM_ID is among those kept in skin_temperature_24
admissions['SkinTemperature'] = admissions['HADM_ID'].isin(skin_temperature_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature
196004,100029,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True


## 1.10 BP Cuff Diastolic 

In [40]:
## Load the BP-cuff-diastolic CSV and parse the two timestamps used below
bp_cuff_diastolic_df = pd.read_csv("Data_3173_TS/bp_cuff_diastolic.csv")

bp_cuff_diastolic_df['CHARTTIME'] = bp_cuff_diastolic_df['CHARTTIME'].astype('datetime64[ns]')
bp_cuff_diastolic_df['ADMITTIME'] = bp_cuff_diastolic_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
bp_cuff_diastolic_df = bp_cuff_diastolic_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
bp_cuff_diastolic_df['row'] = bp_cuff_diastolic_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of bp_cuff_diastolic_df
bp_cuff_diastolic_1 = bp_cuff_diastolic_df[bp_cuff_diastolic_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
bp_cuff_diastolic_1['TIME_INTERVAL'] = bp_cuff_diastolic_1['CHARTTIME'] - bp_cuff_diastolic_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
bp_cuff_diastolic_24 = bp_cuff_diastolic_1[bp_cuff_diastolic_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

bp_cuff_diastolic_24.shape

(3228, 10)

In [41]:
## Flag each admission: True when its HADM_ID is among those kept in bp_cuff_diastolic_24
admissions['BPCuffDiastolic'] = admissions['HADM_ID'].isin(bp_cuff_diastolic_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature,BPCuffDiastolic
196004,100029,True,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True,True


## 1.11 BP Cuff Mean 

In [42]:
## Load the BP-cuff-mean CSV and parse the two timestamps used below
bp_cuff_mean_df = pd.read_csv("Data_3173_TS/bp_cuff_mean.csv")

bp_cuff_mean_df['CHARTTIME'] = bp_cuff_mean_df['CHARTTIME'].astype('datetime64[ns]')
bp_cuff_mean_df['ADMITTIME'] = bp_cuff_mean_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
bp_cuff_mean_df = bp_cuff_mean_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
bp_cuff_mean_df['row'] = bp_cuff_mean_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of bp_cuff_mean_df
bp_cuff_mean_1 = bp_cuff_mean_df[bp_cuff_mean_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
bp_cuff_mean_1['TIME_INTERVAL'] = bp_cuff_mean_1['CHARTTIME'] - bp_cuff_mean_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
bp_cuff_mean_24 = bp_cuff_mean_1[bp_cuff_mean_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

bp_cuff_mean_24.shape

(3228, 10)

In [43]:
## Flag each admission: True when its HADM_ID is among those kept in bp_cuff_mean_24
admissions['BPCuffMean'] = admissions['HADM_ID'].isin(bp_cuff_mean_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature,BPCuffDiastolic,BPCuffMean
196004,100029,True,True,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True,True,True


## 1.12 BP Cuff Systolic 

In [44]:
## Load the BP-cuff-systolic CSV and parse the two timestamps used below
bp_cuff_systolic_df = pd.read_csv("Data_3173_TS/bp_cuff_systolic.csv")

bp_cuff_systolic_df['CHARTTIME'] = bp_cuff_systolic_df['CHARTTIME'].astype('datetime64[ns]')
bp_cuff_systolic_df['ADMITTIME'] = bp_cuff_systolic_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
bp_cuff_systolic_df = bp_cuff_systolic_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
bp_cuff_systolic_df['row'] = bp_cuff_systolic_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of bp_cuff_systolic_df
bp_cuff_systolic_1 = bp_cuff_systolic_df[bp_cuff_systolic_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
bp_cuff_systolic_1['TIME_INTERVAL'] = bp_cuff_systolic_1['CHARTTIME'] - bp_cuff_systolic_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
bp_cuff_systolic_24 = bp_cuff_systolic_1[bp_cuff_systolic_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

bp_cuff_systolic_24.shape

(3228, 10)

In [45]:
## Flag each admission: True when its HADM_ID is among those kept in bp_cuff_systolic_24
admissions['BPCuffSystolic'] = admissions['HADM_ID'].isin(bp_cuff_systolic_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature,BPCuffDiastolic,BPCuffMean,BPCuffSystolic
196004,100029,True,True,True,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True,True,True,True


## 1.13 Glucometer

In [46]:
## Load the glucometer CSV and parse the two timestamps used below
glucometer_df = pd.read_csv("Data_3173_TS/glucometer.csv")

glucometer_df['CHARTTIME'] = glucometer_df['CHARTTIME'].astype('datetime64[ns]')
glucometer_df['ADMITTIME'] = glucometer_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
glucometer_df = glucometer_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
glucometer_df['row'] = glucometer_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of glucometer_df
glucometer_1 = glucometer_df[glucometer_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
glucometer_1['TIME_INTERVAL'] = glucometer_1['CHARTTIME'] - glucometer_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
glucometer_24 = glucometer_1[glucometer_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

glucometer_24.shape

(3224, 10)

In [47]:
## Flag each admission: True when its HADM_ID is among those kept in glucometer_24
admissions['Glucometer'] = admissions['HADM_ID'].isin(glucometer_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature,BPCuffDiastolic,BPCuffMean,BPCuffSystolic,Glucometer
196004,100029,True,True,True,True,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True,True,True,True,True


# Other Chart Events 

## 2.1 Birth Weight 

In [48]:
## Load the birth-weight CSV and parse the two timestamps used below
birth_weight_df = pd.read_csv("Data_3173_SV/birth_weight.csv")

birth_weight_df['CHARTTIME'] = birth_weight_df['CHARTTIME'].astype('datetime64[ns]')
birth_weight_df['ADMITTIME'] = birth_weight_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
birth_weight_df = birth_weight_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
birth_weight_df['row'] = birth_weight_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of birth_weight_df
birth_weight_1 = birth_weight_df[birth_weight_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
birth_weight_1['TIME_INTERVAL'] = birth_weight_1['CHARTTIME'] - birth_weight_1['ADMITTIME']

## Keep admissions whose first reading is within one day either side of admission
birth_weight_24 = birth_weight_1[birth_weight_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

birth_weight_24.shape

(3216, 11)

In [49]:
## Flag each admission from the windowed frame (birth_weight_24), as every other
## measurement does. The unfiltered birth_weight_1 would mark an admission True
## even when its first reading falls outside the window.
admissions['BirthWeight'] = admissions['HADM_ID'].isin(birth_weight_24['HADM_ID'])
admissions.head()

Unnamed: 0,HADM_ID,HeartRate,RespRate,SaO2,hrAlarmHigh,hrAlarmLow,sao2AlarmHigh,sao2AlarmLow,temperature,SkinTemperature,BPCuffDiastolic,BPCuffMean,BPCuffSystolic,Glucometer,BirthWeight
196004,100029,True,True,True,True,True,True,True,True,True,True,True,True,True,True
993739,100044,True,True,True,True,True,True,True,True,True,True,True,True,True,True
1344058,100062,True,True,True,True,True,True,True,True,True,True,True,True,True,True
960792,100081,True,True,True,True,True,True,True,True,True,True,True,True,True,True
23930,100096,True,True,True,True,True,True,True,True,True,True,True,True,True,True


## 2.2 Head Circ 

In [50]:
## Load the head-circumference CSV and parse the two timestamps used below
head_circ_df = pd.read_csv("Data_3173_SV/head_circ.csv")

head_circ_df['CHARTTIME'] = head_circ_df['CHARTTIME'].astype('datetime64[ns]')
head_circ_df['ADMITTIME'] = head_circ_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
head_circ_df = head_circ_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
head_circ_df['row'] = head_circ_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of head_circ_df
head_circ_1 = head_circ_df[head_circ_df.row == 1].copy()

## Offset of the first reading from admission (negative = charted before ADMITTIME)
head_circ_1['TIME_INTERVAL'] = head_circ_1['CHARTTIME'] - head_circ_1['ADMITTIME']

## Keep admissions whose first reading is between 1 day before and 10 days after admission.
## NOTE(review): the upper bound is 10 days here, not the 1 day used for the other
## measurements, although the variable keeps the _24 suffix - confirm this is intended.
head_circ_24 = head_circ_1[head_circ_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=10))]

head_circ_24.shape

(3282, 10)

In [51]:
## Flag each admission from the windowed frame (head_circ_24), as every other
## measurement does. The unfiltered head_circ_1 would mark an admission True
## even when its first reading falls outside the window.
admissions['HeadCirc'] = admissions['HADM_ID'].isin(head_circ_24['HADM_ID'])
admissions.shape

(3299, 16)

# Blood Test 

## 3.1 Bands 

In [52]:
## Load the bands CSV and parse the two timestamps used below
bands_df = pd.read_csv("Data_3173_SV/bands.csv")

bands_df['CHARTTIME'] = bands_df['CHARTTIME'].astype('datetime64[ns]')
bands_df['ADMITTIME'] = bands_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
bands_df = bands_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
bands_df['row'] = bands_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of bands_df
bands_1 = bands_df[bands_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
bands_1['TIME_INTERVAL'] = bands_1['CHARTTIME'] - bands_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
bands_24 = bands_1[bands_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

bands_24.shape

(3184, 10)

In [53]:
## Flag each admission: True when its HADM_ID is among those kept in bands_24
admissions['Bands'] = admissions['HADM_ID'].isin(bands_24['HADM_ID'])
admissions.shape 

(3299, 17)

## 3.2 Eosinophils

In [54]:
## Load the eosinophils CSV and parse the two timestamps used below
eosinophils_df = pd.read_csv("Data_3173_SV/eosinophils.csv")

eosinophils_df['CHARTTIME'] = eosinophils_df['CHARTTIME'].astype('datetime64[ns]')
eosinophils_df['ADMITTIME'] = eosinophils_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
eosinophils_df = eosinophils_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
eosinophils_df['row'] = eosinophils_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of eosinophils_df
eosinophils_1 = eosinophils_df[eosinophils_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
eosinophils_1['TIME_INTERVAL'] = eosinophils_1['CHARTTIME'] - eosinophils_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
eosinophils_24 = eosinophils_1[eosinophils_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

eosinophils_24.shape

(3192, 10)

In [55]:
## Flag each admission: True when its HADM_ID is among those kept in eosinophils_24
admissions['Eosinophils'] = admissions['HADM_ID'].isin(eosinophils_24['HADM_ID'])
admissions.shape 

(3299, 18)

## 3.3 Lymphs

In [56]:
## Load the lymphs CSV and parse the two timestamps used below
lymphs_df = pd.read_csv("Data_3173_SV/lymphs.csv")

lymphs_df['CHARTTIME'] = lymphs_df['CHARTTIME'].astype('datetime64[ns]')
lymphs_df['ADMITTIME'] = lymphs_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
lymphs_df = lymphs_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
lymphs_df['row'] = lymphs_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of lymphs_df
lymphs_1 = lymphs_df[lymphs_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
lymphs_1['TIME_INTERVAL'] = lymphs_1['CHARTTIME'] - lymphs_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
lymphs_24 = lymphs_1[lymphs_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

lymphs_24.shape

(3192, 14)

In [57]:
## Flag each admission: True when its HADM_ID is among those kept in lymphs_24
admissions['Lymphs'] = admissions['HADM_ID'].isin(lymphs_24['HADM_ID'])
admissions.shape 

(3299, 19)

## 3.4 MONOs

In [58]:
## Load the monos CSV and parse the two timestamps used below
monos_df = pd.read_csv("Data_3173_SV/monos.csv")

monos_df['CHARTTIME'] = monos_df['CHARTTIME'].astype('datetime64[ns]')
monos_df['ADMITTIME'] = monos_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
monos_df = monos_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
monos_df['row'] = monos_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of monos_df
monos_1 = monos_df[monos_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
monos_1['TIME_INTERVAL'] = monos_1['CHARTTIME'] - monos_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
monos_24 = monos_1[monos_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

monos_24.shape

(3192, 10)

In [59]:
## Flag each admission: True when its HADM_ID is among those kept in monos_24
admissions['Monos'] = admissions['HADM_ID'].isin(monos_24['HADM_ID'])
admissions.shape 

(3299, 20)

## 3.5 NEUTs 

In [60]:
## Load the neuts CSV and parse the two timestamps used below
neuts_df = pd.read_csv("Data_3173_SV/neuts.csv")

neuts_df['CHARTTIME'] = neuts_df['CHARTTIME'].astype('datetime64[ns]')
neuts_df['ADMITTIME'] = neuts_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
neuts_df = neuts_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
neuts_df['row'] = neuts_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of neuts_df
neuts_1 = neuts_df[neuts_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
neuts_1['TIME_INTERVAL'] = neuts_1['CHARTTIME'] - neuts_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
neuts_24 = neuts_1[neuts_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

neuts_24.shape

(3192, 10)

In [61]:
## Flag each admission: True when its HADM_ID is among those kept in neuts_24
admissions['Neuts'] = admissions['HADM_ID'].isin(neuts_24['HADM_ID'])
admissions.shape 

(3299, 21)

## 3.6 Platelet

In [62]:
## Load the platelet CSV and parse the two timestamps used below
platelet_df = pd.read_csv("Data_3173_SV/platelet.csv")

platelet_df['CHARTTIME'] = platelet_df['CHARTTIME'].astype('datetime64[ns]')
platelet_df['ADMITTIME'] = platelet_df['ADMITTIME'].astype('datetime64[ns]')

## Sort dataframe so each admission's rows are in chart-time order
platelet_df = platelet_df.sort_values(by=['HADM_ID', 'CHARTTIME'])

## Giving row number to every row (1 = earliest row of its HADM_ID)
platelet_df['row'] = platelet_df.groupby(['HADM_ID']).cumcount()+1

## Take the first row; .copy() makes it an independent frame so the column
## added below does not write into a slice of platelet_df
platelet_1 = platelet_df[platelet_df.row == 1].copy()

## Offset of the first result from admission (negative = charted before ADMITTIME)
platelet_1['TIME_INTERVAL'] = platelet_1['CHARTTIME'] - platelet_1['ADMITTIME']

## Keep admissions whose first result is within one day either side of admission
platelet_24 = platelet_1[platelet_1['TIME_INTERVAL'].between(pd.Timedelta(days=-1), pd.Timedelta(days=1))]

platelet_24.shape

(3196, 11)

In [63]:
## Flag each admission: True when its HADM_ID is among those kept in platelet_24
admissions['Platelet'] = admissions['HADM_ID'].isin(platelet_24['HADM_ID'])
admissions.shape 

(3299, 22)

In [64]:
## Write the HADM_ID + availability-flag table to the working directory;
## index=False drops the row labels carried over from heart_rate_df
admissions.to_csv("admissions_3K.csv", index=False)