In [89]:
import pandas as pd
import datetime as datetime
import numpy as np
from dateutil.relativedelta import relativedelta
from sklearn.preprocessing import OneHotEncoder

In [154]:
del ptnt_demog

# MIMIC-III Critical Care Database

MIMIC-III (Medical Information Mart for Intensive Care III) is a large, freely-available database comprising deidentified health-related data associated with over forty thousand patients who stayed in critical care units of the Beth Israel Deaconess Medical Center between 2001 and 2012.

The database includes information such as demographics, vital sign measurements made at the bedside (~1 data point per hour), laboratory test results, procedures, medications, caregiver notes, imaging reports, and mortality (both in and out of hospital).

MIMIC supports a diverse range of analytic studies spanning epidemiology, clinical decision-rule improvement, and electronic tool development. It is notable for three factors:

it is freely available to researchers worldwide
it encompasses a diverse and very large population of ICU patients
it contains high temporal resolution data including lab results, electronic documentation, and bedside monitor trends and waveforms.

Citations: 
MIMIC-III, a freely accessible critical care database. Johnson AEW, Pollard TJ, Shen L, Lehman L, Feng M, Ghassemi M, Moody B, Szolovits P, Celi LA, and Mark RG. Scientific Data (2016). DOI: 10.1038/sdata.2016.35. Available at: http://www.nature.com/articles/sdata201635

Pollard, T. J. & Johnson, A. E. W. The MIMIC-III Clinical Database http://dx.doi.org/10.13026/C2XW26 (2016).



# IMPORTING DATA
The mimic III database was downloaded and reconstructed locally using posgresql. The database was managed graphically using Portico. 
A query was run on the mimic III database to generate demographic data and data concerning hospital and ICU stays for patients diagnosed with sepsis. The query was exported from Porticoto the file ADMISSIONS_ICUSTAY_SEPSIS.csv. The data was read into a pandas dataframe ptnt_demog 


In [155]:
ptnt_demog = pd.DataFrame.from_csv('ADMISSIONS_ICUSTAY_SEPSIS.csv')
ptnt_demog.head()

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
21,M,2047-04-04 00:00:00,2135-02-08 00:00:00,1,2135-01-30 20:50:00,2135-02-08 02:08:00,2135-02-08 02:08:00,SEPSIS,1,Medicare,MARRIED,WHITE,MICU,MICU,2135-01-30 20:53:34,2135-02-08 05:38:46
94,M,2101-09-20 00:00:00,2178-08-17 00:00:00,1,2176-02-25 16:49:00,2176-02-29 17:45:00,,SEPSIS,0,Medicare,MARRIED,ASIAN,CCU,CCU,2176-02-25 16:50:03,2176-02-26 19:11:57
157,M,2025-12-03 00:00:00,2106-08-25 00:00:00,1,2106-06-17 19:51:00,2106-06-24 17:50:00,,SEPSIS,0,Medicare,SINGLE,WHITE,MICU,MICU,2106-06-17 19:51:46,2106-06-21 22:43:29
166,F,1805-01-23 00:00:00,2105-01-24 00:00:00,1,2105-01-23 22:57:00,2105-01-24 03:48:00,2105-01-24 03:48:00,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,2105-01-23 23:00:10,2105-01-24 05:39:49
188,M,2105-05-18 00:00:00,2162-01-17 00:00:00,1,2157-03-07 11:08:00,2157-03-10 13:50:00,,SEPSIS,0,Private,MARRIED,WHITE,SICU,SICU,2157-03-07 11:08:51,2157-03-08 15:42:30


Date and time data imported in string format is converted to pandas.datetime objects

In [156]:
#transforms date time data imported in string format to pandas.datetime objects. 
dates_and_times = ['dob', 'dod', 'admittime', 'dischtime', 'deathtime', 'intime', 'outtime']
for thing in dates_and_times:
    ptnt_demog[thing] = pd.to_datetime(ptnt_demog[thing])
    
ptnt_demog.head()


Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
21,M,2047-04-04,2135-02-08,1,2135-01-30 20:50:00,2135-02-08 02:08:00,2135-02-08 02:08:00,SEPSIS,1,Medicare,MARRIED,WHITE,MICU,MICU,2135-01-30 20:53:34,2135-02-08 05:38:46
94,M,2101-09-20,2178-08-17,1,2176-02-25 16:49:00,2176-02-29 17:45:00,NaT,SEPSIS,0,Medicare,MARRIED,ASIAN,CCU,CCU,2176-02-25 16:50:03,2176-02-26 19:11:57
157,M,2025-12-03,2106-08-25,1,2106-06-17 19:51:00,2106-06-24 17:50:00,NaT,SEPSIS,0,Medicare,SINGLE,WHITE,MICU,MICU,2106-06-17 19:51:46,2106-06-21 22:43:29
166,F,1805-01-23 00:00:00,2105-01-24,1,2105-01-23 22:57:00,2105-01-24 03:48:00,2105-01-24 03:48:00,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,2105-01-23 23:00:10,2105-01-24 05:39:49
188,M,2105-05-18,2162-01-17,1,2157-03-07 11:08:00,2157-03-10 13:50:00,NaT,SEPSIS,0,Private,MARRIED,WHITE,SICU,SICU,2157-03-07 11:08:51,2157-03-08 15:42:30


In [157]:
#for patient age, only the date information is needed so datetime data is converted to date
ptnt_demog['dob'] = ptnt_demog['dob'].apply(lambda x: x.date())
ptnt_demog['dod'] = ptnt_demog['dod'].apply(lambda x: x.date())
ptnt_demog[['dob', 'dod']].head()

Unnamed: 0_level_0,dob,dod
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1
21,2047-04-04,2135-02-08
94,2101-09-20,2178-08-17
157,2025-12-03,2106-08-25
166,1805-01-23,2105-01-24
188,2105-05-18,2162-01-17


In [171]:
#dates_and_times = ['dob', 'dod', 'admittime', 'dischtime', 'deathtime', 'intime', 'outtime']
#checking for null values in the data
for item in dates_and_times:
    isanan = ptnt_demog[item].isnull().values.any()
    print "{}   {}".format(item, isanan)

dob   False
dod   True
admittime   False
dischtime   False
deathtime   True
intime   False
outtime   False


In [179]:
#dod and deathtime are NaT for patients who did not die in hospital or who had not been confirmed dead
#at the time of the data collection. 
ptnt_demog[ptnt_demog['dod'].isnull()].head()

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
366,M,2112-05-22,NaT,0,2164-11-18 20:27:00,2164-11-22 15:18:00,NaT,SEPSIS,0,Medicare,SINGLE,HISPANIC OR LATINO,MICU,MICU,2164-11-18 20:28:22,2164-11-21 00:36:11
406,F,2058-01-29,NaT,0,2119-08-25 22:37:00,2119-09-01 18:00:00,NaT,SEPSIS,0,Medicaid,MARRIED,WHITE,MICU,MICU,2119-08-25 22:38:07,2119-08-28 18:40:47
406,F,2058-01-29,NaT,0,2126-03-11 23:06:00,2126-03-26 16:17:00,NaT,SEPSIS,0,Medicare,MARRIED,WHITE,MICU,CCU,2126-03-11 23:07:15,2126-03-18 22:17:14
406,F,2058-01-29,NaT,0,2120-09-17 05:03:00,2120-09-28 12:15:00,NaT,SEPSIS,0,Medicaid,MARRIED,WHITE,MICU,MICU,2120-09-17 05:04:59,2120-09-20 18:42:54
558,M,2149-10-22,NaT,0,2200-03-11 00:33:00,2200-03-14 15:39:00,NaT,SEPSIS,0,Private,MARRIED,WHITE,MICU,MICU,2200-03-11 00:34:54,2200-03-12 12:20:43


# Calculating Patient Age, Hospital and ICU Stays
The age of patient at the time of admission was calculated. Also calculated was the duration of 
hospital stay and duration of ICU stay. 
    

In [180]:
'''the code calculates age of patient at time of admission, hospital stay and ICU stay. 
    using relativedelta.years to get the age rounded to years,
    relativedelta.days to get hospital stay in days and relativedelta.hours
    to get icu stay in hours.
    '''
# age at time of admission is calculated as admittime - dob
for index, row in ptnt_demog.iterrows():
    age_val = relativedelta(row['admittime'], row['dob']).years
    ptnt_demog.set_value(index, 'age', age_val)
        
    #adm_stay in days
    adm_stay_val = relativedelta(row['dischtime'],row['admittime'])
    adm_stay_val = adm_stay_val.weeks*7 + adm_stay_val.days
    #icu_stay in hours
    icu_stay_val = relativedelta(row['outtime'],row['intime'])
    icu_stay_val = icu_stay_val.weeks*7*24 + icu_stay_val.days*24 + icu_stay_val.hours
    ptnt_demog.set_value(index, 'adm_stay_days', adm_stay_val)
    ptnt_demog.set_value(index, 'icu_stay_hours', icu_stay_val)
    
ptnt_demog.head()

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
21,M,2047-04-04,2135-02-08,1,2135-01-30 20:50:00,2135-02-08 02:08:00,2135-02-08 02:08:00,SEPSIS,1,Medicare,MARRIED,WHITE,MICU,MICU,2135-01-30 20:53:34,2135-02-08 05:38:46,87.0,15.0,368.0
94,M,2101-09-20,2178-08-17,1,2176-02-25 16:49:00,2176-02-29 17:45:00,NaT,SEPSIS,0,Medicare,MARRIED,ASIAN,CCU,CCU,2176-02-25 16:50:03,2176-02-26 19:11:57,74.0,4.0,26.0
157,M,2025-12-03,2106-08-25,1,2106-06-17 19:51:00,2106-06-24 17:50:00,NaT,SEPSIS,0,Medicare,SINGLE,WHITE,MICU,MICU,2106-06-17 19:51:46,2106-06-21 22:43:29,80.0,6.0,98.0
166,F,1805-01-23,2105-01-24,1,2105-01-23 22:57:00,2105-01-24 03:48:00,2105-01-24 03:48:00,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,2105-01-23 23:00:10,2105-01-24 05:39:49,300.0,0.0,6.0
188,M,2105-05-18,2162-01-17,1,2157-03-07 11:08:00,2157-03-10 13:50:00,NaT,SEPSIS,0,Private,MARRIED,WHITE,SICU,SICU,2157-03-07 11:08:51,2157-03-08 15:42:30,51.0,3.0,28.0


In [145]:
#relativedelta(ptnt_demog.iloc[0]['dod'], ptnt_demog.iloc[0]['dob']).years
ptnt_demog.columns

Index([u'gender', u'dob', u'dod', u'expire_flag', u'admittime', u'dischtime',
       u'deathtime', u'diagnosis', u'hospital_expire_flag', u'insurance',
       u'marital_status', u'ethnicity', u'first_careunit', u'last_careunit',
       u'intime', u'outtime', u'age', u'adm_stay_days', u'icu_stay_hours'],
      dtype='object')

In [146]:
#rename column
#ptnt_demog.rename(index=str, columns={"adm_stay_days": "hosp_stay_days"})

# Reality Check on Ages and Durations of Stay
Checking durations of stay and ages on low end for values < 0. Checking ages on the high end at 110yrs. 
Because I'm not sure what an unreasonable durations of stay would be for ICU or hospital stay will look for 
outliers in the data using statistical analysis later in preprocessing. 


In [181]:
ptnt_demog[ptnt_demog['age'] <  0]

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


## There are approximately 93 patients whos calculated age is > 110. All are > 300yrs. 

In [200]:
age_replace_vals = list(ptnt_demog[ptnt_demog['age'] > 110]['age'].unique())


In [205]:
age_replace_vals

[300, 307.0, 302.0, 305.0, 306.0, 303.0, 301.0, 310.0]

In [206]:
ptnt_demog['age'].replace(age_replace_vals, np.nan, inplace = True)
ptnt_demog['age'].head()

subject_id
21     87.0
94     74.0
157    80.0
166     NaN
188    51.0
Name: age, dtype: float64

In [207]:
#checking again for age values > 110
ptnt_demog[ptnt_demog['age'] > 110]

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


In [208]:
ptnt_demog[ptnt_demog['adm_stay_days'] < 0]

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


In [209]:
ptnt_demog[ptnt_demog['adm_stay_days'] > 365]

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


In [149]:
#ptnt_demog[ptnt_demog[['icu_stay']]<datetime.timedelta(days =0)] = np.nan
ptnt_demog[ptnt_demog['icu_stay_hours'] < 0]

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1


## Check for ICU stay longer than hospital stay 
shows that 347 patients had ICU stays longer than their hospital stays. 
Previously believed admittime and dischtime encompassed all of hospital stay including ICU
Need to review data descriptions to better understand the relationship


In [218]:
ptnt_demog[ptnt_demog['icu_stay_hours'] > (ptnt_demog['adm_stay_days']*24)].shape

(347, 19)

In [210]:
type(ptnt_demog.iloc[0]['age'])


numpy.float64

In [211]:
ptnt_demog.head()

Unnamed: 0_level_0,gender,dob,dod,expire_flag,admittime,dischtime,deathtime,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,intime,outtime,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
21,M,2047-04-04,2135-02-08,1,2135-01-30 20:50:00,2135-02-08 02:08:00,2135-02-08 02:08:00,SEPSIS,1,Medicare,MARRIED,WHITE,MICU,MICU,2135-01-30 20:53:34,2135-02-08 05:38:46,87.0,15.0,368.0
94,M,2101-09-20,2178-08-17,1,2176-02-25 16:49:00,2176-02-29 17:45:00,NaT,SEPSIS,0,Medicare,MARRIED,ASIAN,CCU,CCU,2176-02-25 16:50:03,2176-02-26 19:11:57,74.0,4.0,26.0
157,M,2025-12-03,2106-08-25,1,2106-06-17 19:51:00,2106-06-24 17:50:00,NaT,SEPSIS,0,Medicare,SINGLE,WHITE,MICU,MICU,2106-06-17 19:51:46,2106-06-21 22:43:29,80.0,6.0,98.0
166,F,1805-01-23,2105-01-24,1,2105-01-23 22:57:00,2105-01-24 03:48:00,2105-01-24 03:48:00,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,2105-01-23 23:00:10,2105-01-24 05:39:49,,0.0,6.0
188,M,2105-05-18,2162-01-17,1,2157-03-07 11:08:00,2157-03-10 13:50:00,NaT,SEPSIS,0,Private,MARRIED,WHITE,SICU,SICU,2157-03-07 11:08:51,2157-03-08 15:42:30,51.0,3.0,28.0


In [153]:
ptnt_demog.drop(['dob', 'expire_flag', 'dod', 'admittime', 'dischtime', 'deathtime', 'intime', 'outtime'],axis = 1, inplace = True)
ptnt_demog.head()

Unnamed: 0_level_0,gender,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,age,adm_stay_days,icu_stay_hours
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
21,M,SEPSIS,1,Medicare,MARRIED,WHITE,MICU,MICU,87.0,15.0,368.0
94,M,SEPSIS,0,Medicare,MARRIED,ASIAN,CCU,CCU,76.0,4.0,26.0
157,M,SEPSIS,0,Medicare,SINGLE,WHITE,MICU,MICU,80.0,6.0,98.0
166,F,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,300.0,0.0,6.0
188,M,SEPSIS,0,Private,MARRIED,WHITE,SICU,SICU,56.0,3.0,28.0


In [25]:
ptnt_demog[(ptnt_demog['age'] < datetime.timedelta(days = 0))] 

Unnamed: 0_level_0,gender,diagnosis,hospital_expire_flag,insurance,marital_status,ethnicity,first_careunit,last_careunit,admission_stay,icu_stay,age,hosp_stay
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
166,F,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,CCU,0 days 04:51:00,0 days 06:39:39,-103930 days +00:25:26.290448,0 days 04:51:00
1621,F,SEPSIS,0,Medicare,WIDOWED,WHITE,SICU,MICU,38 days 06:51:00,5 days 12:50:14,-103843 days +00:25:26.290448,38 days 06:51:00
1621,F,SEPSIS,0,Medicare,WIDOWED,WHITE,MICU,MICU,38 days 06:51:00,5 days 05:25:11,-103843 days +00:25:26.290448,38 days 06:51:00
2598,F,SEPSIS,0,Medicare,WIDOWED,WHITE,MICU,MICU,6 days 00:33:00,1 days 06:38:53,-101049 days +00:25:26.290448,6 days 00:33:00
2882,M,SEPSIS,1,Medicare,WIDOWED,WHITE,CSRU,CSRU,8 days 20:00:00,2 days 03:38:11,-103922 days +00:25:26.290448,8 days 20:00:00
3122,M,SEPSIS,0,Medicare,MARRIED,WHITE,MICU,MICU,9 days 06:28:00,7 days 10:43:34,-103467 days +00:25:26.290448,9 days 06:28:00
3310,F,SEPSIS,1,Medicare,WIDOWED,WHITE,MICU,MICU,9 days 04:34:00,8 days 21:11:09,-103922 days +00:25:26.290448,9 days 04:34:00
4257,F,SEPSIS,0,Medicare,WIDOWED,WHITE,MICU,MICU,3 days 21:23:00,1 days 22:39:23,-103892 days +00:25:26.290448,3 days 21:23:00
4276,F,SEPSIS,1,Medicare,WIDOWED,BLACK/AFRICAN AMERICAN,MICU,MICU,6 days 18:35:00,6 days 20:20:01,-103924 days +00:25:26.290448,6 days 18:35:00
4549,F,SEPSIS,1,Medicare,SINGLE,UNKNOWN/NOT SPECIFIED,MICU,MICU,0 days 21:21:00,1 days 00:54:44,-103930 days +00:25:26.290448,0 days 21:21:00


In [45]:
def to_timedelta(time_string): 
    age = time_string.split()
    days = age[0]
    hms = age[2].split(":")
    print hms
    delta = timedelta(days = int(days), hours = int(hms[0]), minutes = int(hms[1]), seconds = int(hms[2]))
    return delta




In [63]:
'''this was code to manually convert the dates in the file to datetime objects
age = ptnt_demog.age.iloc[0]
print age
dummy = age.split()
print dummy
days = dummy[0]
print days
hms = dummy[2].split(":")
print hms

datetime_object = datetime.strptime(dob, '%Y %m %d %H:%M:%S')




and these are scraps of where i figured out i can just use to_datetime
ptnt_demog['dod2'] = pd.to_datetime(ptnt_demog['dod']) 
ptnt_demog.head()
onedate = to_timedelta(ptnt_demog.age.iloc[0])
otherdate =  to_timedelta(ptnt_demog.age.iloc[1])
print onedate - otherdate
print onedate



'''

32077 days 20:50:00
['32077', 'days', '20:50:00']
32077
['20', '50', '00']


In [30]:
'''example code for timedelta
from datetime import datetime, timedelta
# we specify the input and the format...
t = datetime.strptime("05:20:25","%H:%M:%S")
# ...and use datetime's hour, min and sec properties to build a timedelta
delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
'''

#durations like icu stay etc are imported as strings. the code below 
#converts durations in strings to timedelta for use as variable
from datetime import timedelta
from datetime import datetime

delta = timedelta(days = int(duration[0]), hours = int(duration[1]), minutes = int(duration[2]), seconds = int(duration[3]))
print delta


32077 days, 20:50:00


TypeError: unsupported type for timedelta days component: str

#code for converting sql dates to date-time objects
from datetime import datetime
dob = ptnt_demog.iloc[0].dob.split()
ymd = dob[0].split('-')
print ymd
ymd.append(dob[1])
print ymd
dob = " ".join(ymd)

datetime_object = datetime.strptime(dob, '%Y %m %d %H:%M:%S')