# Admission Table Cleaning

In [138]:
import pandas as pd  # type: ignore
import os 
import glob 
from datetime import datetime
import numpy as np

import warnings
# Temporarily suppress FutureWarnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)

In [139]:
# Location of the MIMIC-III demo tables; it also becomes the working directory
# for the rest of the notebook.
directory = '/Users/maxb/Library/CloudStorage/OneDrive-UniversityofWaterloo/Hospital Research/Datasets/MIMIC-III_demo'
os.chdir(directory)

# Read the raw admissions table from that folder.
admis_df = pd.read_csv(os.path.join(directory, "ADMISSIONS.csv"))

In [140]:
# Parse the four timestamp columns and order the table chronologically.
#
# pd.to_datetime replaces the per-row `datetime.strptime` calls: it is vectorised and
# does not depend on what the name `datetime` is bound to, so this cell can be re-run
# at any point in the session.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# Placeholder written into missing emergency-department timestamps. Both ED columns
# receive the same value, so edouttime - edregtime is zero for those rows.
# NOTE(review): the date is presumably chosen because it sits just above
# pd.Timestamp.min -- confirm before changing it.
MISSING_ED_TIME = "1677-09-22 00:00:00"

for ed_col in ("edregtime", "edouttime"):
    admis_df[ed_col] = admis_df[ed_col].fillna(MISSING_ED_TIME)

# Strings that do not match TIMESTAMP_FORMAT raise; missing values become NaT.
for time_col in ("admittime", "dischtime", "edregtime", "edouttime"):
    admis_df[time_col] = pd.to_datetime(admis_df[time_col], format=TIMESTAMP_FORMAT)

# Sorting the dates from lowest to highest
admis_df = admis_df.sort_values('admittime')

Creating readmission feature

In [141]:
# Flag every admission for 30-day readmission, keeping one "index" stay per patient.
#
# Values written to the new '30d_read' column (stored as floats):
#   0 -> stay is kept; no readmission within the window was found for this patient
#   1 -> stay is kept; the patient's next admission began within the window of this discharge
#   2 -> stay is marked for removal
#
# The window is a pandas Timedelta, which avoids rebinding `datetime` (imported as a
# class at the top of the notebook) to the module just to reach `timedelta`.
# Relies on admis_df already being ordered by 'admittime' and on unique index labels.
READMISSION_WINDOW = pd.Timedelta(days=30)

subjects = admis_df['subject_id'].unique()
print(len(subjects))

# Patients with a single admission keep this default flag of 0.
readmission_flags = pd.Series(0.0, index=admis_df.index)

for _, stays in admis_df.groupby('subject_id', sort=False):
    if len(stays) < 2:
        continue

    # Absolute gap between each discharge and the same patient's next admission.
    # The last stay has no successor, so its gap is NaT and never qualifies.
    gap_to_next = (stays['admittime'].shift(-1) - stays['dischtime']).abs()
    readmitted = gap_to_next <= READMISSION_WINDOW

    # Start by marking all of the patient's stays for removal, then rescue one.
    readmission_flags.loc[stays.index] = 2.0
    if readmitted.any():
        # When several stays qualify, only the most recent qualifying stay is kept.
        readmission_flags.loc[readmitted[readmitted].index[-1]] = 1.0
    else:
        # No readmission found: keep the patient's first stay.
        readmission_flags.loc[stays.index[0]] = 0.0

admis_df['30d_read'] = readmission_flags

100


In [142]:
# Show every remaining row for the patient at position 3 of `subjects`.
sample_subject = subjects[3]
admis_df[admis_df['subject_id'] == sample_subject]

Unnamed: 0,row_id,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_chartevents_data,30d_read
117,40992,43879,158100,2106-08-30 15:43:00,2106-08-31 15:15:00,,EMERGENCY,CLINIC REFERRAL/PREMATURE,HOME,Medicare,ENGL,PROTESTANT QUAKER,MARRIED,BLACK/AFRICAN AMERICAN,1677-09-22,1677-09-22,PLEURAL EFFUSION,0,1,0.0


IMPORTANT NOTE: For a patient readmitted within 30 days, the later stay must be removed and the readmission flag placed on the prior stay.

In [143]:
# Report how many stays ended with the patient dying in hospital.
died_in_hospital = admis_df['hospital_expire_flag'] == 1
print(int(died_in_hospital.sum()))

# Remove those stays, then remove every stay whose readmission flag is 2.
admis_df = admis_df.drop(index=admis_df.loc[died_in_hospital].index)
flagged_for_removal = admis_df.loc[admis_df['30d_read'] == 2].index
admis_df = admis_df.drop(index=flagged_for_removal)

# Binary marital status: 1 for "MARRIED", 0 for anything else (including missing values).
admis_df['marital_status'] = admis_df['marital_status'].map(
    lambda status: 1 if status == "MARRIED" else 0
)

# NOTE: a numeric encoding of `insurance` was drafted here but left disabled;
# the column keeps its original text values.

40


Creating Features:

In [144]:
# Derived durations:
#   admit_duration -> discharge time minus admission time
#   ed_duration    -> emergency-department exit time minus registration time
admis_df = admis_df.assign(
    admit_duration=admis_df['dischtime'].sub(admis_df['admittime']),
    ed_duration=admis_df['edouttime'].sub(admis_df['edregtime']),
)

Removing unnecessary variables

In [145]:
# Columns that are not carried into the cleaned table.
col_drops = ["row_id", "language", "religion", "hospital_expire_flag", "hadm_id", "has_chartevents_data", "edregtime", "edouttime", "deathtime", "diagnosis"]

# Remove them all in a single call.
admis_df = admis_df.drop(columns=col_drops)

In [146]:
cols = list(admis_df.columns)

# Find the first column (in table order) that still contains a missing value, if any.
first_nan_col = next(
    (name for name in cols if admis_df[name].isnull().values.any()),
    None,
)

# Only the first offending column is reported; nothing is printed when the table is complete.
if first_nan_col is not None:
    print(first_nan_col)
    print("Nan value found")

# Admission table has been cleaned

In [147]:
# Print the cleaned table's (rows, columns), then display its first five rows.
table_shape = admis_df.shape
print("Admission:", table_shape)
admis_df.head(5)

Admission: (67, 12)


Unnamed: 0,subject_id,admittime,dischtime,admission_type,admission_location,discharge_location,insurance,marital_status,ethnicity,30d_read,admit_duration,ed_duration
98,42231,2102-08-29 07:15:00,2102-09-06 16:20:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,1,WHITE,0.0,8 days 09:05:00,0 days 00:00:00
118,43881,2104-09-24 17:31:00,2104-09-30 16:17:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME HEALTH CARE,Private,1,WHITE,1.0,5 days 22:46:00,0 days 06:43:00
117,43879,2106-08-30 15:43:00,2106-08-31 15:15:00,EMERGENCY,CLINIC REFERRAL/PREMATURE,HOME,Medicare,1,BLACK/AFRICAN AMERICAN,0.0,0 days 23:32:00,0 days 00:00:00
30,10088,2107-01-04 11:59:00,2107-01-11 15:45:00,EMERGENCY,EMERGENCY ROOM ADMIT,SNF,Medicare,0,WHITE,1.0,7 days 03:46:00,0 days 03:51:00
22,10061,2107-01-16 11:33:00,2107-02-10 11:30:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,REHAB/DISTINCT PART HOSP,Medicare,0,WHITE,0.0,24 days 23:57:00,0 days 00:00:00


In [148]:
# Number of retained stays that carry the 30-day readmission flag.
readmitted_count = int((admis_df['30d_read'] == 1).sum())
print(readmitted_count)

9


In [149]:
# Write the cleaned admissions table (index included) to the cleaned-dataset folder.
cleaned_csv_path = '/Users/maxb/Library/CloudStorage/OneDrive-UniversityofWaterloo/Hospital Research/Datasets/Cleaned MIMIC-III Dataset/ADMISSION.csv'
admis_df.to_csv(cleaned_csv_path)