# Prediction of Hospital Readmissions

This notebook's goal is to reproduce the claims from Zebin and Chaussalet's paper, 'Design and implementation of a deep recurrent model for prediction of readmission in urgent care using electronic health records'.



## Claims

When predicting ICU readmissions:

1. LSTM+CNN produced higher accuracy than logistic regression, random forest, and SVM.
2. LSTM+CNN produced higher precision than logistic regression, random forest, and SVM.
3. LSTM+CNN produced higher recall than logistic regression and SVM.

In [None]:
import numpy as np
import pandas as pd
import torch
import rdkit
import sklearn
import networkx
import tqdm
#import pyhealth
import matplotlib.pyplot as plt
import csv
import os

In [None]:
# Load the PATIENTS table and preview its first rows.
patients_csv = './mimic-iii/PATIENTS.csv'
patients = pd.read_csv(patients_csv)
patients.head()

In [None]:
# Load the ADMISSIONS table and preview its first rows.
admissions_csv = './mimic-iii/ADMISSIONS.csv'
admissions = pd.read_csv(admissions_csv)
admissions.head()

In [None]:
# Load the TRANSFERS table and preview its first rows.
transfers_csv = './mimic-iii/TRANSFERS.csv'
transfers = pd.read_csv(transfers_csv)
transfers.head()

In [None]:
# NOTE: This file is 34 GB!!!

# chartEvents = pd.read_csv('./mimic-iii/CHARTEVENTS.csv')
# chartEvents.head()


In [None]:

# rows_to_read = 10000000
# rows_to_skip = 0
# TOTAL_ROWS =  330712483
# file_name = 'data1.csv'

# chartEvents = pd.read_csv('./mimic-iii/CHARTEVENTS.csv', usecols=['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID','ITEMID','CHARTTIME'], skiprows=rows_to_skip, nrows=rows_to_read)
# chartEvents.head()

In [None]:
# Partial list of item IDs identified so far for admission, transfer, and
# discharge events. The commented-out cells in this notebook use it to filter
# CHARTEVENTS rows by ITEMID.
transferID = [
    920, 924, 4175, 1517,
    227088, 226892, 227022, 220003,
    228111, 225279, 225406, 227345,
    227848, 3543, 227089, 226516,
]

In [None]:
# if os.path.isfile(file_name):
#     os.remove(file_name)
# out = chartEvents[np.in1d(chartEvents['ITEMID'], transferID )]
# print(out.head())
# with open(file_name, 'w', newline='') as file:
#     w = csv.writer(file)
#     w.writerow(out.columns)
#     for row in out.values:
#         w.writerow(row)

In [None]:
# #out = chartEvents[(chartEvents['SUBJECT_ID'] == 250) & (chartEvents['CHARTTIME'] >= '2188-11-22 00:00:00') ]
# while chartEvents.shape[0] >= rows_to_read:
    
#     rows_to_skip += rows_to_read
#     chartEvents = pd.read_csv('./mimic-iii/CHARTEVENTS.csv', usecols=['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID','ITEMID','CHARTTIME'], skiprows=range(1,rows_to_skip+1), nrows=rows_to_read)
#     #chartEvents = pd.read_csv('./mimic-iii/CHARTEVENTS.csv',usecols=[0,1,2,3,4,5], skiprows=rows_to_skip, nrows=rows_to_read, header=None)
#     #print(chartEvents.head())
#     out = chartEvents[np.in1d(chartEvents['ITEMID'], transferID )]
#     print('{:.2%}'.format(rows_to_skip / TOTAL_ROWS), ':\t', chartEvents.shape[0])
#     with open(file_name, 'a',newline='') as file:
#         w = csv.writer(file)
#         for row in out.values:
#             w.writerow(row)
    
    
    

In [None]:
# chartEvents = pd.read_csv('./CHARTEVENTS_TRANSFER.csv', usecols=['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID','ITEMID','CHARTTIME'])
# chartEvents = pd.read_csv('./CHARTEVENTS_TRANSFER.csv')
# chartEvents.head()

In [None]:
#np.sum(patients['EXPIRE_FLAG'])

## Example

Patient with `SUBJECT_ID = 250`

In [None]:
# Demographic row(s) for the example patient.
patients.loc[patients['SUBJECT_ID'].eq(250)]

In [None]:
# Hospital admissions recorded for the example patient.
admissions.loc[admissions['SUBJECT_ID'].eq(250)]

In [None]:
# Transfer records for the example patient.
transfers.loc[transfers['SUBJECT_ID'].eq(250)]

## Attempt to Read CHARTEVENTS

In [None]:
# chartEvents[chartEvents['SUBJECT_ID'] == 250]

In [None]:
import dask.dataframe as dd

# Read CHARTEVENTS through Dask, restricted to the identifier and timestamp
# columns, with explicit dtypes for each of them.
chartevents_columns = ['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID','ITEMID','CHARTTIME']
chartevents_dtypes = {
    'SUBJECT_ID': np.int64,
    'HADM_ID': np.int64,
    # NOTE(review): float64 here presumably accommodates missing ICUSTAY_ID
    # values - confirm against the data.
    'ICUSTAY_ID': np.float64,
    'ITEMID': np.int64,
    'CHARTTIME': np.str_,
}

testdd = dd.read_csv(
    './mimic-iii/CHARTEVENTS.csv',
    usecols=chartevents_columns,
    dtype=chartevents_dtypes,
    assume_missing=True,
)

# Preview the first rows to confirm the read works.
testdd.head()

### Success

`CHARTEVENTS` was read successfully with Dask; the next cell converts the result to a pandas DataFrame.

In [None]:
# testdd[testdd['SUBJECT_ID'] == 250].head()
# testdd[testdd.ITEMID == 250]
# Evaluate the Dask dataframe and keep the computed result as `testpd`.
# NOTE(review): no row filter is applied before compute(), so every row of the
# selected CHARTEVENTS columns is loaded into memory - confirm this fits.
testpd = testdd.compute()

In [None]:
# Chart events recorded for the example patient.
testpd.loc[testpd['SUBJECT_ID'].eq(250)]

In [None]:
# (rows, columns) of the computed CHARTEVENTS frame.
testpd.shape

## Find Unique Visits

`SUBJECT_ID` values of interest: 291, 283, 250

In [None]:
# Number of distinct hospital admission IDs in TRANSFERS (a missing value,
# if present, counts as one distinct entry).
transfers['HADM_ID'].nunique(dropna=False)

In [None]:
# Preview the first rows of the patients table.
patients.head()

In [None]:
# Load the ICUSTAYS table and show the rows for patient 291.
icustays_csv = './mimic-iii/ICUSTAYS.csv'
icustays = pd.read_csv(icustays_csv)
icustays.loc[icustays['SUBJECT_ID'].eq(291)]

In [None]:
# Transfer records for patient 291.
transfers.loc[transfers['SUBJECT_ID'].eq(291)]

In [None]:
# Number of distinct patients with at least one row in ICUSTAYS.
icustays['SUBJECT_ID'].nunique(dropna=False)

In [None]:
# Hospital admissions recorded for patient 291.
admissions.loc[admissions['SUBJECT_ID'].eq(291)]

In [None]:
# Display the patients table as it is before the date columns are parsed in
# the next cell.
patients

In [None]:
# Parse the birth and death dates; values that cannot be parsed become NaT.
for date_column in ('DOB', 'DOD'):
    patients[date_column] = pd.to_datetime(patients[date_column], errors='coerce')

patients

In [None]:
# Lifespan in years (DOD minus DOB) of the patient at index label 1.
# The year length is written out as 365.2425 days (mean Gregorian year)
# instead of np.timedelta64(1, 'Y'): recent pandas versions reject the 'Y'
# unit as an ambiguous duration. The result is NaN when either date is NaT.
(patients.DOD[1] - patients.DOB[1]) / pd.Timedelta(days=365.2425)

In [None]:
# patients_over_18 = patients[(((patients['DOD'] - patients['DOB']) / np.timedelta64(1, 'Y')) > 18) | (patients.EXPIRE_FLAG == 0)]

# Mean Gregorian calendar year, used to convert day counts to years.
DAYS_PER_YEAR = 365.2425

# NOTE: this is an alias, not a copy, so the AGE column added below also
# appears on `patients`. A later cell drops AGE from `patients`, so the alias
# is kept deliberately.
patients_over_18 = patients

# Lifespan (DOD minus DOB) in years, computed per patient. The previous code
# assigned the first patient's lifespan to every row. Plain date objects are
# subtracted one pair at a time, and the result is NaN where a date is missing.
# NOTE(review): date-level arithmetic presumably also sidesteps overflow of
# nanosecond timedeltas on widely separated dates - confirm.
birth_dates = pd.to_datetime(patients['DOB'], errors='coerce').dt.date
death_dates = pd.to_datetime(patients['DOD'], errors='coerce').dt.date
lifespan_days = pd.Series(
    [
        (died - born).days if pd.notna(born) and pd.notna(died) else np.nan
        for born, died in zip(birth_dates, death_dates)
    ],
    index=patients.index,
    dtype='float64',
)
patients_over_18['AGE'] = lifespan_days / DAYS_PER_YEAR



In [None]:
# Display the frame, including the AGE column assigned in the previous cell.
patients_over_18

In [None]:
# Demographic row(s) for patient 1234.
patients.loc[patients['SUBJECT_ID'].eq(1234)]

In [None]:
# Hospital admissions recorded for patient 1234.
admissions.loc[admissions['SUBJECT_ID'].eq(1234)]

In [None]:
# Parse the admission, discharge, and death timestamps; values that cannot be
# parsed become NaT.
for time_column in ('ADMITTIME', 'DISCHTIME', 'DEATHTIME'):
    admissions[time_column] = pd.to_datetime(admissions[time_column], errors='coerce')

# Re-check patient 1234 now that the timestamp columns are parsed.
admissions.loc[admissions['SUBJECT_ID'].eq(1234)]

In [None]:

# Remove the exploratory AGE column from the patients table.
# errors='ignore' keeps this cell safe to re-run: without it, a second run
# raises KeyError because AGE has already been dropped.
patients = patients.drop(columns='AGE', errors='ignore')
patients.head()

In [None]:
# Show how a row looks as a named tuple, and how many fields it has
# (the index is included as the first field). Only the first row is used.
for pat in patients.head(1).itertuples(name='patient'):
    print(pat, len(pat), sep='\n')

In [None]:
# Keep only the identifier and timestamp columns of ADMISSIONS.
admission_columns = ['SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME', 'DEATHTIME']
addmissions_subset = admissions.loc[:, admission_columns]
addmissions_subset.head()

In [None]:
# Attach each patient's demographic and death-date columns to their
# admissions. The left join keeps every admission row.
patient_columns = ['SUBJECT_ID','GENDER','DOB','DOD','DOD_HOSP','DOD_SSN','EXPIRE_FLAG']
addmissions_subset = addmissions_subset.merge(
    patients[patient_columns],
    how='left',
    on='SUBJECT_ID',
)
addmissions_subset.head(5)

In [None]:
# Drop DEATHTIME from the admissions subset.
# errors='ignore' keeps this cell safe to re-run: without it, a second run
# raises KeyError because the column is already gone.
addmissions_subset = addmissions_subset.drop(columns='DEATHTIME', errors='ignore')

In [None]:
# Inspect a single row of the merged frame.
addmissions_subset.iloc[:1]

In [None]:
# Make sure every date/time column of the merged frame is a datetime;
# values that cannot be parsed become NaT.
merged_date_columns = ['ADMITTIME', 'DISCHTIME', 'DOB', 'DOD', 'DOD_HOSP', 'DOD_SSN']
for date_column in merged_date_columns:
    addmissions_subset[date_column] = pd.to_datetime(addmissions_subset[date_column],errors='coerce')

In [None]:
# Age in whole years at admission (ADMITTIME minus DOB), truncated toward zero.
# The day count is divided by an explicit year length instead of
# np.timedelta64(1, 'Y'), because recent pandas versions reject the 'Y' unit
# as an ambiguous duration.
# Mean Gregorian calendar year, used to convert day counts to years.
DAYS_PER_YEAR = 365.2425

# As in the original expression, plain date objects are subtracted.
# NOTE(review): this presumably avoids overflow of nanosecond timedeltas on
# widely separated dates - confirm.
admit_dates = pd.to_datetime(addmissions_subset['ADMITTIME']).dt.date
birth_dates = pd.to_datetime(addmissions_subset['DOB']).dt.date
age_in_days = pd.Series(
    [(admitted - born).days for admitted, born in zip(admit_dates, birth_dates)],
    index=addmissions_subset.index,
    dtype='float64',
)

# astype(int) raises if any age is missing (NaN), so a missing ADMITTIME or
# DOB is reported rather than silently converted.
addmissions_subset['AGE'] = (age_in_days / DAYS_PER_YEAR).astype(int)

In [None]:
# Preview the admissions subset, including the AGE column added above.
addmissions_subset.head()