In [1]:
from tensorflow.keras import backend as K
import pandas as pd
import tensorflow as tf
import keras
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler

import numpy as np


In [2]:
# Load the CSV file
icu_data = pd.read_csv('C:/Users/two_s/OneDrive/Desktop/ICUSTAYS.csv')

In [3]:
icu_data.head()

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,12742,10006,142345,206504,carevue,MICU,MICU,52,52,2164-10-23 21:10:15,2164-10-25 12:21:07,1.6325
1,12747,10011,105331,232110,carevue,MICU,MICU,15,15,2126-08-14 22:34:00,2126-08-28 18:59:00,13.8507
2,12749,10013,165520,264446,carevue,MICU,MICU,15,15,2125-10-04 23:38:00,2125-10-07 15:13:52,2.6499
3,12754,10017,199207,204881,carevue,CCU,CCU,7,7,2149-05-29 18:52:29,2149-05-31 22:19:17,2.1436
4,12755,10019,177759,228977,carevue,MICU,MICU,15,15,2163-05-14 20:43:56,2163-05-16 03:47:04,1.2938


In [None]:
# Ensure intime and outtime are datetime objects
icu_data['intime'] = pd.to_datetime(icu_data['intime'])
icu_data['outtime'] = pd.to_datetime(icu_data['outtime'])

# Calculate the total ICU stay duration in hours
icu_data['icu_duration_hours'] = (icu_data['outtime'] - icu_data['intime']).dt.total_seconds() / 3600

# Extract useful time-based features from intime
icu_data['admission_hour'] = icu_data['intime'].dt.hour
icu_data['admission_dayofweek'] = icu_data['intime'].dt.dayofweek
icu_data = icu_data.drop(['intime', 'outtime'], axis=1)

icu_data.head()

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,los,icu_duration_hours,admission_hour,admission_dayofweek
0,12742,10006,142345,206504,carevue,MICU,MICU,52,52,1.6325,39.181111,21,1
1,12747,10011,105331,232110,carevue,MICU,MICU,15,15,13.8507,332.416667,22,2
2,12749,10013,165520,264446,carevue,MICU,MICU,15,15,2.6499,63.597778,23,3
3,12754,10017,199207,204881,carevue,CCU,CCU,7,7,2.1436,51.446667,18,3
4,12755,10019,177759,228977,carevue,MICU,MICU,15,15,1.2938,31.052222,20,5


In [5]:

careunit_data=icu_data['first_careunit']

careunit_encoded = tf.keras.utils.to_categorical(
    careunit_data.astype('category').cat.codes
)

# Convert back to DataFrame for better readability
careunit_encoded_df = pd.DataFrame(
    careunit_encoded,
    columns=[f"careunit_{cat}" for cat in careunit_data.unique()]
)

icu_data_encoded = pd.concat([icu_data, careunit_encoded_df], axis=1)
icu_data_encoded.drop('first_careunit', axis=1, inplace=True)

# One-hot encode 'last_careunit'
last_careunit_data = icu_data['last_careunit']

last_careunit_encoded = tf.keras.utils.to_categorical(
    last_careunit_data.astype('category').cat.codes
)
last_careunit_encoded_df = pd.DataFrame(
    last_careunit_encoded,
    columns=[f"lastcareunit_{cat}" for cat in last_careunit_data.unique()]
)
icu_data_encoded = pd.concat([icu_data_encoded, last_careunit_encoded_df], axis=1)
icu_data_encoded.drop('last_careunit', axis=1, inplace=True)

# One-hot encode only 'dbsource' without losing other columns
dbsource_encoded = pd.get_dummies(icu_data['dbsource'], prefix='dbsource')
icu_data_encoded = pd.concat([icu_data_encoded, dbsource_encoded], axis=1)
icu_data_encoded.drop('dbsource', axis=1, inplace=True)

icu_data_encoded.head()


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,first_wardid,last_wardid,los,icu_duration_hours,admission_hour,admission_dayofweek,...,careunit_SICU,careunit_CSRU,careunit_TSICU,lastcareunit_MICU,lastcareunit_CCU,lastcareunit_SICU,lastcareunit_CSRU,lastcareunit_TSICU,dbsource_carevue,dbsource_metavision
0,12742,10006,142345,206504,52,52,1.6325,39.181111,21,1,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
1,12747,10011,105331,232110,15,15,13.8507,332.416667,22,2,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
2,12749,10013,165520,264446,15,15,2.6499,63.597778,23,3,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
3,12754,10017,199207,204881,7,7,2.1436,51.446667,18,3,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,True,False
4,12755,10019,177759,228977,15,15,1.2938,31.052222,20,5,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False


In [6]:


# Select numerical columns
numerical_cols = ['los', 'icu_duration_hours', 'admission_hour']

icu_data_encoded['los'] = icu_data_encoded['los'].apply(lambda x: min(x, 30))

# Apply Min-Max scaling
scaler = MinMaxScaler()
icu_data_encoded[numerical_cols] = scaler.fit_transform(icu_data_encoded[numerical_cols])

icu_data_encoded.head()
#print(icu_data_encoded[['los', 'icu_duration_hours', 'admission_hour']])


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,first_wardid,last_wardid,los,icu_duration_hours,admission_hour,admission_dayofweek,...,careunit_SICU,careunit_CSRU,careunit_TSICU,lastcareunit_MICU,lastcareunit_CCU,lastcareunit_SICU,lastcareunit_CSRU,lastcareunit_TSICU,dbsource_carevue,dbsource_metavision
0,12742,10006,142345,206504,52,52,0.051067,0.043246,0.913043,1,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
1,12747,10011,105331,232110,15,15,0.459783,0.389364,0.956522,2,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
2,12749,10013,165520,264446,15,15,0.0851,0.072066,1.0,3,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
3,12754,10017,199207,204881,7,7,0.068164,0.057724,0.782609,3,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,True,False
4,12755,10019,177759,228977,15,15,0.039737,0.033651,0.869565,5,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
