In [38]:
from tensorflow.keras import backend as K
import pandas as pd
import tensorflow as tf
import keras
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler

import numpy as np


In [11]:
# Load the CSV file
#icu_data = pd.read_csv('C:/Users/two_s/OneDrive/Desktop/ICUSTAYS.csv')
icu_data = pd.read_csv('C:/Users/two_s/OneDrive/Desktop/ICUSTAYS.csv')

In [12]:
icu_data.head()

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,12742,10006,142345,206504,carevue,MICU,MICU,52,52,2164-10-23 21:10:15,2164-10-25 12:21:07,1.6325
1,12747,10011,105331,232110,carevue,MICU,MICU,15,15,2126-08-14 22:34:00,2126-08-28 18:59:00,13.8507
2,12749,10013,165520,264446,carevue,MICU,MICU,15,15,2125-10-04 23:38:00,2125-10-07 15:13:52,2.6499
3,12754,10017,199207,204881,carevue,CCU,CCU,7,7,2149-05-29 18:52:29,2149-05-31 22:19:17,2.1436
4,12755,10019,177759,228977,carevue,MICU,MICU,15,15,2163-05-14 20:43:56,2163-05-16 03:47:04,1.2938


In [13]:
# Ensure intime and outtime are datetime objects
icu_data['intime'] = pd.to_datetime(icu_data['intime'])
icu_data['outtime'] = pd.to_datetime(icu_data['outtime'])

# Calculate the total ICU stay duration in hours
icu_data['icu_duration_hours'] = (icu_data['outtime'] - icu_data['intime']).dt.total_seconds() / 3600

# Extract useful time-based features from intime
icu_data['admission_hour'] = icu_data['intime'].dt.hour
icu_data['admission_dayofweek'] = icu_data['intime'].dt.dayofweek
icu_data = icu_data.drop(['intime', 'outtime'], axis=1)

icu_data.head()

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,los,icu_duration_hours,admission_hour,admission_dayofweek
0,12742,10006,142345,206504,carevue,MICU,MICU,52,52,1.6325,39.181111,21,1
1,12747,10011,105331,232110,carevue,MICU,MICU,15,15,13.8507,332.416667,22,2
2,12749,10013,165520,264446,carevue,MICU,MICU,15,15,2.6499,63.597778,23,3
3,12754,10017,199207,204881,carevue,CCU,CCU,7,7,2.1436,51.446667,18,3
4,12755,10019,177759,228977,carevue,MICU,MICU,15,15,1.2938,31.052222,20,5


In [14]:

careunit_data=icu_data['first_careunit']

careunit_encoded = tf.keras.utils.to_categorical(
    careunit_data.astype('category').cat.codes
)

# Convert back to DataFrame for better readability
careunit_encoded_df = pd.DataFrame(
    careunit_encoded,
    columns=[f"careunit_{cat}" for cat in careunit_data.unique()]
)

icu_data_encoded = pd.concat([icu_data, careunit_encoded_df], axis=1)
icu_data_encoded.drop('first_careunit', axis=1, inplace=True)

# One-hot encode 'last_careunit'
last_careunit_data = icu_data['last_careunit']

last_careunit_encoded = tf.keras.utils.to_categorical(
    last_careunit_data.astype('category').cat.codes
)
last_careunit_encoded_df = pd.DataFrame(
    last_careunit_encoded,
    columns=[f"lastcareunit_{cat}" for cat in last_careunit_data.unique()]
)
icu_data_encoded = pd.concat([icu_data_encoded, last_careunit_encoded_df], axis=1)
icu_data_encoded.drop('last_careunit', axis=1, inplace=True)

# One-hot encode only 'dbsource' without losing other columns
dbsource_encoded = pd.get_dummies(icu_data['dbsource'], prefix='dbsource')
icu_data_encoded = pd.concat([icu_data_encoded, dbsource_encoded], axis=1)
icu_data_encoded.drop('dbsource', axis=1, inplace=True)

icu_data_encoded.head()


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,first_wardid,last_wardid,los,icu_duration_hours,admission_hour,admission_dayofweek,...,careunit_SICU,careunit_CSRU,careunit_TSICU,lastcareunit_MICU,lastcareunit_CCU,lastcareunit_SICU,lastcareunit_CSRU,lastcareunit_TSICU,dbsource_carevue,dbsource_metavision
0,12742,10006,142345,206504,52,52,1.6325,39.181111,21,1,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
1,12747,10011,105331,232110,15,15,13.8507,332.416667,22,2,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
2,12749,10013,165520,264446,15,15,2.6499,63.597778,23,3,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False
3,12754,10017,199207,204881,7,7,2.1436,51.446667,18,3,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,True,False
4,12755,10019,177759,228977,15,15,1.2938,31.052222,20,5,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,True,False


In [65]:


# Select numerical columns
numerical_cols = ['los', 'icu_duration_hours', 'admission_hour']

icu_data_encoded['los'] = icu_data_encoded['los'].apply(lambda x: min(x, 30))

# Apply Min-Max scaling
scaler = MinMaxScaler()
icu_data_encoded[numerical_cols] = scaler.fit_transform(icu_data_encoded[numerical_cols])
icu_data_encoded['dbsource_carevue'] = icu_data_encoded['dbsource_carevue'].astype('float32')
icu_data_encoded['dbsource_metavision'] = icu_data_encoded['dbsource_metavision'].astype('float32')


#icu_data_encoded.head()
#data = scaler.fit_transform(icu_data_encoded.values)  # Ensure icu_data_encoded is preprocessed
#print(data.shape)
#print(icu_data_encoded[['los', 'icu_duration_hours', 'admission_hour']])
icu_data_encoded = icu_data_encoded.astype('float32')
print(icu_data_encoded.dtypes)



row_id                 float32
subject_id             float32
hadm_id                float32
icustay_id             float32
first_wardid           float32
last_wardid            float32
los                    float32
icu_duration_hours     float32
admission_hour         float32
admission_dayofweek    float32
careunit_MICU          float32
careunit_CCU           float32
careunit_SICU          float32
careunit_CSRU          float32
careunit_TSICU         float32
lastcareunit_MICU      float32
lastcareunit_CCU       float32
lastcareunit_SICU      float32
lastcareunit_CSRU      float32
lastcareunit_TSICU     float32
dbsource_carevue       float32
dbsource_metavision    float32
dtype: object


In [66]:


# Define latent space size
latent_dim = 10
input_dim = icu_data_encoded.shape[1]

# Encoder setup
inputs = Input(shape=(input_dim,))
x = Dense(64, activation='relu')(inputs)
x = Dense(32, activation='relu')(x)

z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Reparameterization trick
def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.random.normal(shape=tf.shape(z_mean))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling)([z_mean, z_log_var])

# Decoder setup
decoder_input = Input(shape=(latent_dim,))
decoder_hidden = Dense(32, activation='relu')(decoder_input)
decoder_hidden = Dense(64, activation='relu')(decoder_hidden)
decoder_output = Dense(input_dim, activation='sigmoid')(decoder_hidden)

encoder = Model(inputs, [z_mean, z_log_var, z], name="encoder")
decoder = Model(decoder_input, decoder_output, name="decoder")

# Define the full VAE model
outputs = decoder(z)

# Custom VAE Model Class
class VAE(Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        return reconstructed

    # Custom loss function integrated directly into the model
    def compute_loss(self, inputs, outputs, z_mean, z_log_var):
        reconstruction_loss = tf.reduce_mean(tf.square(inputs - outputs))
        kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        return reconstruction_loss + kl_loss

    def train_step(self, data):
        # `data` is the input data (no separate target needed for autoencoders)
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            outputs = self.decoder(z)
            loss = self.compute_loss(data, outputs, z_mean, z_log_var)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {"loss": loss}

# Initialize VAE model
vae = VAE(encoder, decoder)

# Compile the model
vae.compile(optimizer='adam')

# Check your data shape
print("Data shape:", icu_data_encoded.shape)

# Check the VAE model's input shape
#print("Model input shape:", vae.input_shape)

# Fit the model
vae.fit(
    icu_data_encoded,  # Only pass the input data
    epochs=10,
    batch_size=32,
    validation_split=0.2
)

Data shape: (136, 22)
Epoch 1/10
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m5s[0m 2s/step - loss: nan

ValueError: Tried to convert 'y' to a tensor and failed. Error: None values not supported.