In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from sklearn.ensemble import RandomForestRegressor
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [21]:
data = pd.read_csv("df_imputed.csv")
data.drop(columns=['Unnamed: 0'], inplace=True)
data

Unnamed: 0,id,aniongap,bicarbonate,bun,calcium,chloride,creatinine,glucose,sodium,potassium,...,admission_type_URGENT,first_careunit_Cardiac Vascular Intensive Care Unit (CVICU),first_careunit_Coronary Care Unit (CCU),first_careunit_Medical Intensive Care Unit (MICU),first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU),first_careunit_Neuro Intermediate,first_careunit_Neuro Stepdown,first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU),first_careunit_Surgical Intensive Care Unit (SICU),first_careunit_Trauma SICU (TSICU)
0,20001305.0,15.0,23.0,47.0,11.4,108.0,0.8,154.0,142.0,4.3,...,0,0,0,0,1,0,0,0,0,0
1,20001305.0,13.0,25.0,48.0,10.8,107.0,0.9,149.0,140.0,4.7,...,0,0,0,0,1,0,0,0,0,0
2,20001305.0,13.0,24.0,50.0,10.8,108.0,0.9,131.0,141.0,4.1,...,0,0,0,0,1,0,0,0,0,0
3,20001361.0,14.0,22.0,28.0,6.3,107.0,2.5,161.0,137.0,5.8,...,0,0,0,0,1,0,0,0,0,0
4,20001361.0,15.0,20.0,32.0,6.5,108.0,2.5,124.0,137.0,5.7,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93599,29999498.0,20.0,22.0,13.0,11.2,100.0,1.2,76.0,137.0,4.6,...,0,0,0,0,1,0,0,0,0,0
93600,29999498.0,15.0,21.0,13.0,10.1,103.0,1.2,102.0,135.0,4.2,...,0,0,0,0,1,0,0,0,0,0
93601,29999625.0,17.0,21.0,14.0,8.5,104.0,1.2,109.0,142.0,4.6,...,0,0,0,0,0,0,0,1,0,0
93602,29999625.0,15.0,24.0,21.0,8.6,110.0,1.6,122.0,149.0,4.1,...,0,0,0,0,0,0,0,1,0,0


In [22]:
deceased_df = data[data['icu_death'] == 1]
survived_df = data[data['icu_death'] == 0]

In [23]:
deceased_df.drop('id', axis=1, inplace=True)
correlation_matrix = deceased_df.corr()
abs_target_correlation = correlation_matrix['los_icu'].abs()

print(abs_target_correlation.drop('los_icu', axis=0))
N = 30 
top_features = abs_target_correlation.drop('los_icu', axis=0).nlargest(N).index.tolist()

X = deceased_df[top_features]
y=deceased_df['los_icu']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


aniongap                                                          0.113425
bicarbonate                                                       0.072689
bun                                                               0.034244
calcium                                                           0.035887
chloride                                                          0.003144
                                                                    ...   
first_careunit_Neuro Intermediate                                 0.006795
first_careunit_Neuro Stepdown                                     0.007410
first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)    0.022016
first_careunit_Surgical Intensive Care Unit (SICU)                0.062911
first_careunit_Trauma SICU (TSICU)                                0.009650
Name: los_icu, Length: 97, dtype: float64


In [24]:
# Data normalization
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Partition the data set
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [25]:
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = tfa.layers.MultiHeadAttention(head_size=embed_dim // num_heads, num_heads=num_heads)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)  # Make sure the output of FFN matches embed_dim
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att([inputs, inputs, inputs])
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    


In [None]:
def build_model(embed_dim, num_heads, ff_dim):
    input_shape = (sequence_length, embed_dim)  # input shape
    model = Sequential([
        TransformerBlock(embed_dim, num_heads, ff_dim),
        Dense(1)  # Adjust the output layer to the task
    ])
    model.compile(optimizer="adam", loss="mse")
    return model

# Use KerasClassifier to wrap the model
model = KerasClassifier(build_fn=build_model, verbose=0)

# 定义你的参数网格
param_grid = {
    'embed_dim': [30,60],  # Input feature dimensions
    'num_heads': [2,3,4],    # Head count
    'ff_dim': [64,128,256],    # Internal dimensions of FFN
}

# Create GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error')

# Perform a grid search
grid_result = grid_search.fit(X_train, y_train,batch_size=32, epochs=10, validation_split=0.2)  # 使用填充后的数据


print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


In [26]:
sequence_length = 3  

if X_train.shape[1] < sequence_length:
    # If the number of features is less than the sequence length, you may need to pad the data
    padding = np.zeros((X_train.shape[0], sequence_length - X_train.shape[1], X_train.shape[2]))
    X_train_padded = np.concatenate([X_train, padding], axis=1)
else:
    # If the number of features is more than or equal to the sequence length, the data can be reshaped or split
    X_train_reshaped = X_train.reshape(-1, sequence_length, X_train.shape[1] // sequence_length)
    
if X_test.shape[1] < sequence_length:
    # If the number of features is less than the sequence length, you may need to pad the data
    padding = np.zeros((X_test.shape[0], sequence_length - X_test.shape[1], X_test.shape[2]))
    X_test_padded = np.concatenate([X_test, padding], axis=1)
else:
    # If the number of features is more than or equal to the sequence length, the data can be reshaped or split
    X_test_reshaped = X_test.reshape(-1, sequence_length, X_test.shape[1] // sequence_length)    


In [27]:

embed_dim = 30  
num_heads = 2   
ff_dim = 64   

model = Sequential([
    TransformerBlock(embed_dim, num_heads, ff_dim),
    Dense(1)  
])


model.compile(optimizer="adam", loss="mse")
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.2)

loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')


predictions = model.predict(X_test)

Epoch 1/100


  f"The initializer {self.__class__.__name__} is unseeded "


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [28]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)


r2 = r2_score(y_test, predictions)
print(f'Test MSE: {mse}')
print(f'Test MAE: {mae}')
print(f"R-squared (R2): {r2}")

Test MSE: 11.167887762631565
Test MAE: 2.1457099391729515
R-squared (R2): 0.854511921153014


In [29]:
import pandas as pd

predictions_df = pd.DataFrame(predictions, columns=['Predicted LOS_ICU'])
y_test_df = pd.DataFrame(y_test, columns=['los_icu'])


predictions_df.reset_index(drop=True, inplace=True)
y_test_df.reset_index(drop=True, inplace=True)

comparison_df = pd.concat([y_test_df, predictions_df], axis=1)

print(comparison_df.head())



   los_icu  Predicted LOS_ICU
0     4.94           6.716133
1     1.01           0.238064
2     2.70           4.192300
3     3.18           3.707989
4     8.96           7.711873


In [30]:
survived_df.drop('id', axis=1, inplace=True)
correlation_matrix = survived_df.corr()
abs_target_correlation = correlation_matrix['los_icu'].abs()

print(abs_target_correlation.drop('los_icu', axis=0))
N = 30  
top_features = abs_target_correlation.drop('los_icu', axis=0).nlargest(N).index.tolist()

X = survived_df[top_features]
y=survived_df['los_icu']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


aniongap                                                          0.059016
bicarbonate                                                       0.070384
bun                                                               0.034100
calcium                                                           0.038278
chloride                                                          0.014794
                                                                    ...   
first_careunit_Neuro Intermediate                                 0.004587
first_careunit_Neuro Stepdown                                     0.009522
first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)    0.030888
first_careunit_Surgical Intensive Care Unit (SICU)                0.047393
first_careunit_Trauma SICU (TSICU)                                0.037423
Name: los_icu, Length: 97, dtype: float64


In [31]:
model.compile(optimizer="adam", loss="mse")


history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.2)


loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')


predictions = model.predict(X_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [32]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)


r2 = r2_score(y_test, predictions)
print(f'Test MSE: {mse}')
print(f'Test MAE: {mae}')
print(f"R-squared (R2): {r2}")

Test MSE: 8.757076177429752
Test MAE: 1.8287517482141735
R-squared (R2): 0.8859184282247166
