In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import Dense

In [3]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

# Exclude non-numeric columns from mean calculation
numeric_columns = data.select_dtypes(include=['number']).columns
data.fillna(data[numeric_columns].mean(), inplace=True)

# Encode categorical variables
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])

# Perform one-hot encoding for the "race" column
data = pd.get_dummies(data, columns=['race'], drop_first=True)

In [4]:
# Split the dataset at row 39040
split_index = 39040
data_train = data.iloc[:split_index]
data_test = data.iloc[split_index:]

# Split the dataset into features and target variable
X_train = data_train.drop(['los', 'subject_id', 'hadm_id'], axis=1)  # Features
y_train = data_train['los']  # Target variable

X_test = data_test.drop(['los', 'subject_id', 'hadm_id'], axis=1)  # Features
y_test = data_test['los']  # Target variable

# Feature scaling (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Initialize the ANN
model = Sequential()

# Add input layer and first hidden layer
model.add(Dense(units=6, activation='relu', input_dim=X_train_scaled.shape[1]))

# Add second hidden layer
model.add(Dense(units=6, activation='relu'))

# Add output layer
model.add(Dense(units=1, activation='linear'))

# Compile the ANN
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the ANN on the training set
model.fit(X_train_scaled, y_train, batch_size=32, epochs=50, verbose=1)

# Evaluate the model on the testing set
loss = model.evaluate(X_test_scaled, y_test)
print("Mean Squared Error on Test Set:", loss)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 36.4665
Epoch 2/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 21.4430
Epoch 3/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 18.4666
Epoch 4/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 17.4144
Epoch 5/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15.7351
Epoch 6/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15.3521
Epoch 7/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 14.6584
Epoch 8/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 14.1465
Epoch 9/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 13.6001
Epoch 10/50
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

In [6]:
# Make predictions on the testing set
predictions = model.predict(X_test_scaled)

# Print some predictions
print(predictions[:15])

[1m524/524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[[4.5090456]
 [4.6571574]
 [4.723707 ]
 [4.8879986]
 [4.901382 ]
 [4.914765 ]
 [4.916346 ]
 [5.0667663]
 [4.954913 ]
 [4.9682965]
 [4.891222 ]
 [4.995062 ]
 [5.055572 ]
 [5.30387  ]
 [5.0352106]]
