In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import Dense

# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

# Exclude non-numeric columns from mean calculation
numeric_columns = data.select_dtypes(include=['number']).columns
data.fillna(data[numeric_columns].mean(), inplace=True)

# Encode categorical variables (if any)
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])

# Perform one-hot encoding for the "race" column
data = pd.get_dummies(data, columns=['race'], drop_first=True)

# Split the dataset into features and target variable
X = data.drop(['los', 'subject_id', 'hadm_id'], axis=1)  # Features
y = data['los']  # Target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# Initialize the ANN
model = Sequential()

# Add input layer and first hidden layer
model.add(Dense(units=6, activation='relu', input_dim=X_train_scaled.shape[1]))

# Add second hidden layer
model.add(Dense(units=6, activation='relu'))

# Add output layer
model.add(Dense(units=1, activation='linear'))  # Assuming length of stay is a continuous variable

# Compile the ANN
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the ANN on the training set
model.fit(X_train_scaled, y_train, batch_size=32, epochs=30, verbose=1)

# Evaluate the model on the testing set
loss = model.evaluate(X_test_scaled, y_test)
print("Mean Squared Error on Test Set:", loss)

Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 35.3669
Epoch 2/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 23.1770
Epoch 3/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 20.3824
Epoch 4/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 17.6530
Epoch 5/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 16.8410
Epoch 6/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15.3194
Epoch 7/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 15.1867
Epoch 8/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 14.1441
Epoch 9/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 14.0917
Epoch 10/30
[1m1395/1395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

In [8]:
# Make predictions on the testing set
predictions = model.predict(X_test_scaled)

# Optionally, you can inverse transform the predictions if you scaled the target variable
# predictions = scaler.inverse_transform(predictions)

# Print some predictions
print(predictions[:10])

[1m349/349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[[ 2.1402745]
 [ 3.0452056]
 [ 6.4126544]
 [ 2.949476 ]
 [ 2.646571 ]
 [ 1.8934245]
 [ 7.26917  ]
 [ 3.4834762]
 [ 2.324341 ]
 [18.058338 ]]
