# TensorFlow Testing Field

In [1]:
"""
imports several Python libraries
and modules commonly used in machine
learning tasks
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score

In [2]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

external_data = pd.read_csv("mimic_mean_final.csv") # Not included yet

## Handling missing values by filling with mean (ANN can't handle missing values)

In [3]:
"""
Select columns based on their data types.
By specifying include=['number'], we select
only columns with numeric data types.
The .columns attribute then retrieves the
names of these selected columns, storing them
in the numeric_columns variable.
"""
numeric_columns = data.select_dtypes(include=['number']).columns

"""
Fill missing values with the mean
value of each respective column. 
"""
data.fillna(data[numeric_columns].mean(), inplace=True)

In [4]:
"""
Convert categorical variables to numerical
"""
label_encoder = LabelEncoder()

"""
Convert 'gender' column from
categorical to numeric. 
Male become 1 and Female 0.
The specific numeric values assigned
to each category are determined based
on the order of appearance of the unique
categories in the data.
"""
data['gender'] = label_encoder.fit_transform(data['gender'])

#reverse the gender from numerical to categorical.
#data['gender'] = label_encoder.inverse_transform(data['gender'])

In [5]:
"""
After this line of code is executed,
the "race" column will be replaced
with one or more columns, each
representing a category of race, with
binary values indicating the presence or
absence of that category for each row.
"""

data = pd.get_dummies(data, columns=['race'], drop_first=True)

In [6]:
"""
I have calculate the split point.
Every patient has 16 rows of observations,
we don't want to have the same patient
to be both in training and test set
"""

# Split the dataset at row 39040 for Mimic and 60384 for eICU
split_index = 39040
data_train = data.iloc[:split_index]
data_test = data.iloc[split_index:]

In [7]:
"""
x_train and x_test used for training and testing
the model. We remove columns that are not usefulls
in training and testing ('los', 'subject_id', 
'hadm_id',  'Time_Zone', 'row_count') and we leave
the rests which represent the wanted features.

y_train and y_test represent the label
"""
# Split the dataset into features and label variable
X_train = data_train.drop(['los', 'subject_id', 'hadm_id', 'Time_Zone', 'row_count'], axis=1)  # Features
y_train = data_train['los']  # label variable

X_test = data_test.drop(['los', 'subject_id', 'hadm_id', 'Time_Zone', 'row_count'], axis=1)  # Features
y_test = data_test['los']  # label variable

In [8]:
"""
Computes the mean and standard deviation of each feature
"""

# Feature scaling (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Initialize the ANN, it's a common way to build ANN in Keras
model = Sequential()

# Add input layer and first hidden layer
model.add(Dense(units=117, activation='relu', input_dim=X_train_scaled.shape[1]))

# Add second hidden layer
model.add(Dense(units=117, activation='relu'))

# Add third hidden layer
model.add(Dense(units=56, activation='relu'))

# Add fourth hidden layer
model.add(Dense(units=20, activation='relu'))

# Add output layer
model.add(Dense(units=1, activation='linear'))

# Compile the ANN
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the ANN on the training set
model.fit(X_train_scaled, y_train, batch_size=32, epochs=25, verbose=1)

# Predictions on the test set
y_pred = model.predict(X_test_scaled)

Epoch 1/25


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 23.5303
Epoch 2/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 3.9690
Epoch 3/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 1.7053
Epoch 4/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.2716
Epoch 5/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 1.1138
Epoch 6/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.9736
Epoch 7/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.8683
Epoch 8/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.7731
Epoch 9/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6675
Epoch 10/25
[1m1220/1220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms

In [10]:
# Metrics
print("Mean Square Error (MSE):", mean_squared_error(y_test, y_pred))
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test, y_pred, squared=False))

# For MSLE calculation must not have negative values in y_test and y_pred
if (y_test >= 0).all() and (y_pred >= 0).all():
    print("Mean Squared Logarithmic Error (MSLE):", mean_squared_log_error(y_test, y_pred))
else:
    print("Mean Squared Logarithmic Error cannot be calculated because targets contain negative values.")
print("R-squared (R2):", r2_score(y_test, y_pred))

Mean Square Error (MSE): 125.46000395776358
Mean Absolute Error (MAE): 3.3290449455569835
Root Mean Squared Error (RMSE): 11.20089299822847
Mean Squared Logarithmic Error cannot be calculated because targets contain negative values.
R-squared (R2): -2.385594166620127


In [11]:
# Make predictions on the testing set
predictions = model.predict(X_test_scaled)

# Print some predictions
print(predictions[:15])

[1m524/524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[[5.3761144]
 [5.3905907]
 [5.290603 ]
 [5.122614 ]
 [5.122614 ]
 [5.122614 ]
 [5.066771 ]
 [4.947405 ]
 [5.122614 ]
 [5.122614 ]
 [4.948884 ]
 [5.122614 ]
 [4.956036 ]
 [4.505419 ]
 [5.122614 ]]
