# TensorFlow Testing Field

In [1]:
"""
imports several Python libraries
and modules commonly used in machine
learning tasks
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score

In [2]:
# Load the dataset
data = pd.read_csv("mimic_mean_final.csv")

external_data = pd.read_csv("mimic_mean_final.csv") # Not included yet

## Handling missing values by filling with mean (ANN can't handle missing values)

In [3]:
"""
Select columns based on their data types.
By specifying include=['number'], we select
only columns with numeric data types.
The .columns attribute then retrieves the
names of these selected columns, storing them
in the numeric_columns variable.
"""
numeric_columns = data.select_dtypes(include=['number']).columns

"""
Fill missing values with the mean
value of each respective column. 
"""
data.fillna(data[numeric_columns].mean(), inplace=True)

In [4]:
"""
Convert categorical variables to numerical
"""
label_encoder = LabelEncoder()

"""
Convert 'gender' column from
categorical to numeric. 
Male become 1 and Female 0.
The specific numeric values assigned
to each category are determined based
on the order of appearance of the unique
categories in the data.
"""
data['gender'] = label_encoder.fit_transform(data['gender'])

#reverse the gender from numerical to categorical.
#data['gender'] = label_encoder.inverse_transform(data['gender'])

In [5]:
"""
After this line of code is executed,
the "race" column will be replaced
with one or more columns, each
representing a category of race, with
binary values indicating the presence or
absence of that category for each row.
"""

data = pd.get_dummies(data, columns=['race'], drop_first=True)

In [6]:
"""
I have calculate the split point.
Every patient has 16 rows of observations,
we don't want to have the same patient
to be both in training and test set
"""

# Split the dataset at row 39040 for Mimic and 60384 for eICU
split_index = 39040
data_train = data.iloc[:split_index]
data_test = data.iloc[split_index:]

In [7]:
"""
x_train and x_test used for training and testing
the model. We remove columns that are not usefulls
in training and testing ('los', 'subject_id', 
'hadm_id',  'Time_Zone', 'row_count') and we leave
the rests which represent the wanted features.

y_train and y_test represent the label
"""
# Split the dataset into features and label variable
X_train = data_train.drop(['los', 'subject_id', 'hadm_id', 'Time_Zone', 'row_count'], axis=1)  # Features
y_train = data_train['los']  # label variable

X_test = data_test.drop(['los', 'subject_id', 'hadm_id', 'Time_Zone', 'row_count'], axis=1)  # Features
y_test = data_test['los']  # label variable

In [8]:
"""
Computes the mean and standard deviation of each feature
"""

# Feature scaling (important for neural networks)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Initialize the ANN, it's a common way to build ANN in Keras
model = Sequential()


"""
Neural network with multiple hidden layers and one output.
The ReLU activation function is used in the hidden layers
to introduce non-linearity, while the output layer uses a
linear activation function to produce continuous predictions.
"""
# Add input layer and first hidden layer
model.add(Dense(units=117, activation='relu', input_dim=X_train_scaled.shape[1]))

# Add second hidden layer
model.add(Dense(units=117, activation='relu'))

# Add third hidden layer
model.add(Dense(units=59, activation='relu'))

# Add fourth hidden layer
model.add(Dense(units=28, activation='relu'))

# Add output layer
model.add(Dense(units=1, activation='linear'))



"""
Compile the ANN model with optimizer and loss function

Optimizers: adam, sgd, rmsprop, adagrad, adadelta, adamax, nadam

Loss: mean_squared_error, mean_absolute_error, huber_loss,
      mean_squared_logarithmic_error, binary_crossentropy,
      categorical_crossentropy, sparse_categorical_crossentropy,
      kullback_leibler_divergence
"""
# Compile the ANN
model.compile(optimizer='rmsprop', loss='mean_squared_error')

"""
The batch size is a hyperparameter that defines the number of
samples to work through before updating the internal model parameters.
"""
# Train the ANN on the training set
model.fit(X_train_scaled, y_train, batch_size=10, epochs=5, verbose=1)

# Predictions on the test set
y_pred = model.predict(X_test_scaled)

In [None]:
# Metrics
print("Mean Square Error (MSE):", mean_squared_error(y_test, y_pred))
print("Mean Absolute Error (MAE):", mean_absolute_error(y_test, y_pred))
print("Root Mean Squared Error (RMSE):", mean_squared_error(y_test, y_pred, squared=False))

# For MSLE calculation must not have negative values in y_test and y_pred
if (y_test >= 0).all() and (y_pred >= 0).all():
    print("Mean Squared Logarithmic Error (MSLE):", mean_squared_log_error(y_test, y_pred))
else:
    print("Mean Squared Logarithmic Error cannot be calculated because targets contain negative values.")
print("R-squared (R2):", r2_score(y_test, y_pred))

In [None]:
# Make predictions on the testing set
predictions = model.predict(X_test_scaled)

# Print some predictions
print(predictions[:15])

# Hyperparameter (Testing)

In [None]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split


"""
This function creates an ANN taken two arguments:
input_shape: The shape of the input features.
hidden_units: A tuple specifying the number of units in each hidden layer.
"""
# Function to create the ANN model
def create_model(input_shape, hidden_units):
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.Dense(hidden_units[0], activation='relu'),
        layers.Dense(hidden_units[1], activation='relu'),
        layers.Dense(hidden_units[2], activation='relu'),
        layers.Dense(hidden_units[3], activation='relu'),
        layers.Dense(1, activation='linear')
    ])
    return model

"""
This function trains and evaluates an ANN model with given hyperparameters.
"""
# Function to train and evaluate a model with given hyperparameters
def train_and_evaluate_model(X_train, y_train, X_val, y_val, hyperparams):
    model = create_model(X_train.shape[1:], hyperparams['hidden_units'])
    model.compile(optimizer=hyperparams['optimizer'], loss=hyperparams['loss'])
    model.fit(X_train, y_train, batch_size=hyperparams['batch_size'], epochs=hyperparams['epochs'], verbose=0)
    loss = model.evaluate(X_val, y_val)
    return loss


"""
Hyperparameters such as the number of hidden units, optimizer,
loss function, batch size, and number of epochs are defined
with possible ranges of values.
"""
# Define hyperparameters ranges
hidden_units_range = [(64, 128, 64, 32), (32, 64, 32, 16)]  # Possible combinations of hidden units
optimizer_choices = ['adam', 'sgd', 'rmsprop']  # Possible optimizers
loss_choices = ['mean_squared_error', 'mean_absolute_error']  # Possible loss functions
batch_size_range = [16, 32, 64]  # Possible batch sizes
epochs_range = [5, 10, 15]  # Possible number of epochs

# Define population size
population_size = 5

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=42)

# Initialize population with random hyperparameters
population = []
for _ in range(population_size):
    hyperparams = {
        'hidden_units': hidden_units_range[np.random.choice(len(hidden_units_range))],
        'optimizer': np.random.choice(optimizer_choices),
        'loss': np.random.choice(loss_choices),
        'batch_size': np.random.choice(batch_size_range),
        'epochs': np.random.choice(epochs_range)
    }
    population.append(hyperparams)

# Perform Population-Based Training (PBT)
for generation in range(5):  # Number of generations
    print(f"Generation {generation+1}:")
    for i, hyperparams in enumerate(population):
        print(f"Model {i+1}: Hyperparameters - {hyperparams}")
        loss = train_and_evaluate_model(X_train, y_train, X_val, y_val, hyperparams)
        print(f"        Validation Loss: {loss}")
    
    # Update hyperparameters based on performance
    for i, hyperparams in enumerate(population):
        for j, other_hyperparams in enumerate(population):
            if i != j and np.random.rand() < 0.5:  # Randomly choose whether to exchange hyperparameters
                other_loss = train_and_evaluate_model(X_train, y_train, X_val, y_val, other_hyperparams)
                if loss < other_loss * 0.9:  # Threshold for replacing hyperparameters
                    other_hyperparams['hidden_units'] = hyperparams['hidden_units']
                    other_hyperparams['optimizer'] = hyperparams['optimizer']
                    other_hyperparams['loss'] = hyperparams['loss']
                    other_hyperparams['batch_size'] = hyperparams['batch_size']
                    other_hyperparams['epochs'] = hyperparams['epochs']
                    print(f"        Updated hyperparameters for model {j+1}: {other_hyperparams}")

    # Perturb hyperparameters
    for hyperparams in population:
        if np.random.rand() < 0.5:
            hyperparams['hidden_units'] = hidden_units_range[np.random.choice(len(hidden_units_range))]
        if np.random.rand() < 0.5:
            hyperparams['optimizer'] = np.random.choice(optimizer_choices)
        if np.random.rand() < 0.5:
            hyperparams['loss'] = np.random.choice(loss_choices)
        if np.random.rand() < 0.5:
            hyperparams['batch_size'] = np.random.choice(batch_size_range)
        if np.random.rand() < 0.5:
            hyperparams['epochs'] = np.random.choice(epochs_range)

Generation 1:
Model 1: Hyperparameters - {'hidden_units': (64, 128, 64, 32), 'optimizer': 'sgd', 'loss': 'mean_absolute_error', 'batch_size': 64, 'epochs': 10}
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.9230
        Validation Loss: 0.9082759618759155
Model 2: Hyperparameters - {'hidden_units': (64, 128, 64, 32), 'optimizer': 'adam', 'loss': 'mean_absolute_error', 'batch_size': 32, 'epochs': 10}
