### Implementing RNN

In [41]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import os
import time
from sklearn.metrics import mean_squared_error
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
import matplotlib.pyplot as plt

# Suppress a specific warning
warnings.filterwarnings("ignore", category=UserWarning)

### Initially we will calculate the days remaining in the hospital which is given by the hospital length of stay - the day in the hospital

In [43]:
df = pd.read_csv("../Datasets/df_over_14.csv")
df = df.drop(['Unnamed: 0'],axis=1)
df['days_remaining'] = df['hospital_length_of_stay']-df['day']
df = df.drop(['hospital_length_of_stay','day'],axis=1)

# List of binary columns
columns_binary = [
    'intubated', 'cardiac_arrest', 'arrested_time', 'major_cardiac_events', 
    'clinically_diagnosed_infections', 'mechanical_ventilation', 'antiarrhythmic_therapies', 
    'renal_replacement_therapy_dialysis', 'cardiovascular_mechanical_support', 'echocardiogram', 
    'chest_x_ray', 'chest_ct', 'head_ct', 'antimicrobial', 'anticoagulation', 'steroid',
    'Bilateral Consolidation', 'Bilateral Ground Glass', 'Cardiomegaly', 'Edema', 'Effusion', 
    'Pneumothorax', 'Unilateral Consolidation', 'Unilateral Ground Glass', 'Bilateral Ground Glass Opacities',
    'Bilateral consolidationinfiltration', 'Subarachnoid Hemorrhage', 'Subdural Hemorrhage',
    'Emphysematous or Bronchiectasis changes', 'Emphysematous or Bronchiectatic changes', 
    'Pulmonary Embolism', 'Scarring or Fibrosis', 'Unilateral Ground Glass Opacities', 
    'Unilateral consolidationinfiltration'
]

# Define columns to exclude from scaling
columns_to_exclude = ['parent_id', 'days_remaining'] + columns_binary

# Select the columns to scale
columns_to_scale = [col for col in df.columns if col not in columns_to_exclude]

## Removing columns having days remaining less than 0
df = df[df['days_remaining']>0]

df

Unnamed: 0,parent_id,systolic_blood_pressure,diastolic_blood_pressure,heart_rate,respiratory_rate,oxygen_saturation,temperature,highest_mean_arterial_pressure,lowest_mean_arterial_pressure,highest_heart_rate,...,Bilateral Ground Glass,Cardiomegaly,Edema,Effusion,Unilateral Consolidation,Bilateral Ground Glass Opacities,Bilateral consolidationinfiltration,Pulmonary Embolism,Scarring or Fibrosis,days_remaining
0,6,127.0,76.0,68.0,19.0,95.0,36.6,92.0,80.0,77.0,...,0,0,0,0,0,0,0,0,0,31
1,6,97.0,60.0,68.0,22.0,98.0,36.4,71.0,67.0,77.0,...,0,0,0,0,0,0,0,0,0,30
2,6,140.0,68.0,72.0,22.0,99.0,36.5,84.0,84.0,97.0,...,0,0,0,0,0,0,0,0,0,29
3,6,108.0,63.0,98.0,22.0,95.0,36.5,77.0,77.0,107.0,...,0,0,0,1,0,0,0,0,0,28
4,6,126.0,77.0,68.0,24.0,98.0,36.5,92.0,92.0,77.0,...,0,0,0,0,0,1,1,0,0,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
779,511,150.0,50.0,67.0,27.0,93.0,36.8,0.0,0.0,86.0,...,0,0,0,0,0,0,0,0,0,14
780,511,116.0,54.0,57.0,21.0,97.0,36.9,0.0,0.0,64.0,...,0,0,0,0,0,0,0,0,0,13
781,511,131.0,74.0,53.0,22.0,92.0,36.4,0.0,0.0,79.0,...,0,0,0,0,0,0,0,0,0,12
782,511,129.0,58.0,57.0,40.0,96.0,36.9,0.0,0.0,96.0,...,0,0,0,0,0,0,0,0,0,11


### Define a simple RNN model and other resuable functions

In [44]:
import torch
import torch.nn as nn
import torch.optim as optim

# This line defines a class SimpleRNN that inherits from torch.nn.Module. 
#In PyTorch, the Module class is the base class for all neural network modules,
#and it provides essential functions for building and training models.
class SimpleRNN(nn.Module):
    ## constructor that initiaes once the class SimpleRNN is called
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        ## this is an RNN layer that takes inputsize, hiddensize
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        ## this is a fully connected layer which will give us the output
        self.fc = nn.Linear(hidden_size, output_size)

   ## this is the way in which our RNN will operate in forward
    def forward(self, x):
        ## first the input values will be sent to the RNN
        ## it will give out two values out: This is the output of the RNN layer. 
            # It contains the hidden states of the RNN at all time steps.
        ##_: The second value returned by self.rnn(x) is the hidden state for the next time step
            #(which is usually not needed in a basic RNN setup like this, so we use _ to ignore it).
        out, _ = self.rnn(x)
        ## the out consists of the output from each of the timestamp 
        ## which is also the hidden layer for the next timestamp
        ## out[:, -1, :] gives the ouptut of the last layer 
        ## which is also the hidden layer of the next timestamp
        ## but in this case it will only be used to produce the output
        ## _ in the above gives the hidden state produced at timestamp t for the next timestamp (t+1)
        ## it is not used so discarded
        ## out[:,-1,:] and _ will give the same output
        out = out[:, -1, :] 
        ## below we will pass the output of the last timestep and pass to a fully connected layer
        out = self.fc(out)
        return out
    
    
# Define the data extraction function (many-to-many)
def get_data_for_parent(df, parent_id):
    data = df[df['parent_id'] == parent_id]
    features = data.drop(columns=['parent_id', 'days_remaining'])
    target = data['days_remaining'].values
    
    # Convert to tensors
    features_tensor = torch.tensor(features.values).float().unsqueeze(0)  # Add batch dimension
    target_tensor = torch.tensor(target).float().unsqueeze(0)  # Add batch dimension
    return features_tensor, target_tensor

# Define the ensemble prediction function
def ensemble_prediction(models, input_data):
    predictions = []
    
    # Get predictions from each model
    for model in models:
        model.eval()  # Switch to evaluation mode
        with torch.no_grad():
            output = model(input_data)
            predictions.append(output.squeeze().tolist())  # Store the predictions for each timestep
        
    # Average the predictions to form the final prediction at each timestep
    return torch.mean(torch.tensor(predictions), dim=0)

###  Using minmax scaler to scale the data and doing a train test split as well

In [45]:
from sklearn.preprocessing import MinMaxScaler

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Fit and transform only the columns that need scaling
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

# Step 2: Split based on unique parent_id
unique_parent_ids = df['parent_id'].unique()

# Randomly shuffle and split into 75% train and 25% test
train_parent_ids, test_parent_ids = train_test_split(unique_parent_ids, test_size=0.25, random_state=42)

# Filter the dataset based on the split
train_df = df[df['parent_id'].isin(train_parent_ids)]
test_df = df[df['parent_id'].isin(test_parent_ids)]

# Display the results
print(f"Total rows in the dataset: {len(df)}")
print(f"Train dataset rows: {len(train_df)}")
print(f"Test dataset rows: {len(test_df)}")

# Example: Pass 75% (train_df) to the next function
for parent_id in train_df['parent_id'].unique():
    features, target = get_data_for_parent(train_df, parent_id)
    # Example: Print the features and target shapes
    print(f"Parent ID: {parent_id}, Features Shape: {features.shape}, Target Shape: {target.shape}")


Total rows in the dataset: 784
Train dataset rows: 584
Test dataset rows: 200
Parent ID: 14, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 15, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 25, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 40, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 41, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 50, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 51, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 61, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 74, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 80, Features Shape: torch.Size([1, 8, 65]), Target Shape: torch.Size([1, 8])
Parent ID: 82, Features Shape: torch.Size(

### Hyperparameter tuning

In [1]:
# Hyperparameter tuning configuration
tuning_params = {
    'hidden_size': [32, 64],
    'learning_rate': [0.001, 0.005],
    'num_epochs': [500, 1000]
}
best_mse = float('inf')
best_params = None
models = []

# Training loop with hyperparameter tuning
input_size = train_df.drop(columns=['parent_id', 'days_remaining']).shape[1]
output_size = 1  # Single output for each timestep
criterion = nn.MSELoss()

output_dir = "Models"
os.makedirs(output_dir, exist_ok=True)
start_time = time.time()

for hidden_size in tuning_params['hidden_size']:
    for learning_rate in tuning_params['learning_rate']:
        for num_epochs in tuning_params['num_epochs']:
            print(f"Training with hidden_size={hidden_size}, learning_rate={learning_rate}, num_epochs={num_epochs}...")
            fold_models = []
            mse_list = []
            
            for parent_id in train_df['parent_id'].unique():
                # Data for current parent_id
                features, target = get_data_for_parent(train_df, parent_id)

                # Initialize model
                model = SimpleRNN(input_size, hidden_size, output_size)
                optimizer = optim.Adam(model.parameters(), lr=learning_rate)

                # Training
                model.train()
                for epoch in range(num_epochs):
                    optimizer.zero_grad()
                    output = model(features)
                    loss = criterion(output, target)
                    loss.backward()
                    optimizer.step()

                # Save model and record loss
                fold_models.append(model)
                model.eval()
                with torch.no_grad():
                    predictions = model(features)
                    mse = criterion(predictions, target).item()
                    mse_list.append(mse)

            # Compute mean MSE for this parameter configuration
            mean_mse = np.mean(mse_list)
            print(f"Mean MSE for configuration: {mean_mse}")

            # Save the best configuration
            if mean_mse < best_mse:
                best_mse = mean_mse
                best_params = {'hidden_size': hidden_size, 'learning_rate': learning_rate, 'num_epochs': num_epochs}
                models = fold_models

end_time = time.time()
print(f"Best Parameters: {best_params}")
print(f"Total training time: {end_time - start_time:.2f} seconds")

NameError: name 'os' is not defined

In [48]:
# Training function
def train_rnn(train_df, input_size, hidden_size, output_size, learning_rate, num_epochs):
    models = []
    criterion = nn.MSELoss()

    # Train a model for each unique parent_id
    for parent_id in train_df['parent_id'].unique():
        features, target = get_data_for_parent(train_df, parent_id)

        # Initialize model
        model = SimpleRNN(input_size, hidden_size, output_size)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Training
        model.train()
        for epoch in range(num_epochs):
            optimizer.zero_grad()
            output = model(features)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        models.append(model)
    return models

# Evaluate the model on test data
def evaluate_rnn(models, test_df):
    X_test = test_df.drop(columns=['parent_id', 'days_remaining'])
    y_test = test_df['days_remaining']

    y_true, y_pred = [], []
    for i in range(len(X_test)):
        test_sample = torch.tensor(X_test.iloc[i].values).float().unsqueeze(0).unsqueeze(0)
        true_value = y_test.iloc[i]
        prediction = ensemble_prediction(models, test_sample)
        y_true.append(true_value)
        y_pred.append(prediction)

    mse = mean_squared_error(y_true, y_pred)
    return mse, y_true, y_pred

# Set hyperparameters
hidden_size = 64
learning_rate =0.005
num_epochs = 1000

# Prepare input size and output size
input_size = train_df.drop(columns=['parent_id', 'days_remaining']).shape[1]
output_size = 1

# Train the RNN with the best hyperparameters
print(f"Training with hidden_size={hidden_size}, learning_rate={learning_rate}, num_epochs={num_epochs}...")
start_time = time.time()
models = train_rnn(train_df, input_size, hidden_size, output_size, learning_rate, num_epochs)
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds.")

# Evaluate on test data
mse, y_true, y_pred = evaluate_rnn(models, test_df)
print(f"Best Model MSE on test data: {mse}")

Training with hidden_size=64, learning_rate=0.005, num_epochs=500...
Training completed in 24.88 seconds.
Best Model MSE on test data: 328.0070569380498
