In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import logging
import h5py # Save - Load 3D tensor

In [2]:
# Configure root logging once: INFO and above goes both to a log file on disk
# and to the stream handler (shown in the notebook output).
log_handlers = [
    logging.FileHandler("data_loading.log"),
    logging.StreamHandler(),
]
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=log_handlers,
    level=logging.INFO,
)

In [7]:
# Read every split (features plus both label sets) from the HDF5 export.
load_path = 'CSV/exports/tensors/o1_3D_four_dataframe.h5'

logging.info(f"Loading...")
with h5py.File(load_path, 'r') as hf:
    # `[:]` reads each dataset fully, so the data stays usable after the file is closed.
    train_tensor, validate_tensor, test_tensor, external_tensor = (
        hf[key][:]
        for key in ('train_tensor', 'validate_tensor', 'test_tensor', 'external_tensor')
    )
    # Length-of-stay (los) labels
    train_los_label, validate_los_label, test_los_label, external_los_label = (
        hf[key][:]
        for key in ('train_los_label', 'validate_los_label', 'test_los_label', 'external_los_label')
    )
    # Mortality labels
    (
        train_mortality_label,
        validate_mortality_label,
        test_mortality_label,
        external_mortality_label,
    ) = (
        hf[key][:]
        for key in (
            'train_mortality_label',
            'validate_mortality_label',
            'test_mortality_label',
            'external_mortality_label',
        )
    )

# Log the shape of every split as a quick sanity check on what was loaded.
logging.info(f"Train: {train_tensor.shape}, Los Label: {train_los_label.shape}, Mortality Label: {train_mortality_label.shape}")
logging.info(f"Validate: {validate_tensor.shape}, Los Label: {validate_los_label.shape}, Mortality Label: {validate_mortality_label.shape}")
logging.info(f"Test: {test_tensor.shape}, Los Label: {test_los_label.shape}, Mortality Label: {test_mortality_label.shape}")
logging.info(f"External: {external_tensor.shape}, Los Label: {external_los_label.shape}, Mortality Label: {external_mortality_label.shape}")

2024-12-16 23:50:28,751 - INFO - Loading...
2024-12-16 23:50:33,683 - INFO - Train: (122496, 346, 4), Los Label: (122496, 1), Mortality Label: (122496, 1)
2024-12-16 23:50:33,684 - INFO - Validate: (15312, 346, 4), Los Label: (15312, 1), Mortality Label: (15312, 1)
2024-12-16 23:50:33,685 - INFO - Test: (15312, 346, 4), Los Label: (15312, 1), Mortality Label: (15312, 1)
2024-12-16 23:50:33,688 - INFO - External: (234720, 346, 4), Los Label: (234720, 1), Mortality Label: (234720, 1)


In [3]:
class RNNModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
        dropout: Dropout probability applied between stacked LSTM layers.
            Forced to 0 when ``num_layers == 1``, where ``nn.LSTM`` would
            ignore it and emit a warning.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # nn.LSTM only applies dropout *between* layers, so it is meaningless
        # (and triggers a UserWarning) for a single-layer network.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0

        # Recurrent layer; batch_first=True means inputs are (batch, time, features).
        self.rnn = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

        # Fully connected layer mapping the last hidden output to the prediction.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final time step.

        Args:
            x: Tensor of shape (batch_size, time_steps, input_size).

        Returns:
            Tensor of shape (batch_size, output_size).
        """
        # Omitting (h0, c0) makes nn.LSTM start from zero states that already
        # match the input's device and dtype. Building them by hand with
        # torch.zeros(...) yields float32 tensors, which breaks half/double models.
        out, _ = self.rnn(x)  # (batch_size, time_steps, hidden_size)

        # Keep only the output of the last time step: (batch_size, hidden_size).
        last_step = out[:, -1, :]

        return self.fc(last_step)  # (batch_size, output_size)

In [8]:
# Turn the pre-loaded arrays into float32 tensors; the length-of-stay (LOS)
# labels are the regression target.
X_train, y_train = (
    torch.tensor(array, dtype=torch.float32)
    for array in (train_tensor, train_los_label)
)
X_validate, y_validate = (
    torch.tensor(array, dtype=torch.float32)
    for array in (validate_tensor, validate_los_label)
)

# Mini-batch size shared by both loaders.
batch_size = 16

# Training data is reshuffled on every pass over the loader.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Validation data keeps its original order.
validate_dataset = TensorDataset(X_validate, y_validate)
validate_loader = DataLoader(validate_dataset, shuffle=False, batch_size=batch_size)

In [9]:
# Hyper-parameters of the recurrent model
input_size = X_train.shape[2]  # features per time step (last axis of X_train)
hidden_size = 64               # hidden units per LSTM layer
num_layers = 2                 # stacked LSTM layers
output_size = 1                # a single LOS value per sequence

model = RNNModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)

# Adam optimizer, trained against mean squared error
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

In [None]:
# Early stopping parameters
patience = 5        # epochs without sufficient improvement before stopping
min_delta = 0.001   # minimum drop in validation loss that counts as an improvement
best_val_loss = float('inf')
patience_counter = 0
early_stop = False
best_state = None   # snapshot of the weights from the best validation epoch

# Per-epoch loss history; the plotting cell reads these two lists.
train_losses = []
val_losses = []

# Training loop
epochs = 50
logging.info(f"Training RNN for {epochs} epochs with batch size {batch_size}.")

for epoch in range(epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        predictions = model(X_batch)
        # Give the target exactly the prediction's shape so MSELoss can never
        # silently broadcast (works for both (B, 1) and (B,) labels).
        loss = criterion(predictions, y_batch.reshape(predictions.shape))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)  # mean of the per-batch losses

    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_val_batch, y_val_batch in validate_loader:
            val_predictions = model(X_val_batch)
            val_loss += criterion(
                val_predictions, y_val_batch.reshape(val_predictions.shape)
            ).item()
    val_loss /= len(validate_loader)

    # Record history for later plotting.
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Logging progress
    logging.info(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    # Early stopping logic
    if best_val_loss - val_loss > min_delta:
        best_val_loss = val_loss
        patience_counter = 0
        # Clone the tensors: state_dict() returns references that the optimizer
        # keeps updating in place.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        patience_counter += 1
        if patience_counter >= patience:
            early_stop = True
            # Log immediately so the message is emitted even when the trigger
            # falls on the final epoch.
            logging.info(f"Early stopping triggered at epoch {epoch + 1}.")
            break

# Evaluate with the best validation weights rather than the last (possibly
# over-fitted) ones.
if best_state is not None:
    model.load_state_dict(best_state)
    logging.info(f"Restored best model weights (validation loss {best_val_loss:.4f}).")

2024-12-16 23:54:03,430 - INFO - Training RNN for 50 epochs with batch size 16.


In [None]:
# Trim both loss histories to a common length so they share one epoch axis.
# Expects `train_losses` / `val_losses` to be per-epoch lists filled during training.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported in any visible cell —
# confirm it is imported before this cell runs.
min_len = min(len(train_losses), len(val_losses))
train_losses, val_losses = train_losses[:min_len], val_losses[:min_len]
epoch_axis = range(1, min_len + 1)

# Training loss on the left-hand y-axis
fig, ax1 = plt.subplots(figsize=(10, 6))
line1 = ax1.plot(epoch_axis, train_losses, color='b', label='Training Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Training Loss', color='b')
ax1.tick_params(axis='y', labelcolor='b')
ax1.grid(visible=True, linestyle='--', alpha=0.6)

# Validation loss on a secondary (right-hand) y-axis
ax2 = ax1.twinx()
line2 = ax2.plot(epoch_axis, val_losses, color='orange', label='Validation Loss')
ax2.set_ylabel('Validation Loss', color='orange')
ax2.tick_params(axis='y', labelcolor='orange')

# Vertical marker: number of recorded epochs minus the epochs counted by patience_counter
best_epoch = min_len - patience_counter
line3 = ax1.axvline(best_epoch, linestyle='--', color='r', label='Early Stopping Point')

# One legend covering the artists from both axes
lines = [*line1, *line2, line3]
labels = [artist.get_label() for artist in lines]
ax1.legend(lines, labels, loc='upper right')

# Title and layout
plt.title('Training and Validation Loss Over Epochs with Early Stopping')
fig.tight_layout()

# Saving the figure is currently disabled:
#file_name = 'rnn_train_val_loss'  # Example file name
#plt.savefig(f'plots/01_train_vall_loss/{file_name}.png')
plt.show()

In [None]:
# Test phase
model.eval()

# as_tensor accepts both the NumPy arrays from the loading cell and tensors left
# over from a previous run of this cell, so re-running is idempotent and warning-free.
test_tensor = torch.as_tensor(test_tensor, dtype=torch.float32)
test_los_label = torch.as_tensor(test_los_label, dtype=torch.float32)

# Evaluate in chunks to bound peak memory instead of pushing the whole test set
# through the LSTM in a single forward pass.
eval_batch_size = 1024

with torch.no_grad():
    test_predictions = torch.cat(
        [model(chunk) for chunk in test_tensor.split(eval_batch_size)]
    )
    # The labels are loaded as (N, 1) (see the shape log of the loading cell).
    # An extra unsqueeze would make them (N, 1, 1), and MSELoss would broadcast
    # that against the (N, 1) predictions into an (N, N, 1) tensor: a wrong loss
    # and ~N^2 memory. Reshape to the prediction shape instead.
    test_loss = criterion(
        test_predictions, test_los_label.reshape(test_predictions.shape)
    )
    logging.info(f"Test Loss: {test_loss:.4f}")