In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import datetime
import os
import xml.etree.ElementTree as ET

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error

In [None]:
# Report up front whether the training cells below will run on GPU or CPU.
print(
    "CUDA is available! Training on GPU..."
    if torch.cuda.is_available()
    else "CUDA is not available. Training on CPU..."
)

In [None]:
# Shared hyperparameters read by the windowing and training cells below.
sequence_window = 7 # input window length in samples; the original "*5" note suggests 5-minute samples (7 * 5 = 35 min) - TODO confirm
prediction_horizon = 6 # steps ahead to predict; at 5 minutes per step this is 30 minutes ahead - TODO confirm sampling rate
# Hidden-state width passed to the RNN/LSTM/GRU model constructors.
hidden_size = 50
# Output width of each model's final linear layer (one predicted value).
output_size = 1
# Upper bound on training epochs per subject; a later cell reassigns this to 200.
num_epochs = 100

In [None]:
def preprocess_OhioT1DM(path):
    """Split the glucose readings of one OhioT1DM XML file into gap-free segments.

    Every child of each ``glucose_level`` element is read in document order.
    A reading is appended to the current segment when it is at most 6 minutes
    after the previous reading; otherwise it starts a new segment. Each
    ``glucose_level`` element always starts a new segment.

    Args:
        path: Path to the subject's XML file.

    Returns:
        list[list[float]]: One list of glucose values per contiguous segment.
        Empty ``glucose_level`` elements contribute nothing (previously they
        raised IndexError).

    Raises:
        KeyError: If a reading lacks the ``ts`` or ``value`` attribute.
        ValueError: If a timestamp does not match ``%d-%m-%Y %H:%M:%S`` or a
            value is not numeric.
    """
    ts_format = "%d-%m-%Y %H:%M:%S"
    interval_timedelta = datetime.timedelta(minutes=6)
    root = ET.parse(path).getroot()

    res = []
    for item in root.findall("glucose_level"):
        # Reset per element so its first reading always opens a new segment.
        prev_ts = None
        for event in item:
            attrs = event.attrib
            # Parse each timestamp once and carry it forward to the next step.
            ts = datetime.datetime.strptime(attrs["ts"], ts_format)
            value = float(attrs["value"])
            if prev_ts is not None and ts - prev_ts <= interval_timedelta:
                res[-1].append(value)
            else:
                res.append([value])
            prev_ts = ts
    return res


In [None]:
def create_sequences(data, seq_length, pred_step):
    """
    Create sliding-window sequences and their look-ahead targets.

    Window ``i`` is ``data[i : i + seq_length]`` and its target is
    ``data[i + seq_length + pred_step - 1]``, i.e. the value ``pred_step``
    steps after the last element of the window.

    Args:
    data (list or ndarray): The time series data (first axis is time).
    seq_length (int): Length of the sequence.
    pred_step (int): Steps ahead to predict.

    Returns:
    tuple of torch.Tensors: float32 sequences of shape
    ``(n_windows, seq_length, *feature_dims)`` and targets of shape
    ``(n_windows, *feature_dims)``. When the series is too short for a single
    window, the tensors are empty but keep those shapes (the sequences used to
    collapse to shape ``(0,)``).
    """
    # Convert once up front instead of building a Python list of slices.
    series = torch.as_tensor(data, dtype=torch.float32)
    n_windows = max(len(series) - seq_length - pred_step + 1, 0)
    feature_dims = tuple(series.shape[1:])

    if n_windows == 0:
        empty_sequences = series.new_empty((0, seq_length) + feature_dims)
        empty_targets = series.new_empty((0,) + feature_dims)
        return empty_sequences, empty_targets

    # torch.stack copies, so the result never aliases the caller's array.
    sequences = torch.stack([series[i:i + seq_length] for i in range(n_windows)])
    first_target = seq_length + pred_step - 1
    targets = series[first_target:first_target + n_windows].clone()
    return sequences, targets

In [None]:
# FOR OHIOT1DM

# Directory holding the per-subject training XML files. Set the
# OHIOT1DM_TRAIN_DIR environment variable to point at your own copy; the
# original machine-specific location is kept only as the fallback.
train_directory_path = os.environ.get(
    "OHIOT1DM_TRAIN_DIR",
    r'C:\Users\anonymoususer\OneDrive\Desktop\BGprediction\OhioT1DM\2018\train',  # Use a raw string for paths on Windows
)

# Subject file names without their extension. Only ".xml" files are kept
# because the training cells rebuild the file name as f"{subj}.xml"; sorting
# makes the subject order reproducible across machines.
train_file_names = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir(train_directory_path)
    if os.path.isfile(os.path.join(train_directory_path, file))
    and os.path.splitext(file)[1] == ".xml"
)

# Print the list of file names
print(train_file_names)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

class RNNModel(nn.Module):
    """ReLU ``nn.RNN`` (batch_first) followed by a linear read-out.

    The linear layer is applied to the full RNN output, i.e. to every step
    the RNN emits; no last-step selection is performed.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(
            input_size,
            hidden_size,
            nonlinearity='relu',
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through the RNN and project every output step with ``fc``."""
        rnn_out, _hidden = self.rnn(x)
        return self.fc(rnn_out)

In [None]:
class LSTMModel(nn.Module):
    """``nn.LSTM`` (batch_first) -> ReLU -> linear read-out.

    ReLU and the linear layer are applied to the complete LSTM output, so
    every step the LSTM emits is projected (not only the last one).
    """

    def __init__(self, input_features, hidden_dim, output_features):
        super().__init__()
        self.lstm = nn.LSTM(input_features, hidden_dim, batch_first=True)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_dim, output_features)

    def forward(self, x):
        """Return ``fc(relu(lstm_output))`` for all output steps of ``x``."""
        hidden_seq, _state = self.lstm(x)
        activated = self.relu(hidden_seq)
        return self.fc(activated)

In [None]:
class GRUModel(nn.Module):
    """``nn.GRU`` (batch_first) whose last time step feeds a linear read-out.

    ``forward`` indexes the GRU output as ``[:, -1, :]``, so it needs a 3-D
    GRU output, i.e. batched ``(batch, seq_length, input_size)`` input.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one prediction per sample, computed from the final time step."""
        hidden_seq, _last_hidden = self.gru(x)
        final_step = hidden_seq[:, -1, :]
        return self.fc(final_step)

# Stand-alone GRU instance; the per-subject training cells build their own models.
# NOTE: this cell used to read `x_train_uni.shape[-1]`, but no `x_train_uni` is
# defined in the visible notebook cells, so it raised NameError on a fresh
# kernel (Restart & Run All). For input shaped
# (batch_size, seq_length, num_features), a univariate series has exactly one
# feature per time step, so the value is stated directly.
input_size = 1  # Number of features
hidden_size = 50
output_size = 1

model = GRUModel(input_size, hidden_size, output_size)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [None]:
# Loss function only; no optimizer is created in this cell.
# train_epoch / validate_epoch below read this module-level `criterion`,
# so it must be defined before either of them is called.
criterion = nn.MSELoss()


In [None]:
def train_epoch(model, loader, optimizer, device, loss_fn=None):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    Args:
        model: The ``nn.Module`` to optimise (put into train mode here).
        loader: Iterable of ``(data, target)`` batches that supports ``len()``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Optional loss callable. When omitted, the notebook's
            module-level ``criterion`` is used, as before.

    Returns:
        float: Sum of per-batch losses divided by ``len(loader)``.
    """
    if loss_fn is None:
        loss_fn = criterion  # notebook-level loss; keeps existing call sites working
    model.train()
    total_loss = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)  # Move data to the correct device
        optimizer.zero_grad()

        output = model(data)
        # An (N, 1) output against an (N,) target would broadcast to (N, N)
        # inside an element-wise loss and silently compare every prediction
        # with every target. Align the shapes when the element counts agree.
        if output.shape != target.shape and output.numel() == target.numel():
            target = target.reshape(output.shape)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def validate_epoch(model, loader, device, loss_fn=None):
    """Evaluate ``model`` on ``loader`` without gradients and return the mean batch loss.

    Args:
        model: The ``nn.Module`` to evaluate (put into eval mode here).
        loader: Iterable of ``(data, target)`` batches that supports ``len()``.
        device: Device each batch is moved to before the forward pass.
        loss_fn: Optional loss callable. When omitted, the notebook's
            module-level ``criterion`` is used, as before.

    Returns:
        float: Sum of per-batch losses divided by ``len(loader)``.
    """
    if loss_fn is None:
        loss_fn = criterion  # notebook-level loss; keeps existing call sites working
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)  # Move data to the correct device
            output = model(data)
            # Prevent (N, 1) vs (N,) from broadcasting to an (N, N) loss matrix.
            if output.shape != target.shape and output.numel() == target.numel():
                target = target.reshape(output.shape)
            loss = loss_fn(output, target)
            total_loss += loss.item()
    return total_loss / len(loader)





In [None]:
def get_predictions_and_targets(model, data_loader):
    """Collect the model's outputs and the matching targets over a loader.

    The model is moved to CUDA when available (CPU otherwise) and switched to
    eval mode. Each batch is evaluated without gradients, and the per-sample
    rows are returned as NumPy values on the CPU.

    Returns:
        tuple[list, list]: ``(predictions, targets)``, one entry per sample.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    predictions, targets = [], []
    with torch.no_grad():
        for batch_inputs, batch_targets in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            batch_outputs = model(batch_inputs)
            # Bring results back to host memory before storing them.
            predictions += list(batch_outputs.detach().cpu().numpy())
            targets += list(batch_targets.detach().cpu().numpy())
    return predictions, targets


In [None]:
from sklearn.metrics import mean_squared_error

def rmse_pytorch(predictions, targets):
    """Root-mean-squared error between predictions and targets.

    Accepts tensors on any device, NumPy arrays, or nested Python lists; the
    previous implementation required CPU tensors because it called ``.float()``
    and then handed the tensors to scikit-learn.

    Args:
        predictions: Predicted values.
        targets: Ground-truth values with the same number of elements.

    Returns:
        numpy.float64: The RMSE over all elements (supports ``.item()``).

    Raises:
        ValueError: If the inputs are empty or their element counts differ.
    """
    preds = torch.as_tensor(predictions).detach().to("cpu", torch.float64)
    trues = torch.as_tensor(targets).detach().to("cpu", torch.float64)

    if preds.numel() == 0 or preds.numel() != trues.numel():
        raise ValueError(
            f"predictions and targets must be non-empty with equal element counts, "
            f"got shapes {tuple(preds.shape)} and {tuple(trues.shape)}"
        )
    # Flatten shape-mismatched inputs such as (N, 1) vs (N,) so the subtraction
    # cannot broadcast into an (N, N) matrix.
    if preds.shape != trues.shape:
        preds = preds.reshape(-1)
        trues = trues.reshape(-1)

    # Calculate MSE
    mse = torch.mean((preds - trues) ** 2).item()

    # Calculate RMSE
    rmse = np.sqrt(mse)
    return rmse



In [None]:
# Based on RNN: one model is trained and evaluated per subject file.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device.")
rmse_list = []

for subj in train_file_names:
    # Load from the directory the names were listed from, so listing and
    # loading cannot point at different folders.
    res = preprocess_OhioT1DM(os.path.join(train_directory_path, f"{subj}.xml"))

    # Window every gap-free segment separately so no window spans a gap.
    # Window length and look-ahead come from sequence_window / prediction_horizon.
    X_stack_tensor = []
    y_stack_tensor = []
    for ii in res:
        reconstruct_data = create_sequences(ii, sequence_window, prediction_horizon)
        if len(reconstruct_data[0]) == 0:
            continue  # segment too short for a single window
        X_stack_tensor.append(reconstruct_data[0])
        y_stack_tensor.append(reconstruct_data[1])

    # train_test_split needs at least one sample on each side of the split.
    if sum(len(x) for x in X_stack_tensor) < 2:
        print(f"Skipping {subj}: not enough contiguous data for windowing.")
        continue

    all_sequences = torch.cat(X_stack_tensor, dim=0)  # model inputs (X)
    # (N,) -> (N, 1) so targets match the model's (N, 1) output; otherwise
    # MSELoss broadcasts the pair to (N, N) and trains on the wrong objective.
    all_targets = torch.cat(y_stack_tensor, dim=0).unsqueeze(-1)  # labels (y)
    # Chronological split: the last 20% of windows form the validation set.
    sequences_train, sequences_val, targets_train, targets_val = train_test_split(
        all_sequences, all_targets, test_size=0.2, shuffle=False
    )

    # Build the data loader and model
    train_dataset = TensorDataset(sequences_train, targets_train)
    val_dataset = TensorDataset(sequences_val, targets_val)
    train_data_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_data_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Model parameters
    # NOTE(review): the batches are 2-D (batch, window); per the nn.RNN docs a
    # 2-D input is treated as one unbatched sequence, so the hidden state is
    # carried across the samples of a batch - confirm this is intended.
    input_size = sequences_train.shape[-1]  # Number of features

    # Model instantiation
    model_rnn = RNNModel(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model_rnn.parameters())

    # Training
    for epoch in range(num_epochs):
        train_loss = train_epoch(model_rnn, train_data_loader, optimizer, device)
        val_loss = validate_epoch(model_rnn, val_data_loader, device)
        # Print every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}: Train Loss {train_loss:.4f}, Val Loss {val_loss:.4f}')

    # Evaluate on the validation windows of this subject.
    predictions, targets = get_predictions_and_targets(model_rnn, val_data_loader)
    # Stack through NumPy first: torch.tensor on a list of ndarrays is very slow.
    rmse_value = rmse_pytorch(torch.tensor(np.array(predictions)), torch.tensor(np.array(targets)))
    rmse_list.append(rmse_value)
    print(f'RMSE: {rmse_value.item()}')
    print(f"=================================================")


In [None]:
# Based on LSTM: one model is trained and evaluated per subject file.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device.")
lstm_rmse_list = []

for subj in train_file_names[:10]:
    # Load from the directory the names were listed from, so listing and
    # loading cannot point at different folders.
    res = preprocess_OhioT1DM(os.path.join(train_directory_path, f"{subj}.xml"))

    # Window every gap-free segment separately so no window spans a gap.
    # Window length and look-ahead come from sequence_window / prediction_horizon.
    X_stack_tensor = []
    y_stack_tensor = []
    for ii in res:
        reconstruct_data = create_sequences(ii, sequence_window, prediction_horizon)
        if len(reconstruct_data[0]) == 0:
            continue  # segment too short for a single window
        X_stack_tensor.append(reconstruct_data[0])
        y_stack_tensor.append(reconstruct_data[1])

    # train_test_split needs at least one sample on each side of the split.
    if sum(len(x) for x in X_stack_tensor) < 2:
        print(f"Skipping {subj}: not enough contiguous data for windowing.")
        continue

    all_sequences = torch.cat(X_stack_tensor, dim=0)  # model inputs (X)
    # (N,) -> (N, 1) so targets match the model's (N, 1) output; otherwise
    # MSELoss broadcasts the pair to (N, N) and trains on the wrong objective.
    all_targets = torch.cat(y_stack_tensor, dim=0).unsqueeze(-1)  # labels (y)
    # Chronological split: the last 20% of windows form the validation set.
    sequences_train, sequences_val, targets_train, targets_val = train_test_split(
        all_sequences, all_targets, test_size=0.2, shuffle=False
    )

    # Build the data loader and model
    train_dataset = TensorDataset(sequences_train, targets_train)
    val_dataset = TensorDataset(sequences_val, targets_val)
    train_data_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_data_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Model parameters
    # NOTE(review): the batches are 2-D (batch, window); per the nn.LSTM docs a
    # 2-D input is treated as one unbatched sequence, so the hidden state is
    # carried across the samples of a batch - confirm this is intended.
    input_size = sequences_train.shape[-1]  # Number of features

    # Model instantiation
    model_lstm = LSTMModel(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model_lstm.parameters())

    # Training
    for epoch in range(num_epochs):
        train_loss = train_epoch(model_lstm, train_data_loader, optimizer, device)
        val_loss = validate_epoch(model_lstm, val_data_loader, device)
        # Print every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}: Train Loss {train_loss:.4f}, Val Loss {val_loss:.4f}')

    # Evaluate on the validation windows of this subject.
    predictions, targets = get_predictions_and_targets(model_lstm, val_data_loader)
    # Stack through NumPy first: torch.tensor on a list of ndarrays is very slow.
    rmse_value = rmse_pytorch(torch.tensor(np.array(predictions)), torch.tensor(np.array(targets)))
    lstm_rmse_list.append(rmse_value)
    print(f'RMSE: {rmse_value.item()}')
    print(f"=================================================")


In [None]:
# Based on GRU: one model is trained and evaluated per subject file.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device.")
gru_rmse_list = []

for subj in train_file_names[:10]:
    # Load from the directory the names were listed from, so listing and
    # loading cannot point at different folders.
    res = preprocess_OhioT1DM(os.path.join(train_directory_path, f"{subj}.xml"))
    X_stack_tensor = []
    y_stack_tensor = []

    # Window every gap-free segment separately so no window spans a gap.
    for ii in res:
        reconstruct_data = create_sequences(ii, sequence_window, prediction_horizon)
        if len(reconstruct_data[0]) == 0:
            continue  # segment too short for a single window
        X_stack_tensor.append(reconstruct_data[0])
        y_stack_tensor.append(reconstruct_data[1])

    # train_test_split needs at least one sample on each side of the split.
    if sum(len(x) for x in X_stack_tensor) < 2:
        print(f"Skipping {subj}: not enough contiguous data for windowing.")
        continue

    # GRUModel indexes its GRU output as [:, -1, :], which needs batched 3-D
    # input. Add a trailing feature axis: (N, window) -> (N, window, 1).
    # With the previous 2-D batches the forward pass raised IndexError.
    all_sequences = torch.cat(X_stack_tensor, dim=0).unsqueeze(-1)
    # (N,) -> (N, 1) so targets match the model's (N, 1) output instead of
    # being broadcast to (N, N) inside MSELoss.
    all_targets = torch.cat(y_stack_tensor, dim=0).unsqueeze(-1)
    # Chronological split: the last 20% of windows form the validation set.
    sequences_train, sequences_val, targets_train, targets_val = train_test_split(
        all_sequences, all_targets, test_size=0.2, shuffle=False
    )

    train_dataset = TensorDataset(sequences_train, targets_train)
    val_dataset = TensorDataset(sequences_val, targets_val)
    train_data_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
    val_data_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    input_size = sequences_train.shape[-1]  # features per time step (1 here)

    # Model instantiation
    model_gru = GRUModel(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model_gru.parameters())

    # Training
    for epoch in range(num_epochs):
        train_loss = train_epoch(model_gru, train_data_loader, optimizer, device)
        val_loss = validate_epoch(model_gru, val_data_loader, device)
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}: Train Loss {train_loss:.4f}, Val Loss {val_loss:.4f}')

    # Evaluate on the validation windows of this subject.
    predictions, targets = get_predictions_and_targets(model_gru, val_data_loader)
    # Stack through NumPy first: torch.tensor on a list of ndarrays is very slow.
    rmse_value = rmse_pytorch(torch.tensor(np.array(predictions)), torch.tensor(np.array(targets)))
    gru_rmse_list.append(rmse_value)
    print(f'RMSE: {rmse_value.item()}')
    print(f"=================================================")

In [None]:
# GRU with a larger batch size, shuffled training batches and early stopping.
# The earlier version of this cell left the data preparation, model creation
# and evaluation as placeholders, so every subject silently reused the
# tensors and the already-trained `model_gru` leaked from the previous cell.

# Model hyperparameters
batch_size = 50
num_epochs = 200  # Increased for better convergence (overrides the notebook-wide value)
patience = 10  # epochs without validation improvement before stopping
optimizer_name = 'adamax'  # or 'rmsprop' or 'adam'

# Optimizer factories for comparison; only the selected one is instantiated,
# and it is bound to the freshly created model of each subject.
optimizer_factories = {
    'adamax': lambda params: torch.optim.Adamax(params, lr=0.002),
    'rmsprop': lambda params: torch.optim.RMSprop(params, lr=0.001),
    'adam': lambda params: torch.optim.Adam(params, lr=0.001),
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device.")
gru_rmse_list = []

for subj in train_file_names[:10]:
    # Load and window this subject's data (one window set per gap-free segment).
    res = preprocess_OhioT1DM(os.path.join(train_directory_path, f"{subj}.xml"))
    X_stack_tensor = []
    y_stack_tensor = []
    for ii in res:
        reconstruct_data = create_sequences(ii, sequence_window, prediction_horizon)
        if len(reconstruct_data[0]) == 0:
            continue  # segment too short for a single window
        X_stack_tensor.append(reconstruct_data[0])
        y_stack_tensor.append(reconstruct_data[1])

    # train_test_split needs at least one sample on each side of the split.
    if sum(len(x) for x in X_stack_tensor) < 2:
        print(f"Skipping {subj}: not enough contiguous data for windowing.")
        continue

    # GRUModel needs batched 3-D input: (N, window) -> (N, window, 1).
    all_sequences = torch.cat(X_stack_tensor, dim=0).unsqueeze(-1)
    # (N,) -> (N, 1) so targets match the model output shape.
    all_targets = torch.cat(y_stack_tensor, dim=0).unsqueeze(-1)
    # Chronological split: the last 20% of windows form the validation set.
    sequences_train, sequences_val, targets_train, targets_val = train_test_split(
        all_sequences, all_targets, test_size=0.2, shuffle=False
    )

    # Modified DataLoader with new batch size and enabled shuffling
    train_dataset = TensorDataset(sequences_train, targets_train)
    val_dataset = TensorDataset(sequences_val, targets_val)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # Enable shuffling
    val_data_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    input_size = sequences_train.shape[-1]  # features per time step (1 here)

    # Fresh model per subject so nothing carries over between subjects.
    model_gru = GRUModel(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optimizer_factories[optimizer_name](model_gru.parameters())

    # Training with early stopping
    best_val_loss = float('inf')
    best_state = None
    patience_counter = 0

    for epoch in range(num_epochs):
        train_loss = train_epoch(model_gru, train_data_loader, optimizer, device)
        val_loss = validate_epoch(model_gru, val_data_loader, device)

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Snapshot the best weights so evaluation uses them, not the
            # weights from `patience` epochs later.
            best_state = {k: v.detach().clone() for k, v in model_gru.state_dict().items()}
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f'Early stopping triggered at epoch {epoch+1}')
            break

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}: Train Loss {train_loss:.4f}, Val Loss {val_loss:.4f}')

    if best_state is not None:
        model_gru.load_state_dict(best_state)

    # Evaluate the best checkpoint on this subject's validation windows.
    predictions, targets = get_predictions_and_targets(model_gru, val_data_loader)
    rmse_value = rmse_pytorch(torch.tensor(np.array(predictions)), torch.tensor(np.array(targets)))
    gru_rmse_list.append(rmse_value)
    print(f'RMSE: {rmse_value.item()}')
    print(f"=================================================")