In [26]:
import torch
import torch.nn as nn
from sktime.forecasting.model_selection import SlidingWindowSplitter
import numpy as np
import os
import pandas as pd
import dask.dataframe
import torch.nn as nn

class TimeSeriesTransformer(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, window_length, forecast_horizon, epochs):
        super(TimeSeriesTransformer, self).__init__()
        
        self.d_model = d_model
        self.window_length = window_length
        self.forecast_horizon = forecast_horizon
        self.epochs = epochs  # Store the number of epochs

        # Linear layer to project input_size to d_model
        self.input_projection = nn.Linear(input_size, d_model)
        
        # Define transformer layers
        self.transformer = nn.Transformer(d_model=d_model, nhead=nhead, num_encoder_layers=num_layers)

        # Output layer to project back to the original dimension
        self.output_layer = nn.Linear(d_model, input_size)

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.loss_history = []

    
    def forward(self, src, tgt):
        # Project input to d_model size
        src = self.input_projection(src)
        tgt = self.input_projection(tgt)
        
        # Pass through transformer model
        output = self.transformer(src, tgt)
        
        # Project back to the original input size
        output = self.output_layer(output)
        return output


    def fit(self, data):
    # Ensure 'time' column is set as the index (if it's not already set)
        if not isinstance(data.index, pd.DatetimeIndex):
            data.index = pd.to_datetime(data.index)  # Make sure the index is a DatetimeIndex
        
        # Ensure 'bg' is the target column and is passed as a pandas Series
        target_series = data['bg']  # This is a pandas Series with a proper DatetimeIndex
        
        # Check the shape of the target_series (for debugging purposes)
        print(f"Shape of target_series: {target_series.shape}")
        
        # Prepare data for cross-validation with SlidingWindowSplitter
        cv = SlidingWindowSplitter(window_length=self.window_length, fh=self.forecast_horizon, step_length=1)
    
        # Iterate over the sliding windows
        for fold, (train_idx, test_idx) in enumerate(cv.split(target_series)):
            # Extract the train and test data based on indices from SlidingWindowSplitter
            train_data = target_series.iloc[train_idx].values  # Convert the slice to a NumPy array
            test_data = target_series.iloc[test_idx].values  # Convert the slice to a NumPy array
    
            # Convert the train and test data slices into PyTorch tensors
            train_tensor = torch.tensor(train_data, dtype=torch.float32).view(-1, 1)
            test_tensor = torch.tensor(test_data, dtype=torch.float32).view(-1, 1)
    
            # Debugging prints to ensure the data types and sizes are correct
            print(f"Fold {fold + 1}:")
            print(f"Train Data Tensor Shape: {train_tensor.shape}")
            print(f"Test Data Tensor Shape: {test_tensor.shape}")
    
            # Prepare the source and target for the model
            src = train_tensor[:-1].unsqueeze(1)  # Training window
            tgt = train_tensor[1:].unsqueeze(1)   # Shifted by one timestep
    
            # Train the model
            for epoch in range(self.epochs):
                self.train()
                self.optimizer.zero_grad()
    
                # Forward pass through the model
                output = self(src, tgt)
                loss = self.criterion(output.view(-1), train_tensor[1:].view(-1))
    
                # Backward pass and optimization
                loss.backward()
                self.optimizer.step()
    
            # Log the loss for each fold
            self.loss_history.append(loss.item())
            print(f"Fold {fold + 1} Loss: {loss.item()}")

    def forecast(self, test_data):
        # Prepare test data for forecasting
        current_window = test_data[:self.window_length].values  # Initial window from the test set
        current_window = torch.tensor(current_window, dtype=torch.float32).view(-1, 1)  # Ensure correct dimensions
        
        predictions = []
        
        # Iteratively forecast the future values
        for i in range(self.forecast_horizon):
            self.eval()  # Set model to evaluation mode
            
            # Use current window for prediction
            with torch.no_grad():
                current_window_input = current_window.unsqueeze(1)  # Add batch dimension
                predicted = self(current_window_input, current_window_input)
                prediction = predicted[-1]  # Get the last predicted value
            
            # Store prediction
            predictions.append(prediction.item())
            
            # Simulate rolling window
            if i < len(test_data) - self.window_length:
                new_value = test_data.iloc[self.window_length + i]  # Get next true value
            else:
                new_value = prediction.item()  # Use the predicted value if ground truth is unavailable
    
            # Concatenate the new value and ensure the dimensions match
            new_tensor = torch.tensor([[new_value]], dtype=torch.float32).unsqueeze(1)  # Add necessary dimension
            current_window = torch.cat([current_window, new_tensor], dim=0)[1:]  # Roll the window by 1
    
        return predictions


    def save_model(self, path="transformer_model.pth"):
        # Save the model state, optimizer state, and loss history
        torch.save({
            'model_state_dict': self.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'loss_history': self.loss_history,
            'epochs': self.epochs
        }, path)
        print(f"Model saved to {path}")

    def load_model(self, path="transformer_model.pth"):
        # Load the model state, optimizer state, and loss history
        if os.path.isfile(path):
            checkpoint = torch.load(path)
            self.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.loss_history = checkpoint['loss_history']
            self.learning_rate = checkpoint['learning_rate']
            self.epochs = checkpoint['epochs']
            print(f"Model loaded from {path}")
        else:
            print(f"No model found at {path}")



In [27]:
time_series_data = pd.read_csv(r"..\data/processed/cleaned_up_patients/p12.csv")
time_series_data.set_index('time', inplace=True)

print(time_series_data.head)
print(time_series_data.shape)

# Forecast on the last portion of the dataset (test set)
test_set = time_series_data[-12:]  # Use the last 12 points as a test set
#predictions = model.forecast(test_set)
test_set
#print("Predictions:", predictions)

<bound method NDFrame.head of                       bg
time                    
2024-10-02 10:25:00  4.1
2024-10-02 11:25:00  6.4
2024-10-02 12:25:00  8.1
2024-10-02 13:25:00  5.8
2024-10-02 14:25:00  6.3
...                  ...
2025-01-01 19:25:00  7.7
2025-01-01 20:25:00  8.6
2025-01-01 21:25:00  7.7
2025-01-01 22:25:00  6.2
2025-01-01 23:25:00  9.3

[2198 rows x 1 columns]>
(2198, 1)


Unnamed: 0_level_0,bg
time,Unnamed: 1_level_1
2025-01-01 12:25:00,5.0
2025-01-01 13:25:00,5.5
2025-01-01 14:25:00,6.9
2025-01-01 15:25:00,6.0
2025-01-01 16:25:00,5.9
2025-01-01 17:25:00,7.9
2025-01-01 18:25:00,10.0
2025-01-01 19:25:00,7.7
2025-01-01 20:25:00,8.6
2025-01-01 21:25:00,7.7


In [28]:

# Example usage
if __name__ == "__main__":
    # Generate synthetic time series data (replace with your actual time series data)
    np.random.seed(42)
    time_series_data = pd.read_csv(r"..\data/processed/cleaned_up_patients/p12.csv")
    time_series_data.set_index('time', inplace=True)
    
    #print(time_series_data.head)
    #print(time_series_data.shape)

    model = TimeSeriesTransformer(input_size=1, d_model=16, nhead=2, num_layers=2, window_length=50, forecast_horizon=6, epochs=1)

    #print(model)
    # Fit the model on time series data
    model.fit(time_series_data)

    # Save the model after training
    #model.save_model("trained_transformer_model.pth")

    # Forecast on the last portion of the dataset (test set)
    test_set = time_series_data[-12:]  # Use the last 12 points as a test set
    predictions = model.forecast(test_set)

    print("Predictions:", predictions)

    # Load the model if needed
    model.load_model("trained_transformer_model.pth")

<bound method NDFrame.head of                       bg
time                    
2024-10-02 10:25:00  4.1
2024-10-02 11:25:00  6.4
2024-10-02 12:25:00  8.1
2024-10-02 13:25:00  5.8
2024-10-02 14:25:00  6.3
...                  ...
2025-01-01 19:25:00  7.7
2025-01-01 20:25:00  8.6
2025-01-01 21:25:00  7.7
2025-01-01 22:25:00  6.2
2025-01-01 23:25:00  9.3

[2198 rows x 1 columns]>
(2198, 1)
TimeSeriesTransformer(
  (input_projection): Linear(in_features=1, out_features=16, bias=True)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
          )
          (linear1): Linear(in_features=16, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=16, bias=True)
          (norm1): Laye



Fold 4 Loss: 56.515098571777344
Fold 5:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 5 Loss: 55.038246154785156
Fold 6:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 6 Loss: 54.388851165771484
Fold 7:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 7 Loss: 53.7770881652832
Fold 8:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 8 Loss: 53.63056564331055
Fold 9:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 9 Loss: 53.34636688232422
Fold 10:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 10 Loss: 53.041648864746094
Fold 11:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor Shape: torch.Size([1, 1])
Fold 11 Loss: 52.514442443847656
Fold 12:
Train Data Tensor Shape: torch.Size([50, 1])
Test Data Tensor S

RuntimeError: Tensors must have same number of dimensions: got 2 and 3

In [3]:
%pip install dask[dataframe]

Note: you may need to restart the kernel to use updated packages.
