In [1]:
import pandas as pd
import sys
import torch
from torch.utils.data import DataLoader

# Add your system path here
sys_path = r'C:\Users\Max Tost\Desktop\Notebooks\PowerPrediction\ml-project-2-powerpredictors'
sys.path.append(sys_path)

from helpers import *

### Loading the data in a dataframe

In [None]:
path_x = sys_path + r'\data\X_small.csv'
data_clean = pd.read_csv(path_x)
data_clean

Unnamed: 0.1,Unnamed: 0,power_consumption,ghi,temp,wind,is_weekend,is_spring,is_summer,is_autumn,is_winter,is_holiday,is_daylight
0,0,0.218368,2.318664,0.448423,-0.837392,0,1,0,0,0,0,1
1,1,-0.111426,2.091075,0.567245,-0.368104,0,1,0,0,0,0,1
2,2,-0.166392,1.985427,0.692455,0.114987,0,1,0,0,0,0,1
3,3,-0.035848,1.270273,0.766559,0.018369,0,1,0,0,0,0,1
4,4,0.307687,1.713181,0.799778,-0.174867,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
19095,19095,-1.064181,-0.659829,-0.057527,-0.230078,0,0,1,0,0,0,0
19096,19096,-1.058483,-0.659829,-0.090746,-0.216275,0,0,1,0,0,0,0
19097,19097,-1.025229,-0.659829,-0.076692,-0.257683,0,0,1,0,0,0,0
19098,19098,-0.764142,-0.491930,0.053629,-0.547537,0,0,1,0,0,0,0


In [5]:
data = torch.from_numpy(data_clean.to_numpy()) # Convert to torch tensor
data

tensor([[ 0.0000e+00,  2.1837e-01,  2.3187e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0000e+00],
        [ 1.0000e+00, -1.1143e-01,  2.0911e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0000e+00],
        [ 2.0000e+00, -1.6639e-01,  1.9854e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0000e+00],
        ...,
        [ 1.9097e+04, -1.0252e+00, -6.5983e-01,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9098e+04, -7.6414e-01, -4.9193e-01,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9099e+04, -2.9126e-01, -1.3911e-01,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], dtype=torch.float64)

## Creating features and targets with Dataloader to train the network
Here we will cut the whole data in slices of 7 days, which will be the features. \
The value of the power for the first hour of the 8th day should be the target. \
Then we will save them as features and targets to use them with pytorch

In [None]:
from torch.utils.data import Dataset

class MultiTimeSeriesDataset(Dataset):
    def __init__(self, dataset, seq_len=168): # 72 = 3*24 = hours in three days, since otherwise the RNN might have problems ..
        """
        Args:
            datasets (list of numpy.ndarray): List of time series datasets, 
                each of shape (n_hours, n_features).
            seq_len (int): Length of the input sequence.
        """
        self.data = []
        assert dataset.shape[0] > seq_len
        for i in range(dataset.shape[0] - seq_len):
            # Create input-output pairs for each dataset
            x = dataset[i:i + seq_len]
            y = dataset[i + seq_len]
            self.data.append((x, y))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x, y = self.data[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

In [10]:
# Creating datasets
train_dataset = MultiTimeSeriesDataset(data[:int(0.8*len(data))]) # Using first 80% for training
val_dataset = MultiTimeSeriesDataset(data[int(0.8*len(data)):]) # Using last 20% for evaluation 

# DataLoader for batching
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=True)

## Creating features and Targets manually

## Setting up the Network


In [11]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        """
        Initialize the LSTM-based regression model.

        Args:
            input_size (int): Number of input features (e.g., temperature, GHI, etc.).
            hidden_size (int): Number of units in each LSTM layer.
            num_layers (int): Number of stacked LSTM layers.
            output_size (int): Number of output features (e.g., predicted demand, 1 for regression).
            dropout_prob (float): Dropout probability to apply between LSTM layers and before the fully connected layer.
        """
        super(LSTMModel, self).__init__()

        # LSTM Layer
        # - Processes sequential data and learns temporal dependencies.
        # - Supports multiple layers (num_layers) and applies dropout between layers.
        self.lstm = nn.LSTM(
            input_size=input_size, 
            hidden_size=hidden_size, 
            num_layers=num_layers,  # Input/output shape: (batch_size, seq_length, input_size)
            dropout=dropout_prob,
            batch_first=True
        )

        # Fully Connected (Linear) Layer
        # - Maps the LSTM's hidden state output to the desired output size.
        self.fc = nn.Linear(hidden_size, output_size)

        # Dropout Layer
        # - Reduces overfitting by randomly zeroing some activations during training.
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        """
        Forward pass for the LSTM model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_size).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, output_size).
        """
        # LSTM Layer
        # - Returns the full sequence of hidden states and the final hidden/cell state tuple.
        # - We ignore the hidden/cell state tuple here (h_n, c_n).
        _ , (h_f, c_f) = self.lstm(x)

        # Dropout Layer
        # - Only uses the hidden state from the last time step for prediction.
        # - Applies dropout to prevent overfitting.
        out = self.dropout(h_f)  # Shape: (batch_size, hidden_size)

        # Fully Connected Layer
        # - Maps the LSTM's output to the desired output size (e.g., single regression output).
        out = self.fc(out)  # Shape: (batch_size, output_size)

        return out

## Training Loop

In [None]:
# 5 min per epoch here on this cpu, validation loss afer 1 epoch: 154743.1982 (Whatever that means)
import torch.optim as optim

num_epochs = 1

# Initialize model, loss function, and optimizer
model = LSTMModel(input_size=(8, hidden_size=64, num_layers=2, output_size=8, dropout_prob=0.2))
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    for x, y in train_loader:
        optimizer.zero_grad()

        # Reset hidden state between sequences
        output, hidden = model.lstm(x)
        output = model.fc(output[:, -1, :])  # Take last output for regression
        #assert output.shape == y.shape
        # Compute loss
        loss = criterion(output, y)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Validation (optional)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            output, _ = model.lstm(x_val)  # Reset hidden state for validation
            val_output = model.fc(output[:, -1, :])
            val_loss += criterion(val_output, y_val).item()
    val_loss /= len(val_loader)
    print(f"Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}")


  return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


KeyboardInterrupt: 

## Prediction - TO DO

In [144]:
def evaluate_model(model, initial_data, num_predictions=24):
    """
    Evaluate the LSTM to predict the next 24 hours recursively.

    Args:
        model (LSTMModel): Trained LSTM model.
        initial_data (torch.Tensor): Data from the last 7 days (shape: [168, num_features]).
        num_predictions (int): Number of hours to predict (default: 24).

    Returns:
        torch.Tensor: Predicted values for the next 24 hours (shape: [24, num_features]).
    """
    model.eval()
    predictions = []

    # Initialize hidden state with the last 7 days
    with torch.no_grad():
        input_seq = initial_data.unsqueeze(0)  # Shape: [1, seq_len=168, num_features]
        hidden = None  # Let the LSTM initialize hidden state

        # Process the last 7 days to initialize hidden state
        for t in range(initial_data.size(0)):
            _, hidden = model.lstm(input_seq[:, t:t+1, :], hidden)

        # Recursive prediction for the next 24 hours
        last_input = initial_data[-1, :].unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, num_features]
        for _ in range(num_predictions):
            output, hidden = model.lstm(last_input, hidden)  # Predict next hour
            prediction = model.fc(output[:, -1, :])  # Map hidden state to output
            predictions.append(prediction.squeeze(0))

            # Use the predicted value as the next input
            last_input = prediction.unsqueeze(0).unsqueeze(0)

    return torch.stack(predictions)  # Shape: [24, num_features]

# Example usage
last_week_data = torch.tensor(data[-168:], dtype=torch.float32)  # Last 7 days of data
predictions = evaluate_model(model, last_week_data)
print(predictions)


  last_week_data = torch.tensor(data[-168:], dtype=torch.float32)  # Last 7 days of data


ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

## Uncertainty - TO DO

In [13]:
def monte_carlo_predictions(model, x, n_simulations):
    """
    Perform Monte Carlo Dropout predictions to estimate both 
    the mean prediction and uncertainty.

    Args:
        model (torch.nn.Module): The trained PyTorch model with dropout layers.
        x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_features).
        n_simulations (int): Number of stochastic forward passes to perform.

    Returns:
        tuple:
            - mean_pred (torch.Tensor): The mean prediction across all simulations.
              Shape: (batch_size, output_features).
            - uncertainty (torch.Tensor): The standard deviation of predictions 
              (representing uncertainty) across simulations.
              Shape: (batch_size, output_features).
    """
    # Set the model to train mode to enable dropout during inference
    # Dropout layers behave stochastically in train mode, which is necessary for Monte Carlo sampling
    model.train()

    # Perform n_simulations stochastic forward passes
    # Each simulation generates slightly different predictions due to dropout
    preds = torch.stack([model(x) for _ in range(n_simulations)])  # Shape: (n_simulations, batch_size, output_features)

    # Compute the mean prediction across all simulations
    mean_pred = preds.mean(dim=0)  # Shape: (batch_size, output_features)

    # Compute the standard deviation across simulations to estimate uncertainty
    uncertainty = preds.std(dim=0)  # Shape: (batch_size, output_features)

    return mean_pred, uncertainty


In [None]:
test_x = [x for x, y in val_loader] # Extracting x and y from validation sets

In [None]:
uncert = monte_carlo_predictions(model, test_x[0], 100) #finding uncertainty with one prediction

In [24]:
uncert = []
for x_t in test_x:
    uncert.append(monte_carlo_predictions(model, x_t, 100))

KeyboardInterrupt: 

In [19]:
test_x[0].shape

torch.Size([1, 168, 8])

In [21]:
model(test_x[1])

tensor([[ 8.8918e+02,  6.4641e+00,  1.5547e+01,  1.0813e+01, -6.0888e-01,
          2.5608e-01,  2.8174e-01, -4.7669e-01]], grad_fn=<AddmmBackward0>)