In [1]:
import os
import sys

import pandas as pd

# Add your system path here.
# This folder is appended to the import search path (presumably so that
# `helpers` can be found) and is also the base for the CSV paths built below.
sys_path = r'C:\Users\Max Tost\Desktop\Notebooks\PowerPrediction\ml-project-2-powerpredictors'

# Re-running this cell must not keep stacking the same entry onto sys.path.
if sys_path not in sys.path:
    sys.path.append(sys_path)

from helpers import *

### Loading the data into DataFrames

In [14]:
# Build the CSV locations with os.path.join so the separator matches the host
# OS instead of being hard-wired to the Windows backslash.
path_x = os.path.join(sys_path, 'data', 'X.csv')
path_y = os.path.join(sys_path, 'data', 'Y.csv')

# 'Unnamed: 0' looks like a row index that was written into the CSV files;
# it is dropped so that only the real columns remain.
x = pd.read_csv(path_x).drop(columns='Unnamed: 0')
y = pd.read_csv(path_y).drop(columns='Unnamed: 0')

In [15]:
# Display the feature frame (the rendered output elides the middle rows and columns).
x

Unnamed: 0,ghi,temp,wind,year,month,day,hour,is_monday,is_tuesday,is_wednesday,...,wind_lag_23,wind_lag_24,wind_diff,ghi_x_temp,ghi_x_wind,temp_x_wind,Temperature_Index,CDD,HDD,wind_power_density
0,733.01,15.20,0.76,2022,4,13,10,0,0,1,...,0.00,0.00,0.00,11141.7520,557.0876,11.5520,6.80,0.00,6.80,0.268873
1,677.00,16.13,1.10,2022,4,13,11,0,0,1,...,0.00,0.00,0.34,10920.0100,744.7000,17.7430,5.87,0.00,12.67,0.815238
2,651.00,17.11,1.45,2022,4,13,12,0,0,1,...,0.00,0.00,0.35,11138.6100,943.9500,24.8095,4.89,0.00,17.56,1.867283
3,475.00,17.69,1.38,2022,4,13,13,0,0,1,...,0.00,0.00,-0.07,8402.7500,655.5000,24.4122,4.31,0.00,21.87,1.609694
4,584.00,17.95,1.24,2022,4,13,14,0,0,1,...,0.00,0.00,-0.14,10482.8000,724.1600,22.2580,4.05,0.00,25.92,1.167807
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19095,0.00,11.24,1.20,2024,6,17,1,1,0,0,...,1.08,0.88,-0.02,0.0000,0.0000,13.4880,10.76,5737.47,202614.07,1.058400
19096,0.00,10.98,1.21,2024,6,17,2,1,0,0,...,1.02,1.08,0.01,0.0000,0.0000,13.2858,11.02,5737.47,202625.09,1.085081
19097,0.00,11.09,1.18,2024,6,17,3,1,0,0,...,0.88,1.02,-0.03,0.0000,0.0000,13.0862,10.91,5737.47,202636.00,1.006357
19098,41.32,12.11,0.97,2024,6,17,4,1,0,0,...,1.17,0.88,-0.21,500.3852,40.0804,11.7467,9.89,5737.47,202645.89,0.559012


In [23]:
# Inspect the `hour` column of the feature frame.
x['hour']

0        10
1        11
2        12
3        13
4        14
         ..
19095     1
19096     2
19097     3
19098     4
19099     5
Name: hour, Length: 19100, dtype: int64

## Creating features and targets to train the network
Here we cut the whole dataset into slices of 7 days, which serve as the features. \
The power value for the first hour of the 8th day is the target for each slice. \
The resulting features and targets are then stored so that they can be used with PyTorch.

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset

class MultiTimeSeriesDataset(Dataset):
    """Sliding-window dataset that pairs `seq_len` consecutive rows with the row that follows them."""

    def __init__(self, data_x=None, data_y=None, seq_len=1, *, datasets=None):
        """
        Build all (input window, next-step target) pairs.

        Two mutually exclusive ways of supplying data are supported:

        * ``data_x`` / ``data_y``: one feature series and one target series of
          equal length. Sample ``i`` is ``(data_x[i:i + seq_len], data_y[i + seq_len])``.
        * ``datasets``: a list of series where each series is both input and
          target. Sample ``i`` is ``(data[i:i + seq_len], data[i + seq_len])``.

        Args:
            data_x (array-like, optional): Features of shape (n_hours, n_features).
            data_y (array-like, optional): Targets with the same number of rows as ``data_x``.
            seq_len (int): Length of the input sequence (1 for hour-by-hour training).
            datasets (list of array-like, optional): List of time series datasets,
                each of shape (n_hours, n_features).

        Raises:
            ValueError: If ``seq_len`` is not positive, if neither or both input
                forms are given, or if ``data_x`` and ``data_y`` differ in length.
        """
        if seq_len < 1:
            raise ValueError("seq_len must be a positive integer")

        if datasets is not None:
            if data_x is not None or data_y is not None:
                raise ValueError("Pass either `datasets` or `data_x`/`data_y`, not both")
            # Convert once so DataFrames and lists are indexed by row position.
            arrays = [np.asarray(series, dtype=np.float32) for series in datasets]
            pairs = [(series, series) for series in arrays]
        elif data_x is not None and data_y is not None:
            pairs = [(np.asarray(data_x, dtype=np.float32),
                      np.asarray(data_y, dtype=np.float32))]
        else:
            raise ValueError("Provide `datasets`, or both `data_x` and `data_y`")

        self.data = []
        for features, targets in pairs:
            if len(features) != len(targets):
                raise ValueError("data_x and data_y must have the same number of rows")
            # Windows never cross the boundary between two series.
            for i in range(len(features) - seq_len):
                window = features[i:i + seq_len]
                target = targets[i + seq_len]
                self.data.append((window, target))

    def __len__(self):
        """Return the total number of windows over all series."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``idx``-th (window, target) pair as float32 tensors."""
        x, y = self.data[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

In [None]:
# Imported here so this cell also runs on a fresh kernel, before the model cell.
import torch
from torch.utils.data import DataLoader

# Example usage:
# Assuming datasets is a list of numpy arrays
# NOTE(review): dataset1/dataset2/dataset3 are placeholders that are not defined
# in the visible cells; replace them with the real series before running.
multi_dataset = MultiTimeSeriesDataset(datasets=[dataset1, dataset2, dataset3])

# Split into training and validation sets
train_size = int(0.8 * len(multi_dataset))
val_size = len(multi_dataset) - train_size
# A seeded generator keeps the split identical across kernel restarts.
# NOTE(review): with seq_len > 1 neighbouring windows overlap, so a random split
# can put near-duplicate samples in both sets; a chronological split may be safer.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    multi_dataset, [train_size, val_size], generator=split_generator
)

# DataLoader for batching
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

## Setting up the Network


In [None]:
import torch
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class LSTMModel(nn.Module):
    """LSTM regressor: stacked LSTM -> dropout on the last time step -> linear head."""

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob):
        """
        Initialize the LSTM-based regression model.

        Args:
            input_size (int): Number of input features (e.g., temperature, GHI, etc.).
            hidden_size (int): Number of units in each LSTM layer.
            num_layers (int): Number of stacked LSTM layers.
            output_size (int): Number of output features (e.g., predicted demand, 1 for regression).
            dropout_prob (float): Dropout probability to apply between LSTM layers and before the fully connected layer.
        """
        super().__init__()

        # LSTM Layer
        # - Processes sequential data and learns temporal dependencies.
        # - Inter-layer dropout only exists when there are at least two layers;
        #   passing a non-zero value with a single layer has no effect and makes
        #   PyTorch emit a warning, so it is disabled in that case.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # Input/output shape: (batch_size, seq_length, input_size)
            dropout=dropout_prob if num_layers > 1 else 0.0,
        )

        # Fully Connected (Linear) Layer
        # - Maps the LSTM's hidden state output to the desired output size.
        self.fc = nn.Linear(hidden_size, output_size)

        # Dropout Layer
        # - Applied to the last time step's hidden state before the linear head.
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        """
        Forward pass for the LSTM model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_size).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, output_size).
        """
        # The LSTM returns the hidden states of every time step plus the final
        # (h_n, c_n) tuple; only the per-step outputs are needed here.
        out, _ = self.lstm(x)

        # Keep the last time step only and regularize it with dropout.
        out = self.dropout(out[:, -1, :])  # Shape: (batch_size, hidden_size)

        # Map the hidden representation to the requested number of outputs.
        out = self.fc(out)  # Shape: (batch_size, output_size)

        return out

## Training Loop

In [None]:
import torch.optim as optim

# Initialize model, loss function, and optimizer
# NOTE(review): input_size/output_size must match the feature and target widths
# produced by the data loaders — confirm against the dataset actually used.
model = LSTMModel(input_size=3, hidden_size=64, num_layers=2, output_size=3, dropout_prob=0.2)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Number of full passes over train_loader; adjust to the available training budget.
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    # Batch names are distinct from the notebook-level DataFrames `x` and `y`,
    # which would otherwise be overwritten by the loop variables.
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()

        # Go through the model's own forward pass so that the dropout in front of
        # the linear head is active during training. The LSTM starts from a fresh
        # hidden state on every call, so nothing carries over between batches.
        output = model(x_batch)

        # Compute loss
        loss = criterion(output, y_batch)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Validation (optional)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_val, y_val in val_loader:
            val_output = model(x_val)  # Dropout is inactive in eval mode
            val_loss += criterion(val_output, y_val).item()
    val_loss /= len(val_loader)
    print(f"Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}")


## Prediction

In [None]:
def evaluate_model(model, initial_data, num_predictions=24):
    """
    Predict the next hours recursively, feeding each prediction back as input.

    The history window is run through the LSTM once to build up the hidden
    state. The first forecast is read from the output at the last observed
    step; every later forecast is obtained by feeding the previous prediction
    back in as a one-step input.

    Args:
        model (LSTMModel): Trained model exposing `lstm` and `fc` sub-modules.
        initial_data (torch.Tensor): History window of shape [seq_len, num_features]
            (e.g. the last 7 days, 168 rows).
        num_predictions (int): Number of hours to predict (default: 24).

    Returns:
        torch.Tensor: Predictions of shape [num_predictions, output_size].

    Raises:
        ValueError: If `initial_data` is not a non-empty 2-D tensor, or if more
            than one step is requested while the model's output width differs
            from the input width (predictions could not be fed back).
    """
    if initial_data.dim() != 2 or initial_data.size(0) == 0:
        raise ValueError("initial_data must be a non-empty tensor of shape [seq_len, num_features]")

    model.eval()
    if num_predictions <= 0:
        return initial_data.new_empty((0, model.fc.out_features))

    num_features = initial_data.size(-1)
    predictions = []

    with torch.no_grad():
        # A single call over the whole window yields the same hidden state as
        # stepping through it one hour at a time.
        input_seq = initial_data.unsqueeze(0)  # Shape: [1, seq_len, num_features]
        output, hidden = model.lstm(input_seq)

        for step in range(num_predictions):
            # The output at the latest step is the forecast for the next hour.
            prediction = model.fc(output[:, -1, :])  # Shape: [1, output_size]
            predictions.append(prediction.squeeze(0))

            if step == num_predictions - 1:
                break
            if prediction.size(-1) != num_features:
                raise ValueError(
                    "Recursive forecasting needs output_size == num_features "
                    f"(got {prediction.size(-1)} and {num_features})"
                )

            # Feed the prediction back as a one-step sequence: [1, 1, num_features].
            output, hidden = model.lstm(prediction.unsqueeze(1), hidden)

    return torch.stack(predictions)  # Shape: [num_predictions, output_size]

# Example usage
# NOTE(review): `data` is not defined in any cell visible here; it must be an
# array-like whose rows match the model's input features — confirm before running.
last_week_data = torch.tensor(data[-168:], dtype=torch.float32)  # Last 168 rows (7 days of hourly data)
predictions = evaluate_model(model, last_week_data)
print(predictions)


## Uncertainty

In [None]:
def monte_carlo_predictions(model, x, n_simulations):
    """
    Perform Monte Carlo Dropout predictions to estimate both
    the mean prediction and uncertainty.

    The model is switched to train mode for the duration of the call so that
    its dropout layers stay stochastic, and is put back into the mode it was
    in before the function returns. No gradients are recorded.

    Args:
        model (torch.nn.Module): The trained PyTorch model with dropout layers.
        x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_features).
        n_simulations (int): Number of stochastic forward passes to perform.

    Returns:
        tuple:
            - mean_pred (torch.Tensor): The mean prediction across all simulations.
              Shape: (batch_size, output_features).
            - uncertainty (torch.Tensor): The standard deviation of predictions
              (representing uncertainty) across simulations.
              Shape: (batch_size, output_features). With a single simulation
              the sample standard deviation is undefined (NaN).

    Raises:
        ValueError: If `n_simulations` is smaller than 1.
    """
    if n_simulations < 1:
        raise ValueError("n_simulations must be at least 1")

    # Remember the caller's mode; train mode is only needed while sampling.
    was_training = model.training
    model.train()
    try:
        # Inference only: skip autograd bookkeeping for every stochastic pass.
        with torch.no_grad():
            preds = torch.stack([model(x) for _ in range(n_simulations)])  # (n_simulations, batch_size, output_features)
    finally:
        # Restores the mode on the whole module tree, even if a pass failed.
        model.train(was_training)

    # Aggregate over the simulation axis.
    mean_pred = preds.mean(dim=0)  # Shape: (batch_size, output_features)
    uncertainty = preds.std(dim=0)  # Shape: (batch_size, output_features)

    return mean_pred, uncertainty
