# Black Box Model 2: LSTM in PyTorch

In [1]:
import pandas as pd
from tqdm.notebook import tqdm
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import TensorDataset, Dataset, DataLoader
import lightning as L

from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

%load_ext blackcellmagic

## Data Preparation

In [2]:
df = pd.read_csv("../00 Data Retrieval and Cleaning/0_df_final_de-ch.csv")

In [18]:
list(df.columns)

['date',
 'dst',
 'auction_price',
 'allocatedCapacity',
 'ATC',
 'day_ahead_price_at',
 'day_ahead_price_ch',
 'day_ahead_price_de',
 'day_ahead_price_fr',
 'actual_load_at',
 'actual_load_ch',
 'actual_load_de',
 'actual_load_fr',
 'actual_load_it',
 'solar_forecast_at',
 'wind_onshore_forecast_at',
 'solar_forecast_ch',
 'wind_onshore_forecast_ch',
 'solar_forecast_de',
 'wind_onshore_forecast_de',
 'wind_offshore_forecast_de',
 'solar_forecast_fr',
 'wind_onshore_forecast_fr',
 'wind_offshore_forecast_fr',
 'solar_forecast_it',
 'wind_onshore_forecast_it',
 'biomass_actual_aggregated_at',
 'biomass_actual_consumption_at',
 'fossil_gas_actual_aggregated_at',
 'fossil_gas_actual_consumption_at',
 'fossil_hard_coal_actual_aggregated_at',
 'fossil_hard_coal_actual_consumption_at',
 'fossil_oil_actual_aggregated_at',
 'fossil_oil_actual_consumption_at',
 'geothermal_actual_aggregated_at',
 'geothermal_actual_consumption_at',
 'hydro_pumped_storage_actual_aggregated_at',
 'hydro_pumped_s

In [14]:
X = df.filter(["auction_price", "day_ahead_price_ch", "actual_load_at"]).dropna()
y = df.filter(["auction_price", "day_ahead_price_ch", "actual_load_at"]).dropna().auction_price

train_size = int(0.6 * X.shape[0])
val_size = int(0.2 * X.shape[0])
test_size = X.shape[0] - train_size - val_size

sc = StandardScaler()

X_train = X.iloc[:train_size, :]
X_val = X.iloc[train_size:train_size + val_size, :]
X_test = X.iloc[train_size + val_size:, :]

y_train = y.iloc[:train_size].to_numpy()
y_val = y.iloc[train_size:train_size + val_size].to_numpy()
y_test = y.iloc[train_size + val_size:].to_numpy()

X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

In [15]:
X

Unnamed: 0,auction_price,day_ahead_price_ch,actual_load_at
0,36.29,35.70,6544.50
1,35.46,43.07,7040.75
2,47.78,55.11,6225.25
3,9.88,64.80,8747.50
4,8.00,64.29,8830.50
...,...,...,...
44569,9.56,74.04,6501.50
44570,0.40,79.81,7284.25
44571,3.51,70.36,6420.50
44572,10.19,68.20,6039.25


## Model

In [238]:
class LightningLSTM(L.LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, output_size, lr=0.01,
                 noise_std=0.01):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.noise_std = noise_std
        self.lr = lr

        # Define LSTM layer
        self.lstm = nn.LSTM(
            input_size=input_size,  # number of features in input data
            hidden_size=self.hidden_size,  # number of output values
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.add_noise_to_weights(self.lstm, self.noise_std)

        # Define fully connected layer
        self.fc = nn.Linear(self.hidden_size, output_size)

    def add_noise_to_weights(self, layer, std):
        for param in layer.parameters():
            if param.requires_grad:
                param.data.add_(torch.randn(param.size()) * std)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        
        return out
    
    def configure_optimizers(self):
        return Adam(self.parameters(), self.lr)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)

        # Calculate evaluation metric
        loss = nn.functional.l1_loss(y_pred, y)
        eval_metric = mean_absolute_error(y_pred.detach().numpy(), y.detach().numpy())

        # Log the evaluation metric to the training progress bar
        self.log("train/train_eval_metric", eval_metric, on_step=True, on_epoch=False)

        return {"loss": loss, "eval_metric": eval_metric}
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        eval_metric = mean_absolute_error(y_pred.detach().numpy(), y.detach().numpy())
        
        # Log the evaluation metric to the validation progress bar
        self.log("val/val_eval_metric", eval_metric, on_step=True, on_epoch=False)
        
        return {"val_eval_metric": eval_metric}
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        eval_metric = mean_absolute_error(y_pred.detach().numpy(), y.detach().numpy())
        
        # Log the evaluation metric to the validation progress bar
        self.log("val/test_eval_metric", eval_metric, on_step=True, on_epoch=False)
        
        return {"test_eval_metric": eval_metric}


In [239]:
# Set the default data type for torch tensors to float32
torch.set_default_dtype(torch.float32)

In [240]:
# Create Sequences and Targets
def create_sequences(X, y, sequence_length, target_length):
    """Create sequences and targets from data.

    Args:
        X (np.ndarray): Input data.
        y (pd.Series): Target data.
        sequence_length (int): Length of the sequence.
        target_length (int): Length of the target/forecasting horizon in periods.

    Returns:
        _type_: _description_
    """
    sequences = []
    targets = []
    for i in tqdm(range(X.shape[0] - sequence_length - target_length + 1)):
        # seq = X[i:i + sequence_length, :]
        # target = y[i + sequence_length:i + sequence_length + target_length]
        seq = X[i:(i + sequence_length + target_length), :]
        target = y[(i + sequence_length):(i + sequence_length + target_length)]
        sequences.append(seq)
        targets.append(target)
    sequences = np.array(sequences)
    targets = np.array(targets)
    
    return torch.tensor(sequences, dtype = torch.float32), torch.tensor(targets, dtype = torch.float32)

In [241]:
X_train_seq, y_train_seq = create_sequences(X_train, y_train, 48, 24)
X_val_seq, y_val_seq = create_sequences(X_val, y_val, 48, 24)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, 48, 24)

  0%|          | 0/26635 [00:00<?, ?it/s]

  0%|          | 0/8831 [00:00<?, ?it/s]

  0%|          | 0/8831 [00:00<?, ?it/s]

In [242]:
class CustomDataset(Dataset):
    def __init__(self, inputs, targets):
        """
        Args:
            inputs (list): List of input sequences (PyTorch tensors or NumPy arrays).
            targets (list): List of target sequences (PyTorch tensors or NumPy arrays).
        """
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_sequence = self.inputs[idx]
        target_sequence = self.targets[idx]

        return input_sequence, target_sequence

In [243]:
X.size

133530

In [244]:
train_set = CustomDataset(X_train_seq, y_train_seq)
val_set = CustomDataset(X_val_seq, y_val_seq)
test_set = CustomDataset(X_test_seq, y_test_seq)

train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [172]:
# Create a Lightning model with the sampled hyperparameters
model = LightningLSTM(
    input_size=X.shape[1],
    hidden_size=128,
    num_layers=2,
    output_size=24,
    lr=0.01,
    noise_std=0.01,
)

model

LightningLSTM(
  (lstm): LSTM(3, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=24, bias=True)
)

In [173]:
# Create a PyTorch Lightning Trainer
trainer = L.Trainer(max_epochs=10)  # Adjust the number of epochs as needed

# Train the model
trainer.fit(model, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type   | Params
--------------------------------
0 | lstm | LSTM   | 200 K 
1 | fc   | Linear | 3.1 K 
--------------------------------
203 K     Trainable params
0         Non-trainable params
203 K     Total params
0.813     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\mathi\miniconda3\envs\general\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Users\mathi\miniconda3\envs\general\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [174]:
# Return the metric you want to optimize (e.g., validation loss)
trainer.logged_metrics.get("val/val_eval_metric").item()

4.1978440284729

### Making Predictions on the Test Period

In [229]:
X_test_seq.shape

model.eval()

with torch.no_grad():
    y_hat = model(X_test_seq)

In [246]:
model.freeze()

In [249]:
predictions = []
labels = []

for item in tqdm(test_set):
    print(item)

  0%|          | 0/8831 [00:00<?, ?it/s]

(tensor([[-0.4601, -0.2958,  1.8238],
        [-0.4570, -0.3895,  1.8395],
        [-0.4106, -0.3389,  1.4957],
        [-0.4562, -0.5389, -0.0859],
        [-0.4556, -0.2325,  2.0883],
        [-0.4300, -0.3453,  1.8918],
        [-0.3563, -0.3441,  1.6262],
        [-0.3293, -0.4772,  0.8674],
        [-0.4420, -0.5012,  0.8221],
        [-0.4028, -0.6003, -0.3028],
        [-0.4250, -0.6423, -0.4456],
        [-0.4412, -0.5714,  0.3156],
        [-0.3196, -0.6036, -0.0782],
        [-0.4069, -0.4806,  0.7563],
        [-0.2876, -0.5957,  0.0715],
        [-0.3010, -0.6389, -0.1391],
        [-0.2220, -0.4482,  1.9271],
        [-0.1794, -0.4744,  1.3923],
        [-0.2470, -0.4655,  1.5475],
        [-0.1942, -0.5011,  0.8196],
        [-0.1263, -0.5052,  0.6633],
        [-0.1914, -0.6249, -0.2529],
        [-0.1358, -0.4790,  1.7077],
        [-0.4253, -0.4934,  1.4569],
        [-0.4303, -0.4287,  1.3655],
        [-0.1116, -0.5283,  0.9713],
        [-0.2014, -0.5148,  1.3568],


KeyboardInterrupt: 