# LSTM for Multivariate Time Series Prediction

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torchmetrics import MeanAbsoluteError
from torch.utils.data import Dataset
import optuna
import pickle
import matplotlib.pyplot as plt
plt.style.use("seaborn-whitegrid")

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
from skimpy import clean_columns
from tqdm.notebook import tqdm


import pytorch_lightning as L
from torch.utils.data import TensorDataset, DataLoader

%load_ext blackcellmagic


2023-09-19 21:43:19.770948: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  plt.style.use("seaborn-whitegrid")


## LSTM by Hand

In [2]:
power = clean_columns(pd.read_csv('powerconsumption.csv'))
power['date_time'] = pd.to_datetime(power['date_time'])
power['month'] = power['date_time'].dt.month
power['day'] = power['date_time'].dt.day
power['hour'] = power['date_time'].dt.hour

In [3]:
X = power.drop(['date_time', 'zone_1_power_consumption', 'zone_2_power_consumption', 'zone_3_power_consumption'], axis=1)
y = power['zone_1_power_consumption']
train_size = int(0.8 * X.shape[0])
val_size = int(0.1 * X.shape[0])
test_size = X.shape[0] - train_size - val_size


sc = StandardScaler()

X_train = X.iloc[:train_size, :]
X_val = X.iloc[train_size:train_size + val_size, :]
X_test = X.iloc[train_size + val_size:, :]

y_train = y.iloc[:train_size]
y_val = y.iloc[train_size:train_size + val_size]
y_test = y.iloc[train_size + val_size:]

X_train = sc.fit_transform(X_train)
X_val = sc.transform(X_val)
X_test = sc.transform(X_test)

In [4]:
class LightningLSTM(L.LightningModule):
    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers,
        output_size,
        lr=0.01,
        noise_stddev=0.01,
    ):
        super(LightningLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.noise_stddev = noise_stddev
        self.lr = lr

        # Define LSTM layer with noise
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )
        self.add_noise_to_weights(self.lstm, self.noise_stddev)

        # Fully connected layer
        self.fc = nn.Linear(self.hidden_size, output_size)

        # Initialize MAE metric
        self.mae = MeanAbsoluteError()

    def add_noise_to_weights(self, layer, noise_stddev):
        for param in layer.parameters():
            if param.requires_grad:
                param.data += torch.randn_like(param) * noise_stddev

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # input.view(len(input), 1)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

    def configure_optimizers(self):
        return Adam(self.parameters(), self.lr)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)

        # Calculate Mean Absolute Error
        loss = nn.functional.l1_loss(y_pred, y)
        mae = self.mae(y_pred, y)

        # Log the MAE to the training progress bar
        self.log("train/train_mae", mae, on_step=True, on_epoch=False)

        return {"loss": loss, "mae": mae}
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        mae = self.mae(y_pred, y)
        
        # Log the MAE to the validation progress bar
        self.log("val/val_mae", mae, on_step=True, on_epoch=False)
        
        return {"val_mae": mae}
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        mae = self.mae(y_pred, y)
        
        # Log the MAE to the test progress bar
        self.log("test/test_mae", mae, on_step=True, on_epoch=False)
        
        return {"test_mae": mae}
        

In [5]:
# Set the default data type for torch tensors to float32
torch.set_default_dtype(torch.float32)

# Create Sequences and Targets
def create_sequences(X, y, sequence_length, target_length):
    """Create sequences and targets from data.

    Args:
        X (np.ndarray): Input data.
        y (pd.Series): Target data.
        sequence_length (int): Length of the sequence.
        target_length (int): Length of the target/forecasting horizon in periods.

    Returns:
        _type_: _description_
    """
    y = y.to_numpy()
    sequences = []
    targets = []
    for i in tqdm(range(X.shape[0] - sequence_length - target_length + 1)):
        seq = X[i:i + sequence_length, :]
        target = y[i + sequence_length:i + sequence_length + target_length]
        sequences.append(seq)
        targets.append(target)
    sequences = np.array(sequences)
    targets = np.array(targets)
    
    return torch.tensor(sequences, dtype = torch.float32), torch.tensor(targets, dtype = torch.float32)

X_train_seq, y_train_seq = create_sequences(X_train, y_train, 400, 6)
X_val_seq, y_val_seq = create_sequences(X_val, y_val, 400, 6)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, 400, 6)

  0%|          | 0/41527 [00:00<?, ?it/s]

  0%|          | 0/4836 [00:00<?, ?it/s]

  0%|          | 0/4838 [00:00<?, ?it/s]

In [6]:
class CustomDataset(Dataset):
    def __init__(self, inputs, targets):
        """
        Args:
            inputs (list): List of input sequences (PyTorch tensors or NumPy arrays).
            targets (list): List of target sequences (PyTorch tensors or NumPy arrays).
        """
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_sequence = self.inputs[idx]
        target_sequence = self.targets[idx]

        return input_sequence, target_sequence

In [7]:
train_set = CustomDataset(X_train_seq, y_train_seq)
val_set = CustomDataset(X_val_seq, y_val_seq)
test_set = CustomDataset(X_test_seq, y_test_seq)

train_loader = DataLoader(train_set, batch_size=32, num_workers=28, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, num_workers=28, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, num_workers=28, shuffle=False)

In [16]:
def objective(trial):
    """Objective function for Optuna optimization.

    Args:
        trial (int): Trial number.

    Returns:
        float: Metric to minimize or maximize.
    """
    # Sample hyperparameters
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    hidden_size = trial.suggest_int("hidden_size", 16, 256)
    num_layers = trial.suggest_int("num_layers", 1, 3)
    noise_stddev = trial.suggest_loguniform("noise_stddev", 1e-5, 1e-1)

    # Create a Lightning model with the sampled hyperparameters
    model = LightningLSTM(8, hidden_size, num_layers, 6, lr, noise_stddev)

    # Create a PyTorch Lightning Trainer
    trainer = L.Trainer(max_epochs = 1)  # Adjust the number of epochs as needed

    # Train the model
    trainer.fit(model, train_loader, val_loader)

    # Return the metric you want to optimize (e.g., validation loss)
    val_mae = trainer.logged_metrics.get("val/val_mae").item()
    return val_mae

In [17]:
if __name__ == "__main__":
    torch.set_float32_matmul_precision('medium')
    study = optuna.create_study(direction="minimize")  # or "maximize" for maximizing a metric
    study.optimize(objective, n_trials=10)  # Adjust the number of trials as needed

    best_params = study.best_params
    best_value = study.best_value
    print(f"Best hyperparameters: {best_params}")
    print(f"Best validation loss: {best_value}")

[I 2023-09-19 21:47:49,560] A new study created in memory with name: no-name-416745d0-7283-4961-9a85-b92044939c44

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.


suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 355 K 
1 | fc   | Linear            | 798   
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
356 K     Trainable params
0         Non-trainable params
356 K    

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:48:30,785] Trial 0 finished with value: 26871.859375 and parameters: {'lr': 0.00564136235325736, 'hidden_size': 132, 'num_layers': 3, 'noise_stddev': 0.010836524096985647}. Best is trial 0 with value: 26871.859375.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 318 K 
1 | fc   | Linear            | 972   
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
319 K     Trainable params
0         Non-trainable params
319 K     Total params
1.279     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:49:15,679] Trial 1 finished with value: 25409.228515625 and parameters: {'lr': 0.027100168305661875, 'hidden_size': 161, 'num_layers': 2, 'noise_stddev': 4.68421086167117e-05}. Best is trial 1 with value: 25409.228515625.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 181 K 
1 | fc   | Linear            | 1.3 K 
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
182 K     Trainable params
0         Non-trainable params
182 K     Total params
0.731     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:49:33,455] Trial 2 finished with value: 27682.267578125 and parameters: {'lr': 0.00044004929696713245, 'hidden_size': 208, 'num_layers': 1, 'noise_stddev': 0.001100378602111467}. Best is trial 1 with value: 25409.228515625.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 33.7 K
1 | fc   | Linear            | 312   
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
34.0 K    Trainable params
0         Non-trainable params
34.0 K    Total params
0.136     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:49:40,747] Trial 3 finished with value: 27799.17578125 and parameters: {'lr': 0.00010798237416530305, 'hidden_size': 51, 'num_layers': 2, 'noise_stddev': 0.007174401916578016}. Best is trial 1 with value: 25409.228515625.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 122 K 
1 | fc   | Linear            | 600   
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
122 K     Trainable params
0         Non-trainable params
122 K     Total params
0.492     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:49:51,297] Trial 4 finished with value: 23620.953125 and parameters: {'lr': 0.05555767555497658, 'hidden_size': 99, 'num_layers': 2, 'noise_stddev': 4.3619881691565234e-05}. Best is trial 4 with value: 23620.953125.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 732 K 
1 | fc   | Linear            | 1.1 K 
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
733 K     Trainable params
0         Non-trainable params
733 K     Total params
2.935     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:50:58,031] Trial 5 finished with value: 27685.5078125 and parameters: {'lr': 0.00047642561674444625, 'hidden_size': 190, 'num_layers': 3, 'noise_stddev': 4.928146132733536e-05}. Best is trial 4 with value: 23620.953125.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 8.3 K 
1 | fc   | Linear            | 120   
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
8.4 K     Trainable params
0         Non-trainable params
8.4 K     Total params
0.034     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:51:05,940] Trial 6 finished with value: 27807.076171875 and parameters: {'lr': 3.776227766019253e-05, 'hidden_size': 19, 'num_layers': 3, 'noise_stddev': 0.004621973492486963}. Best is trial 4 with value: 23620.953125.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 725 K 
1 | fc   | Linear            | 1.1 K 
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
726 K     Trainable params
0         Non-trainable params
726 K     Total params
2.905     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:52:12,501] Trial 7 finished with value: 22457.986328125 and parameters: {'lr': 0.04627046977952241, 'hidden_size': 189, 'num_layers': 3, 'noise_stddev': 0.0019682730259753126}. Best is trial 7 with value: 22457.986328125.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 691 K 
1 | fc   | Linear            | 1.4 K 
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
692 K     Trainable params
0         Non-trainable params
692 K     Total params
2.770     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:52:46,322] Trial 8 finished with value: 27800.048828125 and parameters: {'lr': 1.598405704251436e-05, 'hidden_size': 238, 'num_layers': 2, 'noise_stddev': 0.011071791740733921}. Best is trial 7 with value: 22457.986328125.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type              | Params
-------------------------------------------
0 | lstm | LSTM              | 851 K 
1 | fc   | Linear            | 1.2 K 
2 | mae  | MeanAbsoluteError | 0     
-------------------------------------------
853 K     Trainable params
0         Non-trainable params
853 K     Total params
3.413     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
[I 2023-09-19 21:53:20,440] Trial 9 finished with value: 27801.259765625 and parameters: {'lr': 1.4618340153704103e-05, 'hidden_size': 205, 'num_layers': 3, 'noise_stddev': 0.001459162870980104}. Best is trial 7 with value: 22457.986328125.


Best hyperparameters: {'lr': 0.04627046977952241, 'hidden_size': 189, 'num_layers': 3, 'noise_stddev': 0.0019682730259753126}
Best validation loss: 22457.986328125


In [24]:
# Plot the optimization history
optuna.visualization.plot_optimization_history(study)

In [25]:
# Plot the parameter importances
optuna.visualization.plot_param_importances(study)

In [26]:
# Plot the slice plot (useful for high-dimensional spaces)
optuna.visualization.plot_slice(study)

In [21]:
best_model = LightningLSTM(
    8,
    best_params["hidden_size"],
    best_params["num_layers"],
    6,
    best_params["lr"],
    best_params["noise_stddev"],
)

# Create a Lightning Trainer and evaluate the model on the test dataset
trainer = L.Trainer()
trainer.test(best_model, test_loader)

test_mae = trainer.logged_metrics.get("test/test_mae").item()
test_mae

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

35662.4296875

In [22]:
# save the best model
model_filename = "best_LSTM_model.pkl"
with open(model_filename, 'wb') as model_file:
    pickle.dump(best_model, model_file)