# LSTM for Multivariate Time Series Prediction

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import pytorch_lightning as L
from torch.utils.data import TensorDataset, DataLoader

%load_ext blackcellmagic


## LSTM by Hand

In [8]:
class LSTMbyHand(L.LightningModule):
    """A single LSTM unit with scalar weights, written out gate by gate.

    Every weight and bias is an individual scalar ``nn.Parameter`` (12 in
    total), so the unit processes one scalar value per time step and carries
    a scalar long-term memory (cell state) and short-term memory (hidden
    state).
    """

    def __init__(self):
        super().__init__()
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        # Weights are drawn from a standard normal; biases start at zero.
        # Naming: w*1 multiplies the short-term memory, w*2 multiplies the input.

        # Percentage of the long-term memory to remember (forget gate).
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Percentage of the potential memory to add (input gate).
        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Potential (candidate) long-term memory.
        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

        # Percentage of the long-term memory to expose as output (output gate).
        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.0), requires_grad=True)

    def lstm_unit(self, input_value, long_memory, short_memory):
        """Apply one LSTM step.

        Args:
            input_value: Value observed at the current time step.
            long_memory: Long-term memory carried in from the previous step.
            short_memory: Short-term memory carried in from the previous step.

        Returns:
            Tuple ``(updated_long_memory, updated_short_memory)``.
        """
        # How much of the existing long-term memory survives this step.
        long_remember_percent = torch.sigmoid(
            (short_memory * self.wlr1) + (input_value * self.wlr2) + self.blr1
        )

        # How much of the candidate memory is added to the long-term memory.
        potential_remember_percent = torch.sigmoid(
            (short_memory * self.wpr1) + (input_value * self.wpr2) + self.bpr1
        )

        # Candidate memory, squashed into (-1, 1) by tanh.
        potential_memory = torch.tanh(
            (short_memory * self.wp1) + (input_value * self.wp2) + self.bp1
        )

        updated_long_memory = (long_memory * long_remember_percent) + (
            potential_remember_percent * potential_memory
        )

        # How much of the (tanh-squashed) long-term memory becomes the output.
        output_percent = torch.sigmoid(
            (short_memory * self.wo1) + (input_value * self.wo2) + self.bo1
        )

        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return updated_long_memory, updated_short_memory

    def forward(self, input):
        """Unroll the LSTM unit over every element of ``input`` in order.

        Both memories start at zero. The sequence may have any length; for
        the four-step sequences used in this notebook the result is the same
        as four explicit calls to ``lstm_unit``.

        Args:
            input: Sequence of values, one per time step (e.g. a 1-D tensor).

        Returns:
            The short-term memory after the last time step.
        """
        long_memory = 0
        short_memory = 0

        for value in input:
            long_memory, short_memory = self.lstm_unit(value, long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with default settings."""
        return Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        """Compute and log the squared error for one batch.

        Only the first sequence of the batch (``input_i[0]``) is run through
        the model, and ``label_i == 0`` is used as a Python condition, so this
        assumes a batch size of 1.

        Args:
            batch: ``(inputs, labels)`` pair produced by the dataloader.
            batch_idx: Index of the batch within the epoch (unused).

        Returns:
            The squared difference between the prediction and the label.
        """
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i) ** 2

        self.log("train_loss", loss)

        # log for first company
        if label_i == 0:
            self.log("out_0", output_i)
        # log for second company
        else:
            self.log("out_1", output_i)
        return loss

In [10]:
# Training data: one four-step sequence per company and the value to predict.
inputs = torch.tensor(
    [
        [0.0, 0.5, 0.25, 1.0],  # company A
        [1.0, 0.5, 0.25, 1.0],  # company B
    ]
)
labels = torch.tensor([0.0, 1.0])

# Model to be trained on the two sequences above.
model = LSTMbyHand()

# Serve one (sequence, label) pair per batch; batch_size=1 is the DataLoader
# default, spelled out here because training_step handles a single sample.
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset, batch_size=1)

In [11]:
# First training run: 2000 passes over the two-sample dataset.
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model=model, train_dataloaders=dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: /home/janhsc/Documents/Projects/MachineLearning/MachineLearning/DeepLearning/LSTM/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [13]:
# Checkpoint written by the previous trainer; training resumes from it.
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path

# max_epochs is the total epoch count, so resuming from the earlier
# checkpoint continues training up to epoch 3000.
trainer = L.Trainer(max_epochs=3000)
trainer.fit(
    model=model,
    train_dataloaders=dataloader,
    ckpt_path=path_to_best_checkpoint,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/janhsc/Documents/Projects/MachineLearning/MachineLearning/DeepLearning/LSTM/lightning_logs/version_0/checkpoints/epoch=1999-step=4000.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type | Params
------------------------------
------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
Restored all states from the check

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [16]:
# Run each company's sequence through the trained model; detach() drops the
# autograd graph so only the predicted value is printed.
prediction_a = model(torch.tensor([0., 0.5, 0.25, 1.])).detach()
prediction_b = model(torch.tensor([1., 0.5, 0.25, 1.])).detach()

print(f"Company A: Observed = 0, Predicted = {prediction_a}")
print(f"Company B: Observed = 1, Predicted = {prediction_b}")

Company A: Observed = 0, Predicted = 0.00031406752532348037
Company B: Observed = 1, Predicted = 0.9666670560836792


In [None]:
class LightningLSTM(L.LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, output_size, noise_stddev=0.01):
        """Build a batch-first LSTM stack followed by a linear output layer.

        Args:
            input_size: Number of features per time step.
            hidden_size: Size of the LSTM hidden state (stored as ``self.hidden``).
            num_layers: Number of stacked LSTM layers.
            output_size: Number of values produced by the final linear layer.
            noise_stddev: Standard deviation of the Gaussian noise added to the
                freshly initialised LSTM parameters.
        """
        super().__init__()
        self.hidden = hidden_size
        self.num_layers = num_layers

        # Recurrent part; its initial parameters are perturbed with Gaussian noise.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.add_noise_to_weights(self.lstm, noise_stddev)

        # Maps an LSTM hidden state to the requested output size.
        self.fc = nn.Linear(hidden_size, output_size)
        
    def add_noise_to_weights(self, layer, noise_stddev):
        """Perturb every trainable parameter of ``layer`` in place.

        Zero-mean Gaussian noise with standard deviation ``noise_stddev`` is
        added directly to each parameter's data, outside of autograd.
        """
        trainable = (p for p in layer.parameters() if p.requires_grad)
        for weight in trainable:
            weight.data += torch.randn_like(weight) * noise_stddev
                
    def forward(self, x):
        """Run the LSTM over ``x`` and project the last time step.

        Args:
            x: Input of shape ``(batch, seq_len, input_size)``; the LSTM is
                created with ``batch_first=True``.

        Returns:
            Tensor of shape ``(batch, output_size)`` computed from the LSTM
            output at the final time step.
        """
        # The hidden size is stored as ``self.hidden`` in __init__; reading
        # ``self.hidden_size`` here raised AttributeError on every call.
        state_shape = (self.num_layers, x.size(0), self.hidden)

        # Zero initial hidden and cell states, created directly on x's device.
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)

        out, _ = self.lstm(x, (h0, c0))
        # Keep only the output of the last time step for the prediction.
        out = self.fc(out[:, -1, :])
        return out
    
    def configure_optimizers