In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class LSTMbyHand(L.LightningModule):

    # create and intialize weights and biases
    def __init__(self):
        super().__init__()

        # initializing the normal distribution for the values to be randomly picked
        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        # assings a random value to the parameter from the noraml distribution defined above
        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wpr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wp1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wo1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    # Do the LSTM math
    def lstm_unit(self, input_value, long_memory, short_memory):
        long_remember_percent = torch.sigmoid((short_memory * self.wlr1 + input_value * self.wlr2) + self.blr1)

        potential_remember_percent = torch.sigmoid((short_memory * self.wpr1 + input_value * self.wpr2) + self.bpr1)
        potential_memory = torch.tanh((short_memory * self.wp1 + input_value * self.wp2) + self.bp1)

        update_long_memory = ((long_memory * long_remember_percent) + (potential_memory * potential_remember_percent))

        output_percent = torch.sigmoid((short_memory * self.wo1 + input_value * self.wo2) + self.bo1)
        update_short_memory = torch.tanh(update_long_memory) * output_percent

        return ([update_long_memory, update_short_memory])
    
    # make a forward pass through unrolled LSTM
    def forward(self, input):
        batch_size = input.size(0)
        long_memory = torch.zeros(batch_size)
        short_memory = torch.zeros(batch_size)

        for i in range(input.size(1)):
            long_memory, short_memory = self.lstm_unit(input[:, i], long_memory, short_memory)

        return short_memory

    def configure_optimizers(self):
        return Adam(self.parameters())
    
    # calculating loss and log training progress
    def training_step(self, batch, batch_idx):
        input_i, label_i = batch
        output_i = self.forward(input_i)
        loss = F.mse_loss(output_i, label_i)
        
        # logging the train_loss for later observation
        self.log('train_loss', loss)

        # keeping tracks of Company A (0), and Company B (else)
        if label_i.item() == 0:
            self.log('out_0', output_i)
        else:
            self.log('out_1', output_i)

        return loss

In [3]:
model = LSTMbyHand()
print("Let's compare the observed and predicted values: ")
print("Company A: Observed = 0, Predicted =", model(torch.tensor([[0., 0.5, 0.25, 1.]])).detach().item())
print("Company B: Observed = 1, Predicted =", model(torch.tensor([[1., 0.5, 0.25, 1.]])).detach().item())
# print("Company A: Observed = 0, Predicted =", model(torch.tensor([0., 0.5, 0.25, 1.])).detach())
# print("Company B: Observed = 1, Predicted =", model(torch.tensor([1., 0.5, 0.25, 1.])).detach())

Let's compare the observed and predicted values: 
Company A: Observed = 0, Predicted = 0.16313424706459045
Company B: Observed = 1, Predicted = 0.16874220967292786


Company A prediction is close, however Company B prediction is horrible
So let's get to work TRAININGGGGG

In [4]:
# Training the model boisssssssssss
inputs = torch.tensor([[0., 0.5, 0.25, 1.], [1., 0.5, 0.25, 1.]])
labels = torch.tensor([0., 1.])

dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)

trainer = L.Trainer(max_epochs = 2000)
trainer.fit(model, train_dataloaders=dataloader)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (2) is smaller than the logging interval Tra

Epoch 1999: 100%|██████████| 2/2 [00:00<00:00, 83.31it/s, v_num=2] 

`Trainer.fit` stopped: `max_epochs=2000` reached.


Epoch 1999: 100%|██████████| 2/2 [00:00<00:00, 63.25it/s, v_num=2]


In [5]:
print("Let's compare the observed and predicted values: ")
print("Company A: Observed = 0, Predicted =", model(torch.tensor([[0., 0.5, 0.25, 1.]])).detach().item())
print("Company B: Observed = 1, Predicted =", model(torch.tensor([[1., 0.5, 0.25, 1.]])).detach().item())


Let's compare the observed and predicted values: 
Company A: Observed = 0, Predicted = 4.304886533645913e-05
Company B: Observed = 1, Predicted = 0.9161689877510071


In [6]:
# Let's do even more training
# One of the things about lightning is that it saves the number of epochs so that we won't have to train the model again from starting

path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path
trainer = L.Trainer(max_epochs=3000)
# will make the model start from epoch = 2000
trainer.fit(model, train_dataloaders=dataloader, ckpt_path=path_to_best_checkpoint)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at c:\Users\11ukn\OneDrive\Desktop\Project ASUNA\Projects (ML)\Deep Learning - Duality\lightning_logs\version_2\checkpoints\epoch=1999-step=4000.ckpt
c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:362: The dirpath has changed from 'c:\\Users\\11ukn\\OneDrive\\Desktop\\Project ASUNA\\Projects (ML)\\Deep Learning - Duality\\lightning_logs\\version_2\\checkpoints' to 'c:\\Users\\11ukn\\OneDrive\\Desktop\\Project ASUNA\\Projects (ML)\\Deep Learning - Duality\\lightning_logs\\version_3\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/

Epoch 2999: 100%|██████████| 2/2 [00:00<00:00, 59.45it/s, v_num=3] 

`Trainer.fit` stopped: `max_epochs=3000` reached.


Epoch 2999: 100%|██████████| 2/2 [00:00<00:00, 45.93it/s, v_num=3]


In [7]:
print("Let's compare the observed and predicted values: ")
print("Company A: Observed = 0, Predicted =", model(torch.tensor([[0., 0.5, 0.25, 1.]])).detach().item())
print("Company B: Observed = 1, Predicted =", model(torch.tensor([[1., 0.5, 0.25, 1.]])).detach().item())

Let's compare the observed and predicted values: 
Company A: Observed = 0, Predicted = -0.0007492341683246195
Company B: Observed = 1, Predicted = 0.9572266936302185


In [8]:
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path
trainer = L.Trainer(max_epochs=5000)
# will make the model start from epoch = 3000
trainer.fit(model, train_dataloaders=dataloader, ckpt_path=path_to_best_checkpoint)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at c:\Users\11ukn\OneDrive\Desktop\Project ASUNA\Projects (ML)\Deep Learning - Duality\lightning_logs\version_3\checkpoints\epoch=2999-step=6000.ckpt
c:\Users\11ukn\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:362: The dirpath has changed from 'c:\\Users\\11ukn\\OneDrive\\Desktop\\Project ASUNA\\Projects (ML)\\Deep Learning - Duality\\lightning_logs\\version_3\\checkpoints' to 'c:\\Users\\11ukn\\OneDrive\\Desktop\\Project ASUNA\\Projects (ML)\\Deep Learning - Duality\\lightning_logs\\version_4\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/

Epoch 4999: 100%|██████████| 2/2 [00:00<00:00, 78.38it/s, v_num=4] 

`Trainer.fit` stopped: `max_epochs=5000` reached.


Epoch 4999: 100%|██████████| 2/2 [00:00<00:00, 56.50it/s, v_num=4]


In [9]:
print("Let's compare the observed and predicted values: ")
print("Company A: Observed = 0, Predicted =", model(torch.tensor([[0., 0.5, 0.25, 1.]])).detach().item())
print("Company B: Observed = 1, Predicted =", model(torch.tensor([[1., 0.5, 0.25, 1.]])).detach().item())

Let's compare the observed and predicted values: 
Company A: Observed = 0, Predicted = 2.3432488887920044e-05
Company B: Observed = 1, Predicted = 0.984913170337677
