In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import lightning as L
from torch.utils.data import TensorDataset, DataLoader

In [2]:
class LSTMbyHand(L.LightningModule):
    def __init__(self):
        super().__init__()

        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        self.wlr1 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean, std=std), requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wpr1 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wp1 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        self.wo1 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(
            mean=mean, std=std), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

    def lstm_unit(self, input_value, long_memory, short_memory):
        long_remember_percent = torch.sigmoid(short_memory * self.wlr1
                                              + input_value * self.wlr2
                                              + self.blr1)
        
        potential_remember_percentage = torch.sigmoid(short_memory * self.wpr1
                                                      + input_value * self.wpr2
                                                      + self.bpr1)

        potential_memory = torch.tanh(short_memory * self.wp1
                                      + input_value * self.wp2
                                      + self.bp1)
        
        updated_long_memory = long_memory * \
            long_remember_percent + potential_remember_percentage * potential_memory
        
        output_percent = torch.sigmoid(short_memory * self.wo1
                                      + input_value * self.wo2
                                      + self.bo1)
        
        updated_short_memory = torch.tanh(updated_long_memory) * output_percent

        return ([updated_long_memory, updated_short_memory])

    def forward(self, input):
        long_memory, short_memory =0, 0
        day1, day2, day3, day4 = input[0], input[1], input[2], input[3]
        long_memory, short_memory = self.lstm_unit(
            day1, long_memory, short_memory)

        long_memory, short_memory = self.lstm_unit(
            day2, long_memory, short_memory)

        long_memory, short_memory = self.lstm_unit(
            day3, long_memory, short_memory)

        long_memory, short_memory = self.lstm_unit(
            day4, long_memory, short_memory)
        
        return short_memory

    def configure_optimizers(self):
        return Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        input_i, label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i) ** 2

        self.log("train_loss", loss)

        if label_i == 0:
            self.log("out_0", output_i)
        else:
            self.log("out_1", output_i)

        return loss

In [3]:
model = LSTMbyHand()

In [4]:
inputs = torch.tensor([[0., 0.5, 0.25, 1.], [1., 0.5, 0.25, 1.]])
labels = torch.tensor([0., 1.])

In [5]:
initial_prediction = model(inputs[0]).detach()
print(f"Initial prediction: {initial_prediction}")

Initial prediction: 0.07599812000989914


In [6]:
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)

In [7]:
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: d:\Programming\ML-Algos\lightning_logs

  | Name         | Type | Params
--------------------------------------
  | other params | n/a  | 12    
--------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
d:\Programming\ML-Algos\venv\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
d:\Programming\ML-Algos\venv\lib\site-packages\lightning\pytorch\loops\fit_loop.py:298: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower valu

Epoch 1999: 100%|██████████| 2/2 [00:00<00:00, 162.68it/s, v_num=0]

`Trainer.fit` stopped: `max_epochs=2000` reached.


Epoch 1999: 100%|██████████| 2/2 [00:00<00:00, 109.17it/s, v_num=0]


In [10]:
prediction_after_training = model(inputs[0]).detach()
print(f"Prediction after training: {prediction_after_training}")

Prediction after training: 0.0050132437609136105
