## LSTM from scratch using Pytorch

### Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

In [3]:
import lightning as L
from torch.utils.data import TensorDataset, DataLoader

### Creating the functions 

In [11]:
class LSTMbyHand(L.LightningModule):
    def __init__(self): ## Create and initialize weights and bias

        super().__init__() ## initialization method from the parent class

        mean = torch.tensor(0.0)
        std = torch.tensor(1.0)

        self.wlr1 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.wlr2 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.blr1 = nn.Parameter(torch.tensor(0.),requires_grad=True)


        self.wpr1 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.wpr2 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.),requires_grad=True)

        self.wp1 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.wp2 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.),requires_grad=True)


        self.wo1 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.wo2 = nn.Parameter(torch.normal(mean=mean,std=std),requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.),requires_grad=True)

        
    def lstm_unit(self, input_value, long_memory,short_memory): ## Does LSTM Math
        long_remember_percent = torch.sigmoid((short_memory*self.wlr1)+(input_value*self.wlr2)+self.blr1)
        potential_remember_percent = torch.sigmoid((short_memory*self.wpr1)+(input_value*self.wpr2)+self.bpr1) 
                                                   
        potential_memory = torch.tanh((short_memory*self.wp1)+(input_value*self.wp2)+self.bp1)
        
        updated_long_memory = ((long_memory*long_remember_percent)+(potential_remember_percent*potential_memory))

        output_percent = torch.sigmoid((short_memory*self.wo1)+(input_value*self.wo2) + self.bo1)

        updated_short_memory = torch.tanh(updated_long_memory)*output_percent

        return ([updated_long_memory,updated_short_memory])
                             
    def forward(self,input):## Forward pass
        long_memory = 0
        short_memory = 0
        day1 = input[0]
        day2 = input[1]               
        day3 = input[2]
        day4 = input[3]

        long_memory,short_memory = self.lstm_unit(day1,long_memory,short_memory)
        long_memory,short_memory = self.lstm_unit(day2,long_memory,short_memory)     
        long_memory,short_memory = self.lstm_unit(day3,long_memory,short_memory)
        long_memory,short_memory = self.lstm_unit(day4,long_memory,short_memory)
        return short_memory
             
    def configure_optimizers(self): ## To optimize weights
        return Adam(self.parameters())
             
    def training_step(self,batch,batch_idx): ## calculate loss and log training progress
        input_i,label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2

        self.log("train_loss",loss)

        if (label_i==0):
             self.log("out_0",output_i)
        else:
             self.log("out_1",output_i)
        return loss
             
             

### Model Prediction

In [12]:
model = LSTMbyHand()
print("\n Now let's compare the observed and predicted values...")
print("Company A : Observed = 0, Predicted = ", model(torch.tensor([0.,0.5,0.25,1.])).detach())



 Now let's compare the observed and predicted values...
Company A : Observed = 0, Predicted =  tensor(0.1022)


In [13]:
print("Company B : Observed = 1, Predicted = ", model(torch.tensor([1.,0.5,0.25,1.])).detach())


Company B : Observed = 1, Predicted =  tensor(0.1111)


In [14]:
inputs = torch.tensor([[0.,0.5,0.25,1.],[1.,0.5,0.25,1.]])
labels = torch.tensor([0.,1.])

dataset = TensorDataset(inputs,labels)
dataloader = DataLoader(dataset)

In [15]:
trainer = L.Trainer(max_epochs=2000)
trainer.fit(model,train_dataloaders = dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs






  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12     | n/a 
---------------------------------------------
12        Trainable params
0         Non-trainable params
12        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
C:\Users\019176\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.
C:\Users\019176\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (2) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training

Training: |                              | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2000` reached.


In [16]:
print("\n Now let's compare the observed and predicted values...")
print("Company A : Observed = 0, Predicted = ", model(torch.tensor([0.,0.5,0.25,1.])).detach())



 Now let's compare the observed and predicted values...
Company A : Observed = 0, Predicted =  tensor(0.0010)


In [17]:
print("Company B : Observed = 1, Predicted = ", model(torch.tensor([1.,0.5,0.25,1.])).detach())


Company B : Observed = 1, Predicted =  tensor(0.9044)


In [18]:
## We are using the checkpoint
path_to_best_checkpoint = trainer.checkpoint_callback.best_model_path

### Increasing Epochs and training from the last checkpoint

In [19]:
trainer = L.Trainer(max_epochs=3000)
trainer.fit(model,train_dataloaders = dataloader,ckpt_path=path_to_best_checkpoint)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at C:\Users\019176\lightning_logs\version_0\checkpoints\epoch=1999-step=4000.ckpt
C:\Users\019176\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:366: The dirpath has changed from 'C:\\Users\\019176\\lightning_logs\\version_0\\checkpoints' to 'C:\\Users\\019176\\lightning_logs\\version_1\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 12 

Training: |                              | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3000` reached.


In [20]:
print("\n Now let's compare the observed and predicted values...")
print("Company A : Observed = 0, Predicted = ", model(torch.tensor([0.,0.5,0.25,1.])).detach())



 Now let's compare the observed and predicted values...
Company A : Observed = 0, Predicted =  tensor(0.0002)


In [21]:
print("Company B : Observed = 1, Predicted = ", model(torch.tensor([1.,0.5,0.25,1.])).detach())


Company B : Observed = 1, Predicted =  tensor(0.9522)


## Here Using the Lightning Module and creating an LSTM 

In [22]:
class LightningLSTM(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1,hidden_size=1)

    def forward(self,input):
        input_trans = input.view(len(input),1)

        lstm_out ,temp = self.lstm(input_trans)
        prediction = lstm_out[-1]

        return prediction

    def configure_optimizers(self):
        return Adam(self.parameters(),lr=0.1)


    def training_step(self,batch,batch_idx): ## calculate loss and log training progress
        input_i,label_i = batch
        output_i = self.forward(input_i[0])
        loss = (output_i - label_i)**2

        self.log("train_loss",loss)

        if (label_i==0):
             self.log("out_0",output_i)
        else:
             self.log("out_1",output_i)
        return loss

In [23]:
model2 = LightningLSTM()
print("\n Now let's compare the observed and predicted values...")
print("Company A : Observed = 0, Predicted = ", model2(torch.tensor([0.,0.5,0.25,1.])).detach())



 Now let's compare the observed and predicted values...
Company A : Observed = 0, Predicted =  tensor([-0.3287])


In [24]:
print("Company B : Observed = 1, Predicted = ", model2(torch.tensor([1.,0.5,0.25,1.])).detach())


Company B : Observed = 1, Predicted =  tensor([-0.3371])


In [25]:
trainer2 = L.Trainer(max_epochs = 300,log_every_n_steps=2)

trainer2.fit(model2,train_dataloaders = dataloader)
print("Company A : Observed = 0, Predicted = ", model2(torch.tensor([0.,0.5,0.25,1.])).detach())


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name | Type | Params | Mode 
--------------------------------------
0 | lstm | LSTM | 16     | train
--------------------------------------
16        Trainable params
0         Non-trainable params
16        Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode


Training: |                              | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=300` reached.


Company A : Observed = 0, Predicted =  tensor([4.3605e-05])


In [26]:
print("Company B : Observed = 1, Predicted = ", model2(torch.tensor([1.,0.5,0.25,1.])).detach())


Company B : Observed = 1, Predicted =  tensor([0.9878])


In [27]:
print("Company A : Observed = 0, Predicted = ", model2(torch.tensor([0.,0.5,0.25,1.])).detach())

Company A : Observed = 0, Predicted =  tensor([4.3605e-05])
