### 1 the first stage o a LSTM determines the percentage of the Long-Term Memory that is going to be remembered by combining the short-term memory to the input
#### Usually called the **forget gate**

### The second stage combines the Short-Term Memory and the input to create a potential long memory and also the percentage of it to add to the Long-Term Memory
#### Usually called the **input gate**

### The final stages updates the short term memory
#### **Output Gate**

In [2]:
import torch
from torch import nn
import torch.nn.functional as F

In [416]:
class TorchLSTM(nn.Module):
    def __init__(self):
        super().__init__()

        self.lstm = nn.LSTM(input_size=1, hidden_size=1) #the input is going to go through another nn

    def forward(self, inputs):
        input_trans = inputs.view(len(inputs), 1) #1 input per row is how the nn.LSTM module takes the inputs in
        sm, lm = self.lstm(input_trans)
        #print(sm)
        #print()
        #print(lm)

        return sm[-1] #the last short memory output is the prediction for day 5



In [431]:
model = TorchLSTM()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=0.001)
loss_fn = nn.L1Loss()

In [432]:
from tqdm import tqdm

cA_inputs = torch.tensor([0, 0.5, 0.25, 1])
cB_inputs = torch.tensor([1, 0.5, 0.25, 1])

cA_label = torch.tensor(0)
cB_label = torch.tensor(1)

inputs = torch.stack((cA_inputs, cB_inputs))
labels = torch.stack((cA_label, cB_label))
print(inputs)
print(labels)

dataset = torch.utils.data.TensorDataset(inputs, labels)
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1)

for x, y in dataloader:
    print(x[0], y)

tensor([[0.0000, 0.5000, 0.2500, 1.0000],
        [1.0000, 0.5000, 0.2500, 1.0000]])
tensor([0, 1])
tensor([0.0000, 0.5000, 0.2500, 1.0000]) tensor([0])
tensor([1.0000, 0.5000, 0.2500, 1.0000]) tensor([1])


In [458]:
epochs = 3000

model.train()
for epoch in tqdm(range(epochs)):
    for x, y in dataloader:
        pred = model(x[0])
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
model.eval()
print(loss)

100%|██████████| 3000/3000 [00:04<00:00, 627.64it/s]

tensor(0.1066, grad_fn=<MeanBackward0>)





In [415]:
with torch.inference_mode():
    cA_pred = model(torch.tensor([0, 0.5, 0.25, 1.]))
    cB_pred = model(torch.tensor([1, 0.5, 0.25, 1.]))

print(cA_pred, cA_label)
print(cB_pred, cA_label)

tensor([0.0111]) tensor(0)
tensor([0.9962]) tensor(0)


In [95]:
####### There is something wrong with it
class LSTMbyHand(nn.Module):
    def __init__(self):
        super().__init__()


        #Forget gate
        self.wlr1 = nn.Parameter(torch.randn(1), requires_grad=True) #sm weight
        self.wlr2 = nn.Parameter(torch.randn(1), requires_grad=True) #input weight
        self.blr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        #Input gate
        self.wpr1 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.wpr2 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.bpr1 = nn.Parameter(torch.tensor(0.), requires_grad=True)


        self.wp1 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.wp2 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.bp1 = nn.Parameter(torch.tensor(0.), requires_grad=True)

        #Output gate
        self.wo1 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.wo2 = nn.Parameter(torch.randn(1), requires_grad=True)
        self.bo1 = nn.Parameter(torch.tensor(0.), requires_grad=True)



    #lstm_unit handles all the math
    def lstm_unit(self, input, lm, sm):
        long_remember_percent = torch.sigmoid(
            (sm*self.wlr1) + (input*self.wlr2) + self.blr1
        )

        remembered_lm = lm*long_remember_percent

        pontential_lm_remember_percent = torch.sigmoid(
            (sm*self.wpr1) + (input*self.wpr2) + self.bpr1
        )

        potential_lm = torch.tanh(
            (sm*self.wp1) + (input*self.wp2) + self.bp1
        )

        new_lm = remembered_lm + potential_lm


        potential_sm_remember_percent = torch.sigmoid(
            (sm*self.wo1) + (input*self.wo2)+ self.bo1
        )


        new_sm = torch.tanh(new_lm) * potential_sm_remember_percent


        return (new_sm, new_sm)


    
    def forward(self, inputs):
        lm = torch.tensor(0)
        sm = torch.tensor(0)

        day1, day2, day3, day4 = inputs[0] #passing in batches


        lm, sm = self.lstm_unit(day1, lm, sm)
        lm, sm = self.lstm_unit(day2, lm, sm)
        lm, sm = self.lstm_unit(day3, lm, sm)
        lm, sm = self.lstm_unit(day4, lm, sm)
    
        return sm

In [226]:
from tqdm import tqdm

model = LSTMbyHand()

cA_inputs = torch.tensor([0, 0.5, 0.25, 1])
cB_inputs = torch.tensor([1, 0.5, 0.25, 1])

cA_label = torch.tensor(0)
cB_label = torch.tensor(1)

inputs = torch.stack((cA_inputs, cB_inputs))
labels = torch.stack((cA_label, cB_label))
print(inputs)
print(labels)

dataset = torch.utils.data.TensorDataset(inputs, labels)
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=1, shuffle=True)

loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

model.eval()
with torch.inference_mode():
    pred = model([cA_inputs])

tensor([[0.0000, 0.5000, 0.2500, 1.0000],
        [1.0000, 0.5000, 0.2500, 1.0000]])
tensor([0, 1])


In [245]:
epochs = 10

model.train()
for epoch in tqdm(range(epochs)):
    for x, y in dataloader:
        preds = model(x)
        loss = loss_fn(preds, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
model.eval()

print(loss)

100%|██████████| 10/10 [00:00<00:00, 237.53it/s]

tensor(0.0048, grad_fn=<MeanBackward0>)





In [249]:
with torch.inference_mode():
    cA_pred = model([cA_inputs]) #have to pass in as batches
    cB_pred = model([cB_inputs])

print(cA_pred)
print(cB_pred)

tensor([0.0047])
tensor([-0.0159])
