Recurrent Neural Networks

In these neural networks the output is also influenced by previous inputs, which makes them useful for sequential data, for example predicting the next word in a sentence. They contain loops that feed a layer's output back into the same layer.

$ h_{t} = W_{hh}h_{t-1} + W_{hx}x_{t}$ \
$ y_{t} = W_{yh}h_{t}$

But these networks are very hard to train because of the vanishing and exploding gradient problems. So we use an improved variant, the LSTM (Long Short-Term Memory) network, which does not treat all previous inputs equally but carries information along separate short-term and long-term memory paths, with a forget gate deciding how much of the long-term memory is kept.

LSTM

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim

In [None]:
class LSTMcell(nn.Module):
    """A single LSTM cell with separate short-term and long-term memory.

    Features live on the last axis and are multiplied on the left of the
    weights (``x @ W``), so both un-batched vectors of shape ``(m,)`` and
    batches of shape ``(batch, m)`` are accepted.

    Subclassing ``nn.Module`` registers every ``nn.Parameter`` below, so
    ``cell.parameters()`` can be handed directly to a ``torch.optim`` optimizer.
    """

    def __init__(self,m,r):
        """Create the four gates' weights and biases.

        Args:
            m: number of input features.
            r: number of hidden units, i.e. the size of the short-term and
                long-term states.
        """
        super().__init__()

        def input_weight():
            # Maps the current input (m features) to r hidden units.
            return nn.Parameter(torch.rand(m,r),requires_grad=True)

        def short_weight():
            # Maps the previous short-term state (r units) to r hidden units.
            # This must be (r, r): the state it multiplies has r entries.
            return nn.Parameter(torch.rand(r,r),requires_grad=True)

        def bias():
            return nn.Parameter(torch.rand(r),requires_grad=True)

        # percentage long term to remember (forget gate, sigmoid)
        self.w_input_1 = input_weight()
        self.w_short_1 = short_weight()
        self.b_1 = bias()

        # percentage potential memory to remember (input gate, sigmoid)
        self.w_input_2 = input_weight()
        self.w_short_2 = short_weight()
        self.b_2 = bias()

        # Potential Long term memory for current input (candidate, tanh)
        self.w_input_3 = input_weight()
        self.w_short_3 = short_weight()
        self.b_3 = bias()

        # New short (output gate, sigmoid)
        self.w_input_4 = input_weight()
        self.w_short_4 = short_weight()
        self.b_4 = bias()

    @staticmethod
    def _pre_activation(input, short, w_input, w_short, b):
        # Weighted sum of the current input and previous short-term state for
        # one gate; the bias belongs inside the activation, so it is added here.
        return torch.matmul(input, w_input) + torch.matmul(short, w_short) + b

    def integrate(self, input, short, long):
        """Advance the cell by one time step.

        Args:
            input: current input, shape ``(..., m)``.
            short: previous short-term (hidden) state, shape ``(..., r)``.
            long: previous long-term (cell) state, shape ``(..., r)``.

        Returns:
            ``[newShort, newLong]``, each of shape ``(..., r)``.
        """
        # percentage long term to remember
        forget = torch.sigmoid(self._pre_activation(
            input, short, self.w_input_1, self.w_short_1, self.b_1))

        # percentage potential memory to remember
        remember = torch.sigmoid(self._pre_activation(
            input, short, self.w_input_2, self.w_short_2, self.b_2))

        # Potential Long term memory for current input
        candidate = torch.tanh(self._pre_activation(
            input, short, self.w_input_3, self.w_short_3, self.b_3))

        # Gating is element-wise: each hidden unit scales its own memory slot.
        newLong = long * forget + remember * candidate

        # new short
        output = torch.sigmoid(self._pre_activation(
            input, short, self.w_input_4, self.w_short_4, self.b_4))

        newShort = output * torch.tanh(newLong)

        return [newShort,newLong]

In [None]:
class LSTM:
    """Container that groups LSTM cells into layers.

    ``layers`` is a list of layers, and each layer is itself a list of
    ``LSTMcell`` objects.
    """

    def __init__(self):
        # No layers yet; add_layer() appends them in call order.
        self.layers = []

    def add_layer(self,m,r,n):
        """Append a new layer made of ``n`` cells, each built as ``LSTMcell(m, r)``."""
        new_layer = []
        # Register the layer first, then fill it, so cells land in the
        # already-attached list.
        self.layers.append(new_layer)
        new_layer.extend(LSTMcell(m,r) for _ in range(n))