<a href="https://colab.research.google.com/github/KrRishabh/RNN-in-Pytorch/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

### import libraries
import torch
import torch.nn as nn
import numpy as np


In [2]:
# Configuration of the recurrent layer
input_size  =  9      # size of each input vector (features / data channels per time step)
hidden_size = 16      # width of the hidden state
num_layers  =  1      # how many recurrent layers are stacked on top of each other
actfun      = 'tanh'  # nonlinearity applied inside the recurrence
bias        = True    # include the bias terms

# Build the layer with explicit keyword arguments and show its summary
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn)

RNN(9, 16)


In [3]:
# Dimensions of the toy data set
seqlength, batchsize = 5, 2

# Random input laid out as (sequence, batch, features)
X = torch.rand(seqlength, batchsize, input_size)

# Initial hidden state, all zeros: (layers, batch, hidden units)
hidden = torch.zeros(num_layers, batchsize, hidden_size)

# Forward pass through the layer, then report the tensor sizes involved
y, h = rnn(X, hidden)
print(f' Input shape: {list(X.size())}')
print(f'Hidden shape: {list(h.size())}')
print(f'Output shape: {list(y.size())}')


 Input shape: [5, 2, 9]
Hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]


In [4]:
# Display the random input tensor; it was created with dimensions
# (seqlength, batchsize, input_size).
X

tensor([[[0.7124, 0.8268, 0.6505, 0.6392, 0.1949, 0.1660, 0.9832, 0.2186,
          0.1415],
         [0.6042, 0.6245, 0.5489, 0.9903, 0.9862, 0.0162, 0.8493, 0.0171,
          0.1573]],

        [[0.9987, 0.6623, 0.1594, 0.1162, 0.5679, 0.4654, 0.4227, 0.7844,
          0.8729],
         [0.3261, 0.1932, 0.1789, 0.1211, 0.4954, 0.5682, 0.6585, 0.9681,
          0.8899]],

        [[0.5627, 0.9916, 0.2912, 0.8522, 0.0378, 0.5357, 0.9877, 0.0699,
          0.8187],
         [0.7047, 0.2444, 0.6404, 0.9799, 0.8291, 0.1972, 0.4736, 0.5714,
          0.3698]],

        [[0.7868, 0.0430, 0.5079, 0.2597, 0.0209, 0.4889, 0.4004, 0.0909,
          0.9572],
         [0.2797, 0.4605, 0.7898, 0.0445, 0.8399, 0.8541, 0.5313, 0.6067,
          0.0353]],

        [[0.5474, 0.4118, 0.7763, 0.7338, 0.4825, 0.9859, 0.5320, 0.5255,
          0.1174],
         [0.8845, 0.0939, 0.6210, 0.6325, 0.4380, 0.9568, 0.7112, 0.8778,
          0.4794]]])

In [5]:

## When no hidden state is passed, the RNN starts from an all-zero state.

# run 1: explicit zero-valued initial state
y, h1 = rnn(X, hidden)
print(h1)
print('\n\n')

# run 2: initial state omitted, so the layer falls back to its default
y, h2 = rnn(X)
print(h2)
print('\n\n')

# the element-wise difference is zero everywhere, so both runs started from zeros
print(h1 - h2)

tensor([[[ 0.2542, -0.2258, -0.2611, -0.2039, -0.2146, -0.3922, -0.1002,
          -0.3635,  0.0133, -0.4380, -0.2633, -0.1005, -0.0487, -0.4496,
          -0.0186, -0.0909],
         [ 0.4224, -0.4980, -0.1223,  0.0309,  0.0697, -0.7206, -0.0233,
          -0.3577,  0.1424, -0.6483, -0.3849,  0.0722, -0.1944, -0.4809,
           0.2954,  0.0585]]], grad_fn=<StackBackward0>)



tensor([[[ 0.2542, -0.2258, -0.2611, -0.2039, -0.2146, -0.3922, -0.1002,
          -0.3635,  0.0133, -0.4380, -0.2633, -0.1005, -0.0487, -0.4496,
          -0.0186, -0.0909],
         [ 0.4224, -0.4980, -0.1223,  0.0309,  0.0697, -0.7206, -0.0233,
          -0.3577,  0.1424, -0.6483, -0.3849,  0.0722, -0.1944, -0.4809,
           0.2954,  0.0585]]], grad_fn=<StackBackward0>)



tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SubBackward0>)


In [6]:

# List the learnable weight matrices of the RNN layer with their sizes
# (parameters whose name lacks 'weight', i.e. the biases, are skipped)
for name, param in rnn.named_parameters():
  if 'weight' not in name:
    continue
  print(f'{name} has size {list(param.shape)}')

weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]


In [7]:
class RNNnet(nn.Module):
  """Minimal recurrent network: an nn.RNN layer followed by a linear read-out.

  The linear layer maps every time step of the RNN output to one scalar.
  The forward pass prints the size of each intermediate tensor so the data
  flow can be followed.

  Args:
    input_size: number of features in each input vector.
    num_hidden: number of units in the hidden state.
    num_layers: number of stacked recurrent layers.
  """

  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # store parameters
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN Layer
    self.rnn = nn.RNN(input_size,num_hidden,num_layers)

    # linear layer for output
    self.out = nn.Linear(num_hidden,1)

  def forward(self,x):
    """Run a batch of sequences through the RNN and the linear read-out.

    Args:
      x: input tensor laid out as (sequence, batch, input_size).

    Returns:
      Tuple (o, hidden): `o` holds one prediction per time step and has size
      (sequence, batch, 1); `hidden` is the final hidden state with size
      (num_layers, batch, num_hidden).
    """

    print(f'Input: {list(x.shape)}')

    # initialize hidden state for first input; the batch size is read from the
    # input itself (dimension 1) instead of a notebook-level global, so the
    # model works for any batch size, and the state is allocated with the
    # input's dtype and device
    hidden = torch.zeros(self.num_layers,x.size(1),self.num_hidden,
                         dtype=x.dtype,device=x.device)
    print(f'Hidden: {list(hidden.shape)}')

    # run through the RNN layer
    y,hidden = self.rnn(x,hidden)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden.shape)}')

    # pass the RNN output through the linear output layer
    o = self.out(y)
    print(f'Output: {list(o.shape)}')

    return o,hidden

In [8]:

# Build the network and print its layer summary, followed by a spacer line
net = RNNnet(input_size, hidden_size, num_layers)
print(net)
print(' ')

# List every learnable parameter (weights and biases) with its size
for name, param in net.named_parameters():
  print(f'{name} has size {list(param.shape)}')

RNNnet(
  (rnn): RNN(9, 16)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


In [9]:

# Smoke test: push random inputs through the network
X = torch.rand(seqlength, batchsize, input_size)   # inputs: (sequence, batch, features)
y = torch.rand(seqlength, batchsize, 1)            # random targets, one value per time step
yHat, h = net(X)

# Mean-squared error between predictions and targets
# (the value of the last expression is shown as the cell output)
lossfun = nn.MSELoss()
lossfun(yHat, y)

Input: [5, 2, 9]
Hidden: [1, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [1, 2, 16]
Output: [5, 2, 1]


tensor(0.4411, grad_fn=<MseLossBackward0>)