In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# set layer parameters
input_size = 9 # no of features to extract (eg: number of data channels)
hidden_size = 16 # no of units in hidden state
num_layers = 1 # no of vertical stacks of hidden layers (only the final layer gives an output)
actfun = 'tanh'
bias = True

# create an RNN instance
rnn = nn.RNN(input_size, hidden_size, num_layers, nonlinearity = actfun, bias = bias)
print(rnn)

RNN(9, 16)


In [3]:
# set data parameters
seqlength = 5
batchsize = 2

# create some data
X = torch.rand(seqlength, batchsize, input_size)

# create a hidden layer (typically initialized as zeros)
hidden = torch.zeros(num_layers, batchsize, hidden_size)

# run some data thru the model and show the output sizes
y, h = rnn(X, hidden)
print(f'Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')

Input shape: [5, 2, 9]
Hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]


In [8]:
# Default hidden state is all zeros if nothing is specified
y, h1 = rnn(X, hidden)
print(h1), print('\n\n')

y, h2 = rnn(X) # no 2nd input still same effect as above where hidden is initialized to zeros
print(h2), print('\n\n')

# they're the same (meaning default = zeros)
print(h1 - h2)

tensor([[[ 0.3594, -0.2873,  0.2759, -0.0697,  0.0383, -0.5630, -0.1784,
           0.3934, -0.2888, -0.0261,  0.7370,  0.1172,  0.4130, -0.4845,
           0.0745,  0.0382],
         [ 0.1444, -0.2765,  0.3954, -0.1794,  0.1164, -0.6338,  0.0866,
           0.3805, -0.1298, -0.0128,  0.8552,  0.2816,  0.4813, -0.2404,
          -0.2895, -0.1675]]], grad_fn=<StackBackward0>)



tensor([[[ 0.3594, -0.2873,  0.2759, -0.0697,  0.0383, -0.5630, -0.1784,
           0.3934, -0.2888, -0.0261,  0.7370,  0.1172,  0.4130, -0.4845,
           0.0745,  0.0382],
         [ 0.1444, -0.2765,  0.3954, -0.1794,  0.1164, -0.6338,  0.0866,
           0.3805, -0.1298, -0.0128,  0.8552,  0.2816,  0.4813, -0.2404,
          -0.2895, -0.1675]]], grad_fn=<StackBackward0>)



tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SubBackward0>)


In [9]:
# check out the learned parameters and their sizes
for p in rnn.named_parameters():
    if 'weight' in p[0]:
        print(f'{p[0]} has size {list(p[1].shape)}')

weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]


In [11]:
# weight_ih = weight from input to hidden layer
# weight_hh = weight from hidden layer recursively into itself

In [12]:
class RNNnet(nn.Module):
    def __init__(self, input_size, num_hidden, num_layers):
        super().__init__()

        # store parameters
        self.input_size = input_size
        self.num_hidden = num_hidden
        self.num_layers = num_layers

        # RNN layer
        self.rnn = nn.RNN(input_size, num_hidden, num_layers)

        # linear layer for output
        self.out = nn.Linear(num_hidden, 1)

    def forward(self, x):

        print(f'Input: {list(x.shape)}')

        # initialize hidden state for first input
        hidden = torch.zeros(self.num_layers, batchsize, self.num_hidden)
        print(f'Hidden: {list(hidden.shape)}')

        # run thru the RNN layer
        y, hidden = self.rnn(x, hidden)
        print(f'RNN-out: {list(y.shape)}')
        print(f'RNN-hidden: {list(hidden.shape)}')

        # pass the RNN output thru the linear output layer
        o = self.out(y)
        print(f'Output: {list(o.shape)}')

        return o, hidden

In [13]:
# create an instance of the model and inspect
net = RNNnet(input_size, hidden_size, num_layers)
print(net), print(' ')

# and check out all learnable parameters
for p in net.named_parameters():
    print(f'{p[0]} has size {list(p[1].shape)}')

RNNnet(
  (rnn): RNN(9, 16)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


In [14]:
# test the model with some data
# create some data
X = torch.rand(seqlength, batchsize, input_size)
y = torch.rand(seqlength, batchsize, 1)
yHat, y = net(X)

# try a loss function
lossfun = nn.MSELoss()
lossfun(yHat, y)

Input: [5, 2, 9]
Hidden: [1, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [1, 2, 16]
Output: [5, 2, 1]


  return F.mse_loss(input, target, reduction=self.reduction)


tensor(0.1502, grad_fn=<MseLossBackward0>)