## 1. Standard imports

In [1]:
import torch
import torch.nn as nn
import numpy as np

## 2. Custom RNN function

#### Parameters of `nn.RNN`:

- **input_size** :    Number of input features (channels) at each time step.
- **hidden_size** :   Number of units (neurons) in each hidden layer.
- **num_layers** :    Number of recurrent layers stacked on top of each other.
- **nonlinearity** :  Non-linear activation function, either `'tanh'` or `'relu'`.
- **bias** :          Boolean; whether the layer uses bias terms.

In [2]:
def get_RNN_object(InputSize, HiddenSize, NumLayers, Act_Func, Bias_boolean=True):
    """Build an ``nn.RNN`` module from the given hyper-parameters.

    Args:
        InputSize:    forwarded as ``input_size``.
        HiddenSize:   forwarded as ``hidden_size``.
        NumLayers:    forwarded as ``num_layers``.
        Act_Func:     forwarded as ``nonlinearity``.
        Bias_boolean: forwarded as ``bias``; defaults to True.

    Returns:
        The freshly constructed ``nn.RNN`` instance.
    """
    # Collect the keyword arguments first so the mapping from this
    # function's argument names to nn.RNN's names is visible in one place.
    rnn_kwargs = {
        'input_size': InputSize,
        'hidden_size': HiddenSize,
        'num_layers': NumLayers,
        'nonlinearity': Act_Func,
        'bias': Bias_boolean,
    }
    return nn.RNN(**rnn_kwargs)

In [3]:
# Hyper-parameters for the demo network
InputSize = 9        # features per time step
HiddenSize = 16      # units in the hidden layer
NumLayers = 1        # a single recurrent layer
Act_Func = 'tanh'    # passed through as the RNN's nonlinearity

# Build the RNN; the bias flag is left at the builder's default
rnn = get_RNN_object(InputSize, HiddenSize, NumLayers, Act_Func)

In [4]:
# display the module's repr
rnn

RNN(9, 16)

#### Testing with some data

In [5]:
# dimensions of the toy batch
seqlength = 5
batchsize = 2

# random input laid out as (sequence, batch, features)
X = torch.rand((seqlength, batchsize, InputSize))

# all-zero initial hidden state laid out as (layers, batch, hidden units)
hidden = torch.zeros((NumLayers, batchsize, HiddenSize))

In [6]:
# display the random input tensor
X

tensor([[[0.3856, 0.5737, 0.8874, 0.0254, 0.9620, 0.9792, 0.2979, 0.0400,
          0.2241],
         [0.1422, 0.5874, 0.2770, 0.9340, 0.7371, 0.9755, 0.8080, 0.1163,
          0.7380]],

        [[0.6212, 0.0827, 0.9882, 0.1072, 0.1239, 0.2108, 0.6987, 0.6395,
          0.8871],
         [0.3684, 0.5073, 0.1879, 0.9176, 0.2309, 0.9625, 0.9133, 0.8233,
          0.1237]],

        [[0.6466, 0.7775, 0.0448, 0.5607, 0.2443, 0.2805, 0.5716, 0.8407,
          0.5344],
         [0.4270, 0.0825, 0.4444, 0.8419, 0.4574, 0.7139, 0.4903, 0.7766,
          0.4746]],

        [[0.8731, 0.8856, 0.1984, 0.4528, 0.3614, 0.5605, 0.0918, 0.9263,
          0.8398],
         [0.3132, 0.8152, 0.1837, 0.5278, 0.6020, 0.0938, 0.7795, 0.1980,
          0.6627]],

        [[0.3883, 0.1212, 0.2019, 0.8511, 0.7220, 0.7198, 0.1012, 0.6645,
          0.7219],
         [0.5121, 0.9515, 0.8932, 0.0854, 0.4964, 0.4865, 0.2314, 0.3279,
          0.2313]]])

In [7]:
# (seqlength, batchsize, InputSize)
X.shape

torch.Size([5, 2, 9])

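#### PyTorch applies a linear transformation to the incoming data:
- $y = xW^T + b$
- Inside `nn.RNN` this is applied at every time step to both the current input and the previous hidden state: $h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1} W_{hh}^T + b_{hh})$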


In [8]:
# Forward pass: feed the whole sequence through the RNN, starting from `hidden`.
# The call returns two tensors, unpacked here as the output `y` and the
# hidden state `h_state`; their sizes are printed next to the input's.
y,h_state = rnn(X,hidden)
print(f' Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h_state.shape)}')
print(f'Output shape: {list(y.shape)}')

 Input shape: [5, 2, 9]
Hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]


In [9]:
# display the RNN output tensor
y

tensor([[[ 0.0058,  0.3844,  0.2238, -0.0316, -0.2441,  0.1264, -0.4425,
          -0.1156, -0.3598, -0.0695, -0.4892, -0.1706, -0.1326, -0.2527,
          -0.1947, -0.6057],
         [ 0.0401,  0.3692,  0.1720, -0.1388, -0.3756, -0.1568, -0.4135,
          -0.3439, -0.4901, -0.1348, -0.5755, -0.3050, -0.3162, -0.1394,
          -0.2733, -0.6384]],

        [[ 0.1673,  0.1787,  0.1752, -0.4160, -0.3304, -0.1477, -0.4167,
           0.0150, -0.6172,  0.0870, -0.1976,  0.3134, -0.5938, -0.3466,
          -0.3610, -0.4992],
         [ 0.3124,  0.3108,  0.2748, -0.3895, -0.3615, -0.3143, -0.3484,
          -0.0815, -0.5925,  0.1745, -0.6962, -0.0602, -0.6516, -0.0069,
          -0.3309, -0.6380]],

        [[ 0.1907,  0.2264,  0.3088, -0.0021, -0.4549,  0.0406, -0.1843,
          -0.1552, -0.6533, -0.2917, -0.5606, -0.1963, -0.7830, -0.0415,
          -0.6664, -0.4946],
         [ 0.1279,  0.3609,  0.3854, -0.3896, -0.3762, -0.0978, -0.1873,
          -0.0287, -0.5300, -0.1523, -0.6075,  0

In [10]:
# When no initial hidden state is passed, the RNN starts from zeros.

# explicit all-zero initial state
y, h1 = rnn(X, hidden)
print(h1)
print('\n\n')

# initial state omitted
y, h2 = rnn(X)
print(h2)
print('\n\n')

# the difference is zero everywhere, i.e. the default equals zeros
print(h1 - h2)

tensor([[[ 0.0132,  0.1074,  0.5089, -0.2959, -0.6169,  0.3441, -0.3325,
          -0.0492, -0.6266, -0.1392, -0.5069,  0.2545, -0.6967, -0.0024,
          -0.7021, -0.7221],
         [ 0.1786,  0.2557,  0.3966, -0.1208, -0.6433,  0.2786, -0.4819,
          -0.2066, -0.5733,  0.0038, -0.3000,  0.1362, -0.4960,  0.0024,
          -0.3136, -0.5232]]], grad_fn=<StackBackward0>)



tensor([[[ 0.0132,  0.1074,  0.5089, -0.2959, -0.6169,  0.3441, -0.3325,
          -0.0492, -0.6266, -0.1392, -0.5069,  0.2545, -0.6967, -0.0024,
          -0.7021, -0.7221],
         [ 0.1786,  0.2557,  0.3966, -0.1208, -0.6433,  0.2786, -0.4819,
          -0.2066, -0.5733,  0.0038, -0.3000,  0.1362, -0.4960,  0.0024,
          -0.3136, -0.5232]]], grad_fn=<StackBackward0>)



tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SubBackward0>)


In [11]:
# print every (name, parameter) pair held by the RNN
for name, param in rnn.named_parameters():
    print((name, param))

('weight_ih_l0', Parameter containing:
tensor([[ 0.2417,  0.0965,  0.0229,  0.0664,  0.2416, -0.2049,  0.2306,  0.1260,
         -0.0549],
        [ 0.1524, -0.0409, -0.0506, -0.0410,  0.0149,  0.0689,  0.0926, -0.1741,
         -0.0076],
        [ 0.1972, -0.0767,  0.0424,  0.1904,  0.0565,  0.1051, -0.2361,  0.1813,
         -0.0630],
        [-0.1492,  0.2001, -0.2184, -0.2216,  0.0985, -0.1847,  0.0070,  0.0145,
         -0.1212],
        [-0.0331, -0.1258,  0.0135, -0.1312, -0.1661,  0.0897,  0.0951, -0.1510,
         -0.1920],
        [-0.2250,  0.1859,  0.0071, -0.2210,  0.1971, -0.0084, -0.2312, -0.0445,
          0.0551],
        [ 0.0398, -0.1330, -0.2105, -0.1030,  0.0795, -0.2028,  0.2231, -0.0844,
         -0.1520],
        [ 0.0266, -0.1274,  0.1901, -0.1059, -0.2120,  0.1853, -0.0590,  0.1891,
         -0.1035],
        [ 0.1730, -0.1491,  0.0949,  0.1587, -0.0953,  0.0364, -0.1548, -0.2421,
         -0.2444],
        [ 0.0505, -0.1636,  0.0410, -0.0185, -0.1522,  0.1025

In [12]:
# List the learnable weight matrices and their sizes
# (the "l0" suffix in a parameter name stands for layer 0)
for name, param in rnn.named_parameters():
    if 'weight' not in name:
        continue  # skip everything that is not a weight
    print(f'{name} has size {list(param.shape)}')

weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]


## 3. Custom RNN class

In [None]:
# Remaining hyper-parameter for the RNN settings defined in section 2
# (InputSize, HiddenSize, NumLayers and Act_Func keep their earlier values).
# NOTE: no trailing commas here -- `name = value,` would bind a 1-tuple
# and silently break every later use of the name as a size.
Bias_boolean = True  # include bias terms

In [16]:
class RNNnet(nn.Module):
    """Vanilla RNN followed by a linear read-out giving one value per time step.

    Args:
        InputSize:  number of input features per time step.
        HiddenSize: number of units in each recurrent layer.
        NumLayers:  number of stacked recurrent layers.
        BatchSize:  nominal batch size. Stored for reference only; the forward
                    pass sizes the initial hidden state from the input itself.
    """

    def __init__(self, InputSize, HiddenSize, NumLayers, BatchSize=1):
        super().__init__()

        # store parameters
        self.input_size = InputSize
        self.num_hidden = HiddenSize
        self.num_layers = NumLayers
        self.batch_size = BatchSize

        # RNN Layer
        self.rnn = nn.RNN(InputSize, HiddenSize, NumLayers)

        # Linear layer for output
        self.out = nn.Linear(HiddenSize, 1)

    def forward(self, x):
        """Run a batch of sequences through the RNN and the output layer.

        Args:
            x: input tensor laid out as (sequence, batch, InputSize).

        Returns:
            A tuple ``(o, hiddenstate)``: ``o`` is the linear read-out of the
            RNN output at every time step, ``hiddenstate`` is the hidden state
            returned by the RNN after the last time step.
        """
        print(f'Input: {list(x.shape)}')

        # Initialize the hidden state for the first input. It is built from
        # `x` so that it matches the input's batch size, dtype and device,
        # and it is a local tensor: nothing is read from the enclosing
        # notebook namespace.
        hiddenstate = x.new_zeros(self.num_layers, x.size(1), self.num_hidden)
        print(f'HiddenState Before: {list(hiddenstate.shape)}')

        # run through the RNN layer
        y, hiddenstate = self.rnn(x, hiddenstate)
        print(f'RNN-out: {list(y.shape)}')
        print(f'RNN-hiddenstate After: {list(hiddenstate.shape)}')

        # pass the RNN output through the linear output layer
        o = self.out(y)
        print(f'Output: {list(o.shape)}')

        # return the updated hidden state, not the initial zeros
        return o, hiddenstate

In [17]:
# Instantiate the network and show its layer structure
net = RNNnet(InputSize=9, HiddenSize=16, NumLayers=1, BatchSize=2)
print(net)
print(' ')

# Enumerate every learnable parameter together with its size
for name, param in net.named_parameters():
    print(f'{name} has size {list(param.shape)}')

RNNnet(
  (rnn): RNN(9, 16)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


### Test the model with some data

In [18]:
# Dimensions of the toy batch
seqlen = 5
BatchSize = 2
InputSize = 9

# Random inputs and random targets (one target value per time step and sample)
X = torch.rand((seqlen, BatchSize, InputSize))
y = torch.rand((seqlen, BatchSize, 1))

# Forward pass through the custom network
yHat, h = net(X)

# Check that a loss can be computed on the prediction
lossfun = nn.MSELoss()
lossfun(yHat, y)

Input: [5, 2, 9]
HiddenState Before: [1, 2, 16]
RNN-out: [5, 2, 16]
RNN-hiddenstate After: [1, 2, 16]
Output: [5, 2, 1]


tensor(0.4487, grad_fn=<MseLossBackward0>)

In [21]:
# display the network's prediction
yHat

tensor([[[-0.1556],
         [-0.0895]],

        [[-0.1924],
         [-0.0319]],

        [[-0.0592],
         [-0.0285]],

        [[ 0.0878],
         [-0.0402]],

        [[-0.0116],
         [ 0.0003]]], grad_fn=<ViewBackward0>)