# COURSE: A deep understanding of deep learning
## SECTION: RNNs (and LSTM and GRU)
### LECTURE: The RNN class
#### TEACHER: Mike X Cohen, sincxpress.com
##### COURSE URL: udemy.com/course/deeplearning_x/?couponCode=202401

In [7]:
### import libraries
import torch
import torch.nn as nn
import numpy as np

# Explore the RNN type

In [8]:
# layer hyperparameters
input_size = 9      # features per time step fed into the layer (e.g., number of data channels)
hidden_size = 16    # number of units in the hidden state
num_layers = 1      # vertically stacked recurrent layers (only the final layer gives an output)
actfun = 'tanh'     # nonlinearity used by the recurrent layer
bias = True         # whether the layer includes bias terms

# build the RNN layer, passing every setting by keyword, and show its summary
rnn = nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    nonlinearity=actfun,
    bias=bias,
)
print(rnn)

RNN(9, 16)


In [9]:
# check out the source code for more detailed info about this class
??nn.RNN

Init signature: nn.RNN(*args, **kwargs)
Source:
class RNN(RNNBase):
    r"""__init__(self,input_size,hidden_size,num_layers=1,nonlinearity='tanh',bias=True,batch_first=False,dropout=0.0,bidirectional=False,device=None,dtype=None)

    Apply a multi-layer Elman RNN with :math:`\tanh` or :math:`\text{ReLU}`
    non-linearity to an input sequence. For each element in the input sequence,
    each layer computes the following function:

    .. math::
        h_t = \tanh(x_t W_{ih}^T + b_{ih} + h_{t-1}W_{hh}^T + b_{hh})

    where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is
    the input at time `t`, and :math:`h_{(t-1)}` is the hidden state of the
    previous layer at time `t-1` or the initial hidden state at time `0`.
    If :attr:`nonlinearity`

In [10]:
# dimensions of the demo data
seqlength = 5
batchsize = 2

# random input arranged as (sequence, batch, features)
X = torch.rand((seqlength, batchsize, input_size))

# initial hidden state (typically all zeros), arranged as (layers, batch, hidden units)
hidden = torch.zeros((num_layers, batchsize, hidden_size))

# push the data through the layer, then report the size of each tensor
y, h = rnn(X, hidden)
print(f' Input shape: {list(X.shape)}')
print(f'Hidden shape: {list(h.shape)}')
print(f'Output shape: {list(y.shape)}')

 Input shape: [5, 2, 9]
Hidden shape: [1, 2, 16]
Output shape: [5, 2, 16]


In [11]:
## Compare an explicit all-zeros initial hidden state with no hidden state at all

# explicit zeros passed in
y, h1 = rnn(X, hidden)
print(h1)
print('\n\n')

# nothing passed in, so the layer falls back to its default
y, h2 = rnn(X)
print(h2)
print('\n\n')

# the difference is zero everywhere, i.e., the default initial state is zeros
print(h1 - h2)

tensor([[[-0.0246,  0.4103, -0.1830, -0.3036,  0.2910, -0.7324, -0.1408,
           0.4718,  0.0942, -0.4898,  0.2934,  0.3846, -0.3289, -0.0476,
           0.0025,  0.5845],
         [-0.2870,  0.2886, -0.1513, -0.2644,  0.2927, -0.4875,  0.2424,
           0.2052,  0.0360,  0.0063,  0.0341,  0.5950, -0.0009,  0.1874,
          -0.1395,  0.6277]]], grad_fn=<StackBackward0>)



tensor([[[-0.0246,  0.4103, -0.1830, -0.3036,  0.2910, -0.7324, -0.1408,
           0.4718,  0.0942, -0.4898,  0.2934,  0.3846, -0.3289, -0.0476,
           0.0025,  0.5845],
         [-0.2870,  0.2886, -0.1513, -0.2644,  0.2927, -0.4875,  0.2424,
           0.2052,  0.0360,  0.0063,  0.0341,  0.5950, -0.0009,  0.1874,
          -0.1395,  0.6277]]], grad_fn=<StackBackward0>)



tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SubBackward0>)


In [12]:
# List the weight matrices of the RNN layer (biases are skipped) with their sizes
for name, param in rnn.named_parameters():
  if 'weight' not in name:
    continue
  print(f'{name} has size {list(param.shape)}')

weight_ih_l0 has size [16, 9]
weight_hh_l0 has size [16, 16]


# Create a DL model class

In [13]:
class RNNnet(nn.Module):
  """Recurrent network: an nn.RNN layer followed by a single-unit linear readout.

  Args:
    input_size: number of features per time step in the input.
    num_hidden: number of units in the RNN hidden state.
    num_layers: number of stacked RNN layers.
  """

  def __init__(self,input_size,num_hidden,num_layers):
    super().__init__()

    # store parameters
    self.input_size = input_size
    self.num_hidden = num_hidden
    self.num_layers = num_layers

    # RNN Layer
    self.rnn = nn.RNN(input_size,num_hidden,num_layers)

    # linear layer for output
    self.out = nn.Linear(num_hidden,1)

  def forward(self,x):
    """Run a batch of sequences through the RNN and the linear readout.

    The sizes of the input and of every intermediate tensor are printed.

    Args:
      x: input tensor laid out as (sequence length, batch size, input_size).

    Returns:
      A tuple (o, hidden): o is the readout applied to the RNN output at
      every time step, shape (sequence length, batch size, 1); hidden is the
      final hidden state, shape (num_layers, batch size, num_hidden).
    """

    print(f'Input: {list(x.shape)}')

    # initialize hidden state for first input.
    # The batch size is read from the input itself (dimension 1) rather than
    # from a notebook-level global, so the model works for any batch size;
    # dtype/device follow the input so the two tensors are always compatible.
    hidden = torch.zeros(self.num_layers,x.size(1),self.num_hidden,
                         dtype=x.dtype,device=x.device)
    print(f'Hidden: {list(hidden.shape)}')

    # run through the RNN layer
    y,hidden = self.rnn(x,hidden)
    print(f'RNN-out: {list(y.shape)}')
    print(f'RNN-hidden: {list(hidden.shape)}')

    # pass the RNN output through the linear output layer
    o = self.out(y)
    print(f'Output: {list(o.shape)}')

    return o,hidden

In [14]:
# instantiate the model and display its architecture
net = RNNnet(input_size, hidden_size, num_layers)
print(net)
print(' ')

# list every learnable parameter together with its size
for name, param in net.named_parameters():
  print(f'{name} has size {list(param.shape)}')

RNNnet(
  (rnn): RNN(9, 16)
  (out): Linear(in_features=16, out_features=1, bias=True)
)
 
rnn.weight_ih_l0 has size [16, 9]
rnn.weight_hh_l0 has size [16, 16]
rnn.bias_ih_l0 has size [16]
rnn.bias_hh_l0 has size [16]
out.weight has size [1, 16]
out.bias has size [1]


In [15]:
# exercise the model: random inputs plus random targets with one value per time step
X = torch.rand((seqlength, batchsize, input_size))
y = torch.rand((seqlength, batchsize, 1))

# forward pass through the network
yHat, h = net(X)

# mean-squared error between predictions and targets
# (left as the cell's final expression so the notebook displays the value)
lossfun = nn.MSELoss()
lossfun(yHat, y)

Input: [5, 2, 9]
Hidden: [1, 2, 16]
RNN-out: [5, 2, 16]
RNN-hidden: [1, 2, 16]
Output: [5, 2, 1]


tensor(0.6014, grad_fn=<MseLossBackward0>)

In [6]:
import tensorflow as tf
from tensorflow.keras import layers

# Set layer parameters (these rebind the notebook-level names used by the cells above)
input_size = 9  # size of the last (feature) dimension of the input data created below
hidden_size = 16  # number of units given to the SimpleRNN layer; also the width of the initial state
num_layers = 1  # passed to the RNNModel constructor
activation = 'tanh'  # activation function handed to the SimpleRNN layer

# Create an RNN model
class RNNModel(tf.keras.Model):
    """Keras model made of one SimpleRNN layer followed by a single-unit Dense readout.

    Args:
        input_size: accepted for signature parity; not referenced by the constructor.
        hidden_size: number of units in the SimpleRNN layer.
        num_layers: accepted for signature parity; not referenced by the constructor.
        activation: activation function passed to the SimpleRNN layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, activation):
        super().__init__()

        # recurrent layer configured to return the per-step outputs and the final state
        self.rnn = layers.SimpleRNN(
            hidden_size,
            activation=activation,
            return_sequences=True,
            return_state=True,
        )
        # readout applied to the recurrent layer's sequence output
        self.output_layer = layers.Dense(1)

    def call(self, inputs, states):
        """Run `inputs` through the recurrent layer, starting from `states`.

        Returns:
            A tuple of (Dense readout of the recurrent sequence output,
            final recurrent state).
        """
        sequence_out, final_state = self.rnn(inputs, initial_state=states)
        return self.output_layer(sequence_out), final_state

# Dimensions of the demo data
seq_length = 5
batch_size = 2

# Random input laid out as (batch, sequence, features), plus an all-zeros starting state
X = tf.random.uniform(shape=(batch_size, seq_length, input_size))
initial_state = tf.zeros(shape=(batch_size, hidden_size))

# Build the model
rnn_model = RNNModel(input_size, hidden_size, num_layers, activation)

# Forward pass, then report the size of each tensor
y, h = rnn_model(X, initial_state)
print(f' Input shape: {X.shape}')
print(f'Hidden shape: {h.shape}')
print(f'Output shape: {y.shape}')

# Walk through every layer and list its trainable weights with their sizes
for sublayer in rnn_model.layers:
    for variable in sublayer.trainable_weights:
        print(f'{variable.name} has size {variable.shape}')

# Random targets and a second forward pass to obtain predictions
y_true = tf.random.uniform(shape=(batch_size, seq_length, 1))
y_pred, h = rnn_model(X, initial_state)

# Mean-squared error between targets and predictions
loss_fn = tf.keras.losses.MeanSquaredError()
loss = loss_fn(y_true, y_pred)
print(f'Loss: {loss.numpy()}')

 Input shape: (2, 5, 9)
Hidden shape: (2, 16)
Output shape: (2, 5, 1)
kernel has size (9, 16)
recurrent_kernel has size (16, 16)
bias has size (16,)
kernel has size (16, 1)
bias has size (1,)
Loss: 0.5530695915222168


# Additional explorations

In [None]:
# 1) In the video, I asked about the "l0" in the parameter name "weight_ih_l0". To explore this further,
#    recreate that RNN instance but set the number of layers to 3. Then go through the code again to print
#    out all of the weight matrices. Refer back to the discussion of layers in the previous video. Do you
#    understand the naming system of the weight matrices?
