In [1]:
import torch
import numpy as np
from simple_RNN import *

### Set some parameters.
`seq_length` is the number of time steps in the sequence we pass in. `hidden_size` is the number of units in the hidden layer.


In [2]:
# Notebook-wide configuration for the RNN comparison.
seq_length = 10   # number of time steps in the sequence passed in
num_batches = 1   # NOTE(review): not referenced by the cells visible here; confirm it is still needed
hidden_size = 3   # number of units in the hidden layer

### Load some data

Shape should be (seq_length, vocab_size). Each character is a one-hot vector.

In [3]:
filename = 'input.txt'
# Read the corpus inside a context manager so the file handle is closed as
# soon as the text is in memory, rather than staying open for the life of
# the kernel.
with open(filename, 'r') as corpus_file:
    data_raw = corpus_file.read()  # should be a simple plain-text file
data_raw = data_raw.lower()

# data_from_text comes from simple_RNN. It returns the encoded text, the
# vocabulary size and an index-to-character lookup.
# NOTE(review): the encoding is presumably an array of integer character
# indices -- confirm against simple_RNN.
char_indices, vocab_size, idx_to_char = data_from_text(data_raw)

# Indexing the identity matrix with the encoded text selects one row of
# length vocab_size per character, i.e. a one-hot vector for each character.
data = np.eye(vocab_size)[char_indices]
print( 'Data shape: ', data.shape)

### Initialize the parameters and the hidden state

In [4]:
# Start the recurrence from an all-zero hidden state with one entry per hidden unit.
h_prev = np.zeros((hidden_size,))
# initialize_parameters comes from simple_RNN; later cells read the entries
# 'W', 'U' and 'bh' from the returned mapping.
params = initialize_parameters(hidden_size, vocab_size)

### Calculate the output h using my custom code

`h` is the output activation for each time step *before* any output layer is applied.

In [5]:
# Run the hand-written forward pass (rnn_forward comes from simple_RNN) over
# the one-hot inputs, starting from the initial hidden state.
# NOTE(review): h presumably has one row per time step -- confirm with the printed shape.
h = rnn_forward(data, params, h_prev)
print('h shape: ', h.shape)

### Use torch's RNN object to calculate the same thing

It requires the inputs to be a tensor with shape (seq_length, batch_num, vocab_size)

In [6]:
# Build torch's reference RNN with the same input and hidden sizes as the
# custom implementation.
dtype = torch.float
rnn = torch.nn.RNN(input_size=vocab_size, hidden_size=hidden_size)

# Overwrite torch's randomly initialised parameters with the custom ones so
# both implementations compute with identical weights.
rnn.weight_ih_l0.data = torch.tensor(params['U'], dtype=dtype)   # input -> hidden
rnn.weight_hh_l0.data = torch.tensor(params['W'], dtype=dtype)   # hidden -> hidden
rnn.bias_hh_l0.data = torch.tensor(params['bh'], dtype=dtype)
# torch keeps two bias vectors and adds both; zero the input-side bias so
# that only 'bh' contributes.
rnn.bias_ih_l0.data.zero_()

# Insert a batch axis of size 1 into the inputs and give the initial hidden
# state the (1, 1, hidden_size) shape that is passed to the module.
inputs = torch.tensor(data.reshape((len(data), 1, vocab_size)), dtype=dtype)
h_prev_torch = torch.tensor(h_prev.reshape((1, 1, hidden_size)), dtype=dtype)

# The first return value holds the hidden activation at every time step;
# the second (the final hidden state) is not needed here.
h_pytorch, _ = rnn(inputs, h_prev_torch)
print('h_pytorch shape: ', h_pytorch.shape)

### Verify that we get the same result

In [7]:
# Select batch element 0 of the torch output so it can be compared directly
# with the custom result, then check that the two agree numerically.
h_torch_np = h_pytorch.data.numpy()[:, 0, :]
np.testing.assert_array_almost_equal(h_torch_np, h)