In [1]:
import torch
import numpy as np
from simple_RNN import *

### Set some parameters.
`seq_length` is the number of time steps in each sequence we pass in. `hidden_size` is the number of units in the hidden layer.


In [2]:
# Number of units in the LSTM hidden layer (also the width of h and c).
hidden_size = 3
# Number of time steps per sequence; not referenced by the cells visible below.
seq_length = 10
# Batch count; not referenced by the cells visible below.
num_batches = 1

### Load some data


In [3]:
# Read the corpus and one-hot encode it. The resulting `data` array has shape
# (number_of_characters_in_text, vocab_size): one one-hot row per character.

filename = 'input.txt'
# Use a context manager so the file handle is closed as soon as the text is
# read, instead of being left open until garbage collection.
with open(filename, 'r') as text_file:
    data_raw = text_file.read()  # should be a simple plain-text file
data_raw = data_raw.lower()

# data_from_text comes from simple_RNN; presumably it returns the text as
# integer character indices plus the vocabulary size and an index->char
# lookup -- TODO confirm against simple_RNN.
data, vocab_size, idx_to_char = data_from_text(data_raw)
# Indexing the identity matrix with the integer indices yields one one-hot
# row per character.
data = np.eye(vocab_size)[data]
print( 'Data shape: ', data.shape)


Data has length 454 and consist of 12 unique characters.
Data shape:  (454, 12)


### Initialize the parameters and the hidden state

In [4]:
# LSTM parameters for a single layer (built by the simple_RNN helper).
params = initialize_parameters_lstm(hidden_size, vocab_size)
# Both the hidden state and the cell state start as all-zero vectors.
h_prev, c_prev = np.zeros(hidden_size), np.zeros(hidden_size)

### Calculate the outputs h and c using my custom code

`h` is the output activation at each time step, before any output layer is applied. `c` is the corresponding cell state.


In [5]:
# Run the custom LSTM forward pass over the whole one-hot sequence, starting
# from the zero initial states. h and c hold one row per time step.
h, c = lstm_forward(data, params, h_prev, c_prev)
print( 'h shape: ', h.shape)
print( 'c shape: ', c.shape)

h shape:  (454, 3)
c shape:  (454, 3)


### Use torch's LSTM object to calculate the same thing

`torch.nn.LSTM` requires the input to be a tensor of shape (seq_length, batch_size, vocab_size).

In [7]:
# Single-layer reference LSTM with the same sizes as the custom implementation.
# Its randomly initialised weights are overwritten further down.
lstm = torch.nn.LSTM(
    input_size=vocab_size,
    hidden_size=hidden_size,
    num_layers=1,
)
# dtype used when converting NumPy arrays to tensors (torch.float is float32).
dtype = torch.float32

def set_lstm_weights(lstm_object, params, layer=0):
    """Overwrite one layer of a ``torch.nn.LSTM`` with custom per-gate parameters.

    The per-gate arrays in ``params`` are concatenated along axis 0 in the
    order input (i), forget (f), cell (g), output (o) -- the order in which
    ``torch.nn.LSTM`` packs its gates -- and written into the layer's
    ``weight_ih``, ``weight_hh`` and ``bias_ih`` tensors. ``bias_hh`` is set
    to zeros, so ``bias_ih`` carries the whole bias.

    Args:
        lstm_object: Module exposing ``weight_ih_l{layer}``,
            ``weight_hh_l{layer}``, ``bias_ih_l{layer}`` and
            ``bias_hh_l{layer}`` (e.g. ``torch.nn.LSTM``).
        params: Mapping with array values under the keys ``'wix'``, ``'wfx'``,
            ``'wgx'``, ``'wox'`` (input weights), ``'wih'``, ``'wfh'``,
            ``'wgh'``, ``'woh'`` (recurrent weights) and ``'bi'``, ``'bf'``,
            ``'bg'``, ``'bo'`` (biases).
        layer: Index of the LSTM layer to overwrite.

    Raises:
        ValueError: If a concatenated array does not have the same shape as
            the tensor it is meant to replace. Nothing is modified in that
            case.
    """
    weight_ih = np.concatenate([params['wix'], params['wfx'], params['wgx'], params['wox']])
    weight_hh = np.concatenate([params['wih'], params['wfh'], params['wgh'], params['woh']])
    bias_ih = np.concatenate([params['bi'], params['bf'], params['bg'], params['bo']])
    bias_hh = np.zeros_like(bias_ih)

    layer = str(layer)
    new_values = {
        'weight_ih_l' + layer: weight_ih,
        'weight_hh_l' + layer: weight_hh,
        'bias_ih_l' + layer: bias_ih,
        'bias_hh_l' + layer: bias_hh,
    }

    # Validate everything before touching any tensor, so a bad `params` cannot
    # leave the layer half-updated or with a silently mis-shaped weight.
    for name, values in new_values.items():
        expected_shape = tuple(getattr(lstm_object, name).shape)
        if values.shape != expected_shape:
            raise ValueError(
                "shape mismatch for %s: got %s, expected %s"
                % (name, values.shape, expected_shape)
            )

    # copy_ writes into the existing parameter storage (rather than rebinding
    # .data), and converting to the parameter's own dtype removes the reliance
    # on a module-level `dtype` global.
    with torch.no_grad():
        for name, values in new_values.items():
            parameter = getattr(lstm_object, name)
            parameter.copy_(torch.as_tensor(values, dtype=parameter.dtype))
    
# Replace the torch LSTM's random weights with the custom parameters.
set_lstm_weights(lstm, params, layer=0)

# torch.nn.LSTM takes (seq_len, batch, input_size), so add a batch axis of 1.
inputs = torch.tensor(data.reshape(len(data), 1, vocab_size), dtype=dtype)
# Initial states are (num_layers, batch, hidden_size); here that is (1, 1, hidden_size).
h_prev_torch = torch.tensor(h_prev, dtype=dtype).reshape(1, 1, hidden_size)
c_prev_torch = torch.tensor(c_prev, dtype=dtype).reshape(1, 1, hidden_size)

initial_state_torch = (h_prev_torch, c_prev_torch)
h_pytorch, (h_final_pytorch, c_final_pytorch) = lstm(inputs, initial_state_torch)
print( 'h_pytorch shape: ', h_pytorch.shape)


h_pytorch shape:  torch.Size([454, 1, 3])


### Verify that we get the same result

In [8]:
# Hidden states at every time step must match the custom implementation
# (index 0 selects the single batch element).
np.testing.assert_array_almost_equal(h_pytorch.detach().numpy()[:, 0, :], h)

# The final cell state from torch must match the custom cell state at the
# last time step.
np.testing.assert_array_almost_equal(
    c_final_pytorch.detach().numpy()[0, 0, :],
    c[len(data) - 1],
)


# Now do the same thing but for a two-layer stacked LSTM

In [37]:
# Zero initial hidden (h) and cell (c) state vectors, one of each per layer.
h_prev1, h_prev2 = np.zeros(hidden_size), np.zeros(hidden_size)
c_prev1, c_prev2 = np.zeros(hidden_size), np.zeros(hidden_size)


In [42]:
# Initialize parameters for each layer. Layer 1 consumes the one-hot input
# (width vocab_size); layer 2 consumes layer 1's hidden states (width hidden_size).
params1 = initialize_parameters_lstm(hidden_size, vocab_size)
params2 = initialize_parameters_lstm(hidden_size, hidden_size)

# Run the custom forward pass twice: the hidden states of layer 1 (h1) are the
# input sequence of layer 2.
h1, c1 = lstm_forward(data, params1, h_prev1, c_prev1)
h2, c2 = lstm_forward(h1, params2, h_prev2, c_prev2)



In [43]:
# Two-layer stacked torch LSTM mirroring the two custom forward passes.
num_layers = 2
lstm2 = torch.nn.LSTM(input_size=vocab_size,hidden_size=hidden_size,num_layers=num_layers)

# stamp the custom weights onto each layer
set_lstm_weights(lstm2, params1, 0)
set_lstm_weights(lstm2, params2, 1)

# Initial states: stack the per-layer vectors into (num_layers, hidden_size).
# NOTE: this rebinds h_prev / c_prev, which the single-layer cells above also
# use, so re-run the single-layer initialisation cell before re-running those.
h_prev = np.stack([h_prev1, h_prev2], 0)
# Fixed copy-paste bug: the cell state must be built from c_prev1 / c_prev2,
# not from the hidden-state vectors.
c_prev = np.stack([c_prev1, c_prev2], 0)
h_prev_torch = torch.tensor(h_prev.reshape((num_layers,1,hidden_size)), dtype=dtype)
c_prev_torch = torch.tensor(c_prev.reshape((num_layers,1,hidden_size)), dtype=dtype)

# h_pytorch holds the top layer's hidden state at every step; the final states
# are indexed by layer along their first axis.
h_pytorch, (h_final_pytorch, c_final_pytorch) = lstm2(inputs, (h_prev_torch, c_prev_torch))


In [44]:
# The top-layer hidden states from torch must match the second custom layer.
np.testing.assert_array_almost_equal(h_pytorch.detach().numpy()[:, 0, :], h2)

# Index 1 selects the second (top) layer's final cell state, which must match
# the custom layer-2 cell state at the last time step.
np.testing.assert_array_almost_equal(
    c_final_pytorch.detach().numpy()[1, 0, :],
    c2[len(data) - 1],
)
