In [1]:
import torch

### Sequence representation
> \[seq_len, feature_len\], \[words, word_vec\]

* one-hot to represent a word
> sparse and high-dim

* semantic similarity
> word2vec, GloVe

In [2]:
# Vocabulary: maps each word to its row index in the embedding table.
word_to_ix = {"hello": 0, "world": 1}

# Embedding lookups take integer indices; a length-1 tensor looks up one word.
lookup_tensor = torch.tensor([word_to_ix["hello"]], dtype=torch.long)

# Size the table from the vocabulary so the two cannot drift apart;
# every word gets a 5-dimensional embedding vector.
embeds = torch.nn.Embedding(len(word_to_ix), 5)
hello_embed = embeds(lookup_tensor)  # shape [1, 5]
print(hello_embed)

tensor([[-0.5031,  0.7606,  3.1124,  0.6215, -0.1789]],
       grad_fn=<EmbeddingBackward>)


In [None]:
# GloVe: word vectors loaded through the torchnlp package
# (a third-party dependency, separate from torch).
# NOTE(review): with default arguments GloVe() presumably fetches a pretrained
# vector file on first use — confirm download size and cache location before running.
from torchnlp.word_to_vector import GloVe
vectors = GloVe()
vectors['hello']  # look up the vector for the word "hello"

### Consistent memory
> $h_t = f_W(h_{t-1}, x_t)$, $h_t = \tanh(W_{hh} h_{t-1} + W_{xh} x_t)$, $y_t = W_{hy} h_t$

### Folded model
* h_{t+1} = x_t @ W_xh.T + h_t @ W_hh.T
> \[batch, feature len\] @ \[hidden len, feature len\].T + \[batch, hidden len\] @ \[hidden len, hidden len\].T
>> x_t: \[batch, feature len\], x: \[seq len, batch, feature len\]

In [5]:
# nn.RNN(input_size, hidden_size): 100 is the word-vector (input) dim,
# 10 is the hidden-state (memory) dim.
rnn = torch.nn.RNN(input_size=100, hidden_size=10)
# Names of the layer-0 parameters: input-to-hidden / hidden-to-hidden weights and biases.
rnn._parameters.keys()

odict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0'])

In [6]:
# weight_hh_l0 is [hidden, hidden]; weight_ih_l0 is [hidden, input].
rnn.weight_hh_l0.shape, rnn.weight_ih_l0.shape

(torch.Size([10, 10]), torch.Size([10, 100]))

In [7]:
# Both bias vectors have one entry per hidden unit.
rnn.bias_hh_l0.shape, rnn.bias_ih_l0.shape

(torch.Size([10]), torch.Size([10]))

### nn.RNN
* \__init\__
    * *input_size*: The number of expected features in the input x 
    * *hidden_size*: The number of features in the hidden state h
    * *num_layers*: The number of recurrent layers. E.g., setting num_layers=2 would mean stacking two RNNs together to form a stacked RNN, with the second RNN taking in outputs of the first RNN and computing the final results. Default: 1
* out, ht = forward(x, h0)
    * x: \[seq len, b, word vec\]
    * h0/ht: \[num layers, b, h dim\]
    * out: \[seq len, b, h dim\]

In [9]:
# Single-layer RNN demo: push a random batch through and inspect the shapes.
SEQ_LEN, BATCH, WORD_DIM, HIDDEN_DIM = 10, 3, 100, 20

rnn = torch.nn.RNN(input_size=WORD_DIM, hidden_size=HIDDEN_DIM, num_layers=1)
print(rnn)

# x: [seq len, batch, word vec]; h0: [num layers, batch, hidden dim]
x = torch.randn(SEQ_LEN, BATCH, WORD_DIM)
h0 = torch.zeros(1, BATCH, HIDDEN_DIM)
out, h = rnn(x, h0)
print(out.shape, h.shape)

RNN(100, 20)
torch.Size([10, 3, 20]) torch.Size([1, 3, 20])
