In [1]:
import torch

In [44]:
import torch.nn as nn

In [96]:
BATCH_SIZE = 1
EMBED_SIZE = 5    # word_size
SENTECE_LEN = 6   # input_size
HIDDEN_SIZE = 10  # hidden_size
sentence = torch.randn(1, SENTECE_LEN, EMBED_SIZE)

In [97]:
sentence[0]

tensor([[ 1.1944, -0.3090, -1.8691,  0.7323, -0.0364],
        [ 0.5767, -1.1036, -1.5389,  1.5197,  0.7917],
        [ 1.1138,  0.0613, -0.9815,  0.6418, -0.4962],
        [ 0.5006, -0.7771,  1.1670, -0.4962, -2.1714],
        [-0.6125,  1.8546,  1.1128,  0.2832, -1.2968],
        [ 0.2312,  0.7097,  1.3771,  0.0922, -0.6707]])

In [98]:
print('There are {} words, each a {}-dim vector'.format(*sentence[0].shape))

There are 6 words, each a 5-dim vector


### Using LSTM Cell

Args:
    - input_size: The number of expected features in the input `x`
    - hidden_size: The number of features in the hidden state `h`
    - bias: If ``False``, then the layer does not use bias weights `b_ih` and
        `b_hh`. Default: ``True``

Inputs: input, (h_0, c_0)
    - **input** of shape `(batch, input_size)`: tensor containing input features
    - **h_0** of shape `(batch, hidden_size)`: tensor containing the initial hidden
      state for each element in the batch.
    - **c_0** of shape `(batch, hidden_size)`: tensor containing the initial cell state
      for each element in the batch.

      If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero.

Outputs: (h_1, c_1)
    - **h_1** of shape `(batch, hidden_size)`: tensor containing the next hidden state
      for each element in the batch
    - **c_1** of shape `(batch, hidden_size)`: tensor containing the next cell state
      for each element in the batch

Attributes:
    - weight_ih: the learnable input-hidden weights, of shape
        `(4*hidden_size, input_size)`
    - weight_hh: the learnable hidden-hidden weights, of shape
        `(4*hidden_size, hidden_size)`
    - bias_ih: the learnable input-hidden bias, of shape `(4*hidden_size)`
    - bias_hh: the learnable hidden-hidden bias, of shape `(4*hidden_size)`


In [99]:
cell = nn.LSTMCell(input_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE)

In [100]:
hx = torch.randn(BATCH_SIZE, HIDDEN_SIZE)
cx = torch.randn(BATCH_SIZE, HIDDEN_SIZE)

In [101]:
output = []
for i in range(SENTECE_LEN):
    hx, cx = cell(sentence[i], (hx, cx))
    output.append(hx)

RuntimeError: Input batch size 6 doesn't match hidden[0] batch size 1

### Using LSTM Layer

Args:
    - input_size: The number of expected features in the input `x`
    - hidden_size: The number of features in the hidden state `h`
    - num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
        would mean stacking two LSTMs together to form a `stacked LSTM`,
        with the second LSTM taking in outputs of the first LSTM and
        computing the final results. Default: 1
    - bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
    - batch_first: If ``True``, then the input and output tensors are provided
        as (batch, seq, feature). Default: ``False``
    - dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
        LSTM layer except the last layer, with dropout probability equal to
        :attr:`dropout`. Default: 0
    - bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``


Inputs: input, (h_0, c_0)
    - **input** of shape `(seq_len, batch, input_size)`: tensor containing the features
      of the input sequence.
      The input can also be a packed variable length sequence.
      See :func:`torch.nn.utils.rnn.pack_padded_sequence` or
      :func:`torch.nn.utils.rnn.pack_sequence` for details.
    - **h_0** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
      containing the initial hidden state for each element in the batch.
      If the LSTM is bidirectional, num_directions should be 2, else it should be 1.
    - **c_0** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
      containing the initial cell state for each element in the batch.

      If `(h_0, c_0)` is not provided, both **h_0** and **c_0** default to zero.


Outputs: output, (h_n, c_n)
    - **output** of shape `(seq_len, batch, num_directions * hidden_size)`: tensor
      containing the output features `(h_t)` from the last layer of the LSTM,
      for each `t`. If a :class:`torch.nn.utils.rnn.PackedSequence` has been
      given as the input, the output will also be a packed sequence.

      For the unpacked case, the directions can be separated
      using ``output.view(seq_len, batch, num_directions, hidden_size)``,
      with forward and backward being direction `0` and `1` respectively.
      Similarly, the directions can be separated in the packed case.
    - **h_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
      containing the hidden state for `t = seq_len`.

      Like *output*, the layers can be separated using
      ``h_n.view(num_layers, num_directions, batch, hidden_size)`` and similarly for *c_n*.
    - **c_n** of shape `(num_layers * num_directions, batch, hidden_size)`: tensor
      containing the cell state for `t = seq_len`.

Attributes:
    - weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer
        `(W_ii|W_if|W_ig|W_io)`, of shape `(4*hidden_size, input_size)` for `k = 0`.
        Otherwise, the shape is `(4*hidden_size, num_directions * hidden_size)`
    - weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer
        `(W_hi|W_hf|W_hg|W_ho)`, of shape `(4*hidden_size, hidden_size)`
    - bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer
        `(b_ii|b_if|b_ig|b_io)`, of shape `(4*hidden_size)`
    - bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer
        `(b_hi|b_hf|b_hg|b_ho)`, of shape `(4*hidden_size)`

In [None]:
layer = nn.LSTM(input_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE)