In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that the random values
# drawn further down are the same on every fresh run of the notebook.
torch.manual_seed(1)

<torch._C.Generator at 0x7fb11cd8b050>

# LSTM in torch

This is a very simple notebook just to familiarize ourselves with the inputs and outputs of a recurrent neural network.

In [2]:
# Feature dimension of each input vector, and dimension of the LSTM hidden state.
input_size, hidden_size = 5, 3

In [3]:
# LSTM whose input vectors have `input_size` features and whose hidden state
# (which is also the per-step output) has `hidden_size` features.
lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)  # input dim = input_size, output (= hidden) dim = hidden_size
# alternatively try a gated recurrent unit torch.nn.GRU https://pytorch.org/docs/stable/generated/torch.nn.GRU.html
# or a simple RNN torch.nn.RNN https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

In [4]:
# Build a toy "sentence": `seq_len` random word vectors, each of shape
# (1, 1, input_size), concatenated along dim 0 into (seq_len, 1, input_size).
# The sequence length gets its own name because it only coincidentally equals
# input_size here; a bare 5 makes the two axes easy to confuse.
seq_len = 5
inputs = torch.cat([torch.randn(1, 1, input_size) for _ in range(seq_len)])  # make a sequence of length seq_len

In [5]:
# Axes are (sequence length, batch size, embedding dimension); the embedding is
# the vector representation of one word in the sentence.
inputs.shape

torch.Size([5, 1, 5])

In [6]:
# Random initial state (optional: if no state is passed, nn.LSTM defaults to zeros).
# Both tensors have shape (num_layers * num_directions, batch_size, hidden_size),
# i.e. (1, 1, hidden_size) for a single-layer, unidirectional LSTM with batch size 1.
# The last dimension is tied to `hidden_size` rather than a hard-coded 3, so
# changing hidden_size above does not break the forward pass.
h0 = torch.randn(1, 1, hidden_size)  # initial hidden state
c0 = torch.randn(1, 1, hidden_size)  # initial cell state
hidden = (h0, c0)

In [7]:
# Run the whole sequence through the LSTM in a single call.
# The initial state is passed as (h0, c0) rather than `hidden`, because `hidden`
# is rebound to the final state on this very line: feeding it back in would make
# a re-run of this cell start from the previous run's final state.
out, hidden = lstm(inputs, (h0, c0))

In [8]:
# Show the per-step outputs, then the final (h, c) state tuple, each on its own line.
print(out, hidden, sep="\n")

tensor([[[ 0.0350, -0.1568,  0.1137]],

        [[ 0.2973, -0.2097,  0.2475]],

        [[ 0.3209,  0.0711,  0.1950]],

        [[ 0.4948,  0.1319,  0.1546]],

        [[ 0.4059,  0.1276,  0.2292]]], grad_fn=<StackBackward>)
(tensor([[[0.4059, 0.1276, 0.2292]]], grad_fn=<StackBackward>), tensor([[[0.7476, 0.2052, 0.7313]]], grad_fn=<StackBackward>))


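`out` holds the hidden state at every time step, i.e. one vector per word in the sentence, so its shape is (sequence length, batch size, hidden size) = `(5, 1, 3)`  
`hidden` is the tuple `(h_final, c_final)`: the hidden state and the cell state after the *final* step (the last word that was processed). Note that `h_final` is identical to the last entry of `out`.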