PyTorch’s LSTM expects all of its inputs to be 3D tensors. The semantics of the axes of these tensors are important: the first axis indexes the steps of the sequence, the second indexes instances in the mini-batch, and the third indexes elements of the input.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global RNG so that the randomly initialised model weights
# (and hence the training run below) are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x7f20aef488b0>

# An LSTM for Part-of-Speech Tagging

## 1. Data

In [2]:
# Each example pairs a tokenised sentence with its gold part-of-speech tags,
# one tag per word.
training_data = [
    (
        "The dog ate the apple".split(),
        ["DET", "NN", "V", "DET", "NN"],
    ),
    (
        "Everybody read that book".split(),
        ["NN", "V", "DET", "NN"],
    ),
]

In [3]:
# Word to index: every distinct word gets the next free integer id, in order
# of first appearance (lookup is case-sensitive, so "The" and "the" differ).
word_to_index = {}
for words, _ in training_data:
    for word in words:
        # setdefault only inserts when the word has not been seen yet.
        word_to_index.setdefault(word, len(word_to_index))
print(word_to_index)

# Tag to index.
tag_to_index = {"DET": 0, "NN": 1, "V": 2}
print(tag_to_index)

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
{'DET': 0, 'NN': 1, 'V': 2}


In [4]:
def prepare_sequence(seq, to_index):
    """Encode `seq` as a 1-D LongTensor of ids looked up in `to_index`.

    Lookup failures (e.g. KeyError for an item missing from a dict) propagate
    to the caller.
    """
    return torch.tensor([to_index[item] for item in seq], dtype=torch.long)

## 2. Define the Model

In [5]:
# Size of each word-embedding vector (the LSTM's per-token input width).
EMBEDDING_DIM = 6
# Size of the LSTM hidden state (the width fed to the output layer).
HIDDEN_DIM = 6

In [6]:
class LSTMTagger(nn.Module):
    """Part-of-speech tagger: embedding lookup -> LSTM -> linear -> log-softmax.

    Args:
        embedding_dim: Width of each word-embedding vector.
        hidden_dim: Width of the LSTM hidden state.
        vocab_size: Number of distinct word indices.
        target_size: Number of distinct tags.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, target_size):
        super().__init__()

        # Word index -> dense vector look-up table.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # Embedded words -> hidden states.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        # Hidden states -> tag scores.
        self.linear = nn.Linear(hidden_dim, target_size)

    def forward(self, sentence):
        """Return per-word log-probabilities over tags.

        `sentence` is expected to be a 1-D tensor of word indices (one per
        word); the result has shape (seq_len, target_size).
        """
        embedded = self.word_embeddings(sentence)  # (seq_len, embedding_dim)
        seq_len = len(sentence)

        # nn.LSTM wants (seq_len, batch, features); a single sentence is
        # treated as a mini-batch of one.
        lstm_input = embedded.view(seq_len, 1, -1)
        hidden_states, _ = self.lstm(lstm_input)

        # Fold the batch axis away again before scoring each word.
        tag_space = self.linear(hidden_states.view(seq_len, -1))
        return F.log_softmax(tag_space, dim=1)

In [7]:
# Build the tagger, sized to the vocabulary and tag set collected above.
model = LSTMTagger(
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    vocab_size=len(word_to_index),
    target_size=len(tag_to_index),
)

## 3. Loss Function

In [8]:
# Negative log-likelihood loss; it expects log-probabilities as input, which
# is what LSTMTagger.forward produces via log_softmax.
loss_function = nn.NLLLoss()

## 4. Optimizer

In [9]:
# Plain stochastic gradient descent over every model parameter, learning rate 0.1.
optimizer = optim.SGD(model.parameters(), lr=0.1)

## 5. Train the Model

In [10]:
for epoch in range(300):
    for words, tags in training_data:
        # Gradients accumulate across backward() calls, so clear them before
        # processing each sentence.
        model.zero_grad()

        # Encode the words and their gold tags as index tensors.
        inputs = prepare_sequence(words, word_to_index)
        targets = prepare_sequence(tags, tag_to_index)

        # Forward pass: per-word tag log-probabilities, then the loss.
        tag_scores = model(inputs)
        loss = loss_function(tag_scores, targets)

        # Backward pass followed by one parameter update.
        loss.backward()
        optimizer.step()

## 6. Test the Model

In [12]:
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    # Tag the two training sentences and print the most likely tag index
    # for every word.
    for example_index in (0, 1):
        inputs = prepare_sequence(training_data[example_index][0], word_to_index)
        tag_scores = model(inputs)

        print(tag_scores.max(dim=1).indices)

tensor([0, 1, 2, 0, 1])
tensor([1, 2, 0, 1])
