In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7fec13eb0090>

In [2]:
lstm = nn.LSTM(3, 3)
inputs = [torch.randn(1, 3) for _ in range(5)]

# initialize the hidden state
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
for i in inputs:
  out, hidden = lstm(i.view(1, 1, -1), hidden)

inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward0>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward0>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward0>))


# PoS tagging

In [14]:
from functools import reduce
def prepare_sequence(seq, to_idx):
  idxs = [to_idx[w] for w in seq]
  return torch.tensor(idxs, dtype=torch.long)

training_data = [
  ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
  ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
vocab = set([el for data in training_data for el in data[0]])
word_to_idx = {word:idx for idx, word in enumerate(vocab)}
tag_to_idx = {"DET": 0, "NN": 1, "V": 2}
idx_to_tag = {0: "DET", 1: "NN", 2: "V"}

EMBED_DIM = 6
HIDDEN_DIM = 6
print(word_to_idx)

{'dog': 0, 'book': 1, 'the': 2, 'Everybody': 3, 'that': 4, 'ate': 5, 'apple': 6, 'read': 7, 'The': 8}


In [4]:
class LSTMTagger(nn.Module):
  def __init__(self, embed_dim, hidden_dim, vocab_size, tagset_size):
    super(LSTMTagger, self).__init__()

    self.word_embed = nn.Embedding(vocab_size, embed_dim)

    self.lstm = nn.LSTM(embed_dim, hidden_dim)

    self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

  def forward(self, sentence):
    embeds = self.word_embed(sentence)
    lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
    tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
    tag_scores = F.log_softmax(tag_space, dim=1)
    return tag_scores

In [16]:
def print_beauti_res(sentence, tag_scores):
  print(reduce(lambda x, y: f"{x}\t{y}", sentence))
  print(reduce(lambda x, y: f"{x}\t{y}", [idx_to_tag[torch.argmax(scores).item()] for scores in tag_scores]))

In [17]:
model = LSTMTagger(EMBED_DIM, HIDDEN_DIM, len(word_to_idx), len(tag_to_idx))
loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

with torch.no_grad():
  inputs = prepare_sequence(training_data[0][0], word_to_idx)
  tag_scores = model(inputs)
  print(tag_scores)

for epoch in range(300):
  for sentence, tags in training_data:
    model.zero_grad()

    sentence_in = prepare_sequence(sentence, word_to_idx)
    targets = prepare_sequence(tags, tag_to_idx)

    tag_scores = model(sentence_in)

    loss = loss_fn(tag_scores, targets)
    loss.backward()
    optimizer.step()

with torch.no_grad():
  inputs = prepare_sequence(training_data[0][0], word_to_idx)
  tag_scores = model(inputs)
  print(tag_scores)
  print_beauti_res(training_data[0][0], tag_scores)

tensor([[-1.0564, -1.3608, -0.9267],
        [-1.1500, -1.2674, -0.9118],
        [-1.1602, -1.3702, -0.8381],
        [-1.0840, -1.3456, -0.9129],
        [-0.9573, -1.4038, -0.9931]])
tensor([[-0.0370, -4.2475, -3.8169],
        [-4.0457, -0.0330, -4.1993],
        [-4.2150, -4.1447, -0.0311],
        [-0.0252, -4.6070, -4.2077],
        [-3.8959, -0.0215, -6.9592]])
The	dog	ate	the	apple
DET	NN	V	DET	NN


# Augment Char Embedding

In [18]:
vocab_char = set([char for data in training_data for el in data[0] for char in el])
char_to_idx = {char:idx for idx, char in enumerate(vocab_char)}

In [19]:
print(char_to_idx)

{'d': 0, 'y': 1, 'r': 2, 'h': 3, 'a': 4, 'T': 5, 'e': 6, 'p': 7, 'v': 8, 'E': 9, 'l': 10, 'b': 11, 't': 12, 'k': 13, 'o': 14, 'g': 15}


In [24]:
class AugCharTagger(nn.Module):
  def __init__(self, word_embed_dim, char_embed_dim, word_hidden_dim, char_hidden_dim,
               word_vocab_size, char_vocab_size, tagset_size):
    super(AugCharTagger, self).__init__()
    self.word_embed = nn.Embedding(word_vocab_size, word_embed_dim)
    self.char_embed = nn.Embedding(char_vocab_size, char_embed_dim)

    self.char_lstm = nn.LSTM(char_embed_dim, char_hidden_dim)
    self.word_lstm = nn.LSTM(word_embed_dim + char_hidden_dim, word_hidden_dim)

    self.hidden2tag = nn.Linear(word_hidden_dim, tagset_size)
    self.word_hidden = self.init_hidden(word_hidden_dim)
    self.char_hidden = self.init_hidden(char_hidden_dim)

  def init_hidden(self, size):
    return (torch.zeros(1, 1, size), torch.zeros(1, 1, size))

  def forward(self, word_sequence, char_sequence):
    word_embeds = self.word_embed(word_sequence)

    char_embeds = self.char_embed(char_sequence)
    char_lstm_out, self.char_hidden = self.char_lstm(
        char_embeds.view(len(char_sequence), 1, -1), self.char_hidden)

    concat = torch.cat([word_embeds.view(1, 1, -1), char_lstm_out[-1].view(1, 1, -1)], dim=2)
    word_lstm_out, self.word_hidden = self.word_lstm(concat, self.word_hidden)

    tag_space = self.hidden2tag(word_lstm_out.view(1, -1))
    tag_scores = F.log_softmax(tag_space, dim=1)
    return tag_scores

In [26]:
WORD_EMBED_DIM = 6
WORD_HIDDEN_DIM = 6
CHAR_EMBED_DIM = 3
CHAR_HIDDEN_DIM = 3
model = AugCharTagger(WORD_EMBED_DIM, CHAR_EMBED_DIM, WORD_HIDDEN_DIM, CHAR_HIDDEN_DIM,
                      len(vocab), len(vocab_char), len(tag_to_idx))
loss_fun = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
with torch.no_grad():
  tag_scores = []
  for word in training_data[0][0]:
    word_sequence = prepare_sequence([word], word_to_idx)
    char_sequence = prepare_sequence(word, char_to_idx)
    tag_score = model(word_sequence, char_sequence)
    tag_scores.append(tag_score)
  print_beauti_res(training_data[0][0], tag_scores)

The	dog	ate	the	apple
V	V	V	NN	NN


In [27]:
for epoch in range(300):
  for sentence, tags in training_data:
    model.zero_grad()
    model.word_hidden = model.init_hidden(WORD_HIDDEN_DIM)

    for index, word in enumerate(sentence):
      model.char_hidden = model.init_hidden(CHAR_HIDDEN_DIM)

      word_sequence = prepare_sequence([word], word_to_idx)
      char_sequence = prepare_sequence(word, char_to_idx)
      targets = prepare_sequence([tags[index]], tag_to_idx)

      tag_scores = model(word_sequence, char_sequence)
      loss = loss_fn(tag_scores, targets)
      loss.backward(retain_graph=True)
      
    optimizer.step()

In [28]:
with torch.no_grad():
  tag_scores = []
  for word in training_data[0][0]:
    word_sequence = prepare_sequence([word], word_to_idx)
    char_sequence = prepare_sequence(word, char_to_idx)
    tag_score = model(word_sequence, char_sequence)
    tag_scores.append(tag_score)
  print_beauti_res(training_data[0][0], tag_scores)

The	dog	ate	the	apple
DET	NN	V	DET	NN
