In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# Seed torch's default random generator so that code drawing from it
# (e.g. the layer initialisation further down) is repeatable between runs.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fd75fbbdad0>

In [10]:
def prepare_sequence(seq, to_ix):
    """Translate a sequence of items into a 1-D tensor of integer ids.

    Args:
        seq: iterable of items (words or tags in this notebook).
        to_ix: mapping indexed with each item of ``seq`` to obtain its id.

    Returns:
        A ``torch.long`` tensor holding the looked-up ids in input order.

    Any lookup error raised by ``to_ix`` (``KeyError`` for a plain dict)
    propagates to the caller.
    """
    indices = []
    for token in seq:
        indices.append(to_ix[token])
    return torch.tensor(indices, dtype=torch.long)

In [11]:
# Toy training set: each entry pairs a tokenised sentence with one tag per token.
# Tag legend: DET = determiner, NN = noun, V = verb.
train_data = [
    (["The", "dog", "ate", "the", "apple"], "DET NN V DET NN".split()),
    (["Everybody", "read", "that", "book"], "NN V DET NN".split()),
]


In [12]:
# Vocabulary: every distinct word gets the next free integer id, in order of
# first appearance. Lookup is case-sensitive, so "The" and "the" get separate ids.
word_to_idx = {}
for sent, tags in train_data:
    for word in sent:
        # len() is evaluated before insertion, so a new word receives the
        # current vocabulary size as its id; known words are left untouched.
        word_to_idx.setdefault(word, len(word_to_idx))

# Fixed tag inventory, numbered in the listed order.
tags_to_idx = {label: position for position, label in enumerate(("DET", "NN", "V"))}

In [13]:
# Model hyper-parameters: size of each word embedding vector and of the
# LSTM hidden state.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# Backward-compatible alias: the constant was originally spelled
# "EMDEDING_DIM" and other cells still refer to it by that name.
EMDEDING_DIM = EMBEDDING_DIM


In [14]:
class LSTMTagger(nn.Module):
    """Sequence tagger: embedding -> LSTM -> linear layer -> log-softmax.

    ``forward`` takes the word ids of one sentence and returns one row of
    log-probabilities over the tag set per input position.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        """Create the three layers.

        Args:
            embedding_dim: width of each word embedding vector.
            hidden_dim: width of the LSTM hidden state.
            vocab_size: number of rows in the embedding table.
            tagset_size: number of output scores produced per position.
        """
        super().__init__()

        # Layers are created in the order embedding, LSTM, linear.
        self.word_embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim)
        self.hidden2tag = nn.Linear(in_features=hidden_dim, out_features=tagset_size)

    def forward(self, input_sentence):
        """Return log-softmax tag scores with one row per element of ``input_sentence``."""
        seq_len = len(input_sentence)
        embedded = self.word_embedding(input_sentence)
        # Reshape to (seq_len, 1, features): the sentence is fed to the LSTM
        # as a batch of size one.
        hidden_states, _ = self.lstm(embedded.view(seq_len, 1, -1))
        # Drop the singleton batch axis again before projecting to tag scores.
        flat_states = hidden_states.view(seq_len, -1)
        projected = self.hidden2tag(flat_states)
        return F.log_softmax(projected, dim=1)



In [15]:
# Build the tagger sized to the vocabulary and tag set, and pair it with a
# negative log-likelihood loss (the model emits log-probabilities) and plain SGD.
model = LSTMTagger(
    embedding_dim=EMDEDING_DIM,
    hidden_dim=HIDDEN_DIM,
    vocab_size=len(word_to_idx),
    tagset_size=len(tags_to_idx),
)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)



In [16]:
# Print the model's tag scores for the first training sentence.
# Gradient tracking is disabled because this is inspection only.
inputs = prepare_sequence(train_data[0][0], word_to_idx)
with torch.no_grad():
    print(model(inputs))

tensor([[-1.0500, -0.9256, -1.3714],
        [-1.0188, -0.9739, -1.3419],
        [-1.1330, -0.9662, -1.2126],
        [-1.1818, -0.9763, -1.1501],
        [-1.0766, -0.9916, -1.2439]])


In [17]:
# Number of full passes over the (tiny) training set.
NUM_EPOCHS = 300

for epoch in range(NUM_EPOCHS):
    for sent, tag in train_data:
        # Gradients accumulate across backward() calls, so clear them
        # before processing each sentence.
        model.zero_grad()

        # Convert words and tags to id tensors. These are deliberately not
        # named `input`: binding that name at notebook scope would shadow
        # Python's built-in input() for the rest of the session.
        sentence_in = prepare_sequence(sent, word_to_idx)
        target = prepare_sequence(tag, tags_to_idx)

        # Forward pass, loss against the gold tags, backward pass, and
        # one optimizer update per sentence.
        log_probs = model(sentence_in)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()


In [10]:
# Print the model's tag scores for the first training sentence next to its
# tokens. Gradient tracking is disabled because this is inspection only.
inputs = prepare_sequence(train_data[0][0], word_to_idx)
with torch.no_grad():
    print(model(inputs), train_data[0][0])

tensor([[-0.3892, -1.2426, -3.3890],
        [-2.1082, -0.1328, -5.8464],
        [-3.0852, -5.9469, -0.0495],
        [-0.0499, -3.4414, -4.0961],
        [-2.4540, -0.0929, -5.8799]]) ['The', 'dog', 'ate', 'the', 'apple']
