In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

torch.manual_seed(1)

<torch._C.Generator at 0x7f821b7fcf10>

In [3]:
def prepare_sequence(seq, to_ix):
    """Convert a sequence of items into a 1-D tensor of integer indices.

    Args:
        seq: iterable of keys (e.g. words or tags) to encode.
        to_ix: mapping from each key to its integer index.

    Returns:
        A ``torch.long`` tensor holding ``to_ix[item]`` for every item of
        ``seq``, in order.

    Raises:
        KeyError: if an item of ``seq`` is missing from a plain ``dict``
            ``to_ix``.
    """
    indices = []
    for item in seq:
        indices.append(to_ix[item])
    return torch.tensor(indices, dtype=torch.long)

# Toy corpus: each entry pairs a tokenized sentence with one tag per token.
training_data = [
    (
        'My grandmother ate the polemta'.split(),
        ['DET', 'NN', 'V', 'DET', 'NN'],
    ),
    (
        'Marina read my book'.split(),
        ['NN', 'V', 'DET', 'NN'],
    ),
]

# Give every distinct word the next free integer id, in order of first
# appearance across the corpus.
word_index = {}
for sent, tags in training_data:
    for word in sent:
        # setdefault keeps the id of a word that has already been seen
        word_index.setdefault(word, len(word_index))

print(word_index)
tag_to_ix = {'DET': 0, 'NN': 1, 'V': 2}

# Embedding size and LSTM hidden-state size used for the tagger.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

{'My': 0, 'grandmother': 1, 'ate': 2, 'the': 3, 'polemta': 4, 'Marina': 5, 'read': 6, 'my': 7, 'book': 8}


In [4]:

# Defining the LSTM model
class LSTMtagger(nn.Module):
    """Sequence tagger: embedding -> single-layer LSTM -> linear -> log-softmax.

    Args:
        embedding_dim: size of each word embedding vector.
        hidden_dim: size of the LSTM hidden state.
        vocab_Size: number of rows in the embedding table.
        tagset_size: number of output tags scored for each token.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_Size, tagset_size):
        super().__init__()
        self.hidden_dim = hidden_dim

        # Lookup table from word indices to embedding vectors.
        self.word_embeddings = nn.Embedding(vocab_Size, embedding_dim)

        # The LSTM takes word embeddings as inputs and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # Linear layer mapping from hidden-state space to tag space.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        """Score every token of ``sentence`` against every tag.

        Args:
            sentence: tensor of word indices; its length along the first
                dimension is taken as the number of tokens.

        Returns:
            Tensor with one row per token, log-softmax normalised over
            dimension 1.
        """
        n_tokens = len(sentence)
        embedded = self.word_embeddings(sentence)
        # Reshape to (n_tokens, 1, features) before feeding the LSTM.
        lstm_input = embedded.view(n_tokens, 1, -1)
        hidden_states, _ = self.lstm(lstm_input)
        # Flatten back to one row per token for the linear projection.
        logits = self.hidden2tag(hidden_states.view(n_tokens, -1))
        return F.log_softmax(logits, dim=1)

In [5]:
# Training the model

model = LSTMtagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_index), len(tag_to_ix))
loss = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# The index tensors are identical on every epoch, so encode each
# (sentence, tags) pair once up front instead of rebuilding them on each of
# the 300 passes over the data.
encoded_training_data = [
    (prepare_sequence(sentence, word_index), prepare_sequence(tags, tag_to_ix))
    for sentence, tags in training_data
]

for epoch in range(300):
    for sentence_in, targets in encoded_training_data:
        # Clear the gradients accumulated for the previous instance
        model.zero_grad()
        # Run our forward pass
        tag_scores = model(sentence_in)
        # Compute loss and gradients, then update parameters by calling
        # optimizer.step()
        losscalc = loss(tag_scores, targets)
        losscalc.backward()
        optimizer.step()


# Show the scores the trained model assigns to the first training sentence;
# gradient tracking is disabled because nothing is back-propagated here.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_index)
    tag_scores = model(inputs)

    print(tag_scores)

tensor([[-0.3892, -1.2426, -3.3890],
        [-2.1082, -0.1328, -5.8464],
        [-3.0852, -5.9469, -0.0495],
        [-0.0499, -3.4414, -4.0961],
        [-2.4540, -0.0929, -5.8799]])


In [9]:
import numpy as np

# Derive the index -> tag lookup by inverting tag_to_ix, so the two mappings
# cannot drift apart if the tag set is edited.
ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}

def get_max_prob_result(inp, ix_to_tag):
    """Return the tag whose score is the largest entry of ``inp``.

    Args:
        inp: array-like of scores; the position of its maximum along axis 0
            is used as the lookup key.
        ix_to_tag: mapping from index to tag label.

    Returns:
        ``ix_to_tag`` entry for the arg-max index of ``inp``.
    """
    best_ix = np.argmax(inp, axis=0)
    best_tag = ix_to_tag[best_ix]
    return best_tag

# Tag the first training sentence and print the highest-scoring tag per word.
test_sentence = training_data[0][0]
inputs = prepare_sequence(test_sentence, word_index)
# Inference only: run the forward pass without autograd tracking, which also
# lets each row of scores be converted to NumPy directly.
with torch.no_grad():
    tag_scores = model(inputs)
# Iterating the score tensor yields one row per word of the sentence.
for word, word_scores in zip(test_sentence, tag_scores):
    print(f'{word}: {get_max_prob_result(word_scores.numpy(), ix_to_tag)}')

My: DET
grandmother: NN
ate: V
the: DET
polemta: NN
