In [86]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import spacy
nlp = spacy.load('en')

In [87]:
def prepare_sequence(seq, to_ix):
    idxs = []
    for w in seq:
        if w in to_ix:
            idxs += [to_ix[w]]
        else:
            idxs += [0]

    return torch.tensor(idxs, dtype=torch.long)

In [88]:
def prepare_lable(size,lable):
    temp = []
    for i in range(size -1):
        temp += [0]   
    if lable == 1:
        temp += [1]
    else:
        temp += [0]
    
    return torch.tensor(temp, dtype=torch.long)

In [89]:
def get_split(sentence):
    doc = nlp(sentence)
    return [(w.text) for w in doc]

In [100]:
training_data = [
    (get_split("I live in U.S."), 0),
    (get_split("He is dr. Jone"), 0),
    (get_split("This car is 3.8 million dollor"), 0),
    (get_split("I want to go home. This is my dog"), 1),
    (get_split("I can write C. Show me the money"), 1),
    (get_split("His name is C.Lab"), 0),

]
print(training_data)

['I', 'can', 'write', 'C.', 'Show', 'me', 'the', 'money']
[(['I', 'live', 'in', 'U.S.'], 0), (['He', 'is', 'dr', '.', 'eye'], 0), (['This', 'car', 'is', '3.8', 'million', 'dollor'], 0), (['I', 'want', 'to', 'go', 'home', '.', 'This', 'is', 'my', 'dog'], 1), (['I', 'can', 'write', 'C.', 'Show', 'me', 'the', 'money'], 1), (['His', 'name', 'is', 'C.Lab'], 0)]


In [91]:
word_to_ix = {'unknow':0}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)            
print(word_to_ix)



print(prepare_sequence(training_data[0][0], word_to_ix))
print(prepare_lable(len(training_data[0][0]),training_data[0][1]))
print(prepare_sequence(training_data[1][0], word_to_ix))
print(prepare_lable(len(training_data[1][0]),training_data[1][1]))

{'unknow': 0, 'I': 1, 'live': 2, 'in': 3, 'U.S.': 4, 'He': 5, 'is': 6, 'dr': 7, '.': 8, 'eye': 9, 'This': 10, 'car': 11, '3.8': 12, 'million': 13, 'dollor': 14, 'want': 15, 'to': 16, 'go': 17, 'home': 18, 'my': 19, 'dog': 20, 'can': 21, 'write': 22, 'C.': 23, 'Show': 24, 'me': 25, 'the': 26, 'money': 27, 'His': 28, 'name': 29, 'C.Lab': 30}
tensor([1, 2, 3, 4])
tensor([0, 0, 0, 0])
tensor([5, 6, 7, 8, 9])
tensor([0, 0, 0, 0, 0])


In [92]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (torch.zeros(1, 1, self.hidden_dim),
                torch.zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, self.hidden = self.lstm(
            embeds.view(len(sentence), 1, -1), self.hidden)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        #tag_scores = torch.sigmoid(tag_scores)
        return tag_scores

In [97]:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), 2)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, lable in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM,
        # detaching it from its history on the last instance.
        model.hidden = model.init_hidden()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_lable(len(sentence),lable)
        
        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        
    #print(epoch)
    

In [98]:
inputs = prepare_sequence(get_split("I am happy.He is happy"), word_to_ix)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-0.0008, -7.0803],
        [-0.0020, -6.2236],
        [-0.0020, -6.2344],
        [-0.0012, -6.6883],
        [-0.0009, -6.9855],
        [-0.0003, -8.0144],
        [-0.0012, -6.7018]], grad_fn=<LogSoftmaxBackward>)


In [99]:
inputs = prepare_sequence(get_split("He is 1.9 feet tall"), word_to_ix)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-0.0012, -6.6932],
        [-0.0003, -8.0512],
        [-0.0011, -6.7908],
        [-0.0016, -6.4621],
        [-0.0017, -6.3778]], grad_fn=<LogSoftmaxBackward>)


In [101]:
inputs = prepare_sequence(get_split("He is dr. Jone"), word_to_ix)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-0.0015, -6.4790],
        [-0.0003, -8.0236],
        [-0.0003, -8.1856],
        [-0.0002, -8.3515],
        [-0.0011, -6.8087]], grad_fn=<LogSoftmaxBackward>)


In [102]:
inputs = prepare_sequence(get_split("I want to go home. This is my dog"), word_to_ix)
tag_scores = model(inputs)
print(tag_scores)

tensor([[-0.0006, -7.4435],
        [-0.0007, -7.3367],
        [-0.0003, -8.0805],
        [-0.0016, -6.4397],
        [-0.0012, -6.7220],
        [-0.0025, -6.0093],
        [-0.0030, -5.8151],
        [-0.0025, -5.9760],
        [-0.0082, -4.8086],
        [-1.6479, -0.2137]], grad_fn=<LogSoftmaxBackward>)
