In [1]:
import torch
import torch.nn.functional as F

In [2]:
lstm = torch.nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
inputs

[tensor([[-1.0269, -0.7684, -0.0047]]),
 tensor([[ 1.0225,  0.2555, -0.7867]]),
 tensor([[1.2509, 0.7152, 0.4489]]),
 tensor([[ 0.1487, -0.2523, -0.0829]]),
 tensor([[0.0560, 0.9337, 0.7852]])]

In [17]:
lstm = torch.nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
inputs

# initialize the hidden state.
hidden = None

for i in inputs:
    if hidden:
        out, hidden = lstm(i.view(1, 1, -1), hidden)
    else:
        out, hidden = lstm(i.view(1, 1, -1))
print out
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
out, hidden = lstm(inputs)
print out[-1]

tensor([[[-0.2436, -0.1457, -0.4095]]], grad_fn=<CatBackward>)
tensor([[-0.2436, -0.1457, -0.4095]], grad_fn=<SelectBackward>)


In [84]:
training_data = [
    ("The dog ate the apple".lower().split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".lower().split(), ["NN", "V", "DET", "NN"])
]

vocab = set(reduce(lambda x, y : x+y, [t[0] for t in training_data] ) )
print vocab
word_to_ix = {word: i+1 for i, word in enumerate(vocab)}
word_to_ix["PADDING"] = 0
tag_to_ix = {"DET": 1, "NN": 2, "V": 3, "PADDING":0}
max_sentence_length = max([len(t[0]) for t in training_data]) + 1 #plus one for some extra protection
training_sentences = []
training_tags = []
for t in training_data:
    words, tags = t
    training_sentences.append([0 for i in xrange(max_sentence_length)])
    training_tags.append([0 for i in xrange(max_sentence_length)])
    for j, word in enumerate(words):

        training_sentences[-1][j] = word_to_ix[word]
        training_tags[-1][j] = tag_to_ix[tags[j]]
        
print(word_to_ix)
print training_sentences


set(['everybody', 'ate', 'apple', 'that', 'read', 'dog', 'book', 'the'])
{'everybody': 1, 'ate': 2, 'apple': 3, 'that': 4, 'read': 5, 'dog': 6, 'PADDING': 0, 'book': 7, 'the': 8}
[[8, 6, 2, 8, 3, 0], [1, 5, 4, 7, 0, 0]]


In [85]:
class LSTMTagger(torch.nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, sentence_length):
        super(LSTMTagger, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.sentence_length = sentence_length

        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)
        
    def forward(self, input):
        embedded = self.embedding(input).view(self.sentence_length,1,-1)
        out, hidden = self.lstm(embedded)
        out = self.hidden2tag(out)
        return F.log_softmax(out, dim=2)
    
lstm = LSTMTagger(4, 4, len(vocab)+1, 3, max_sentence_length)
lstm(torch.tensor(training_sentences[0]))

tensor([[[-1.1534, -0.7874, -1.4722]],

        [[-1.1669, -0.7750, -1.4785]],

        [[-1.1825, -0.7322, -1.5482]],

        [[-1.1703, -0.7501, -1.5260]],

        [[-1.1532, -0.7662, -1.5160]],

        [[-1.1954, -0.7654, -1.4599]]], grad_fn=<LogSoftmaxBackward>)

In [86]:
step_size = 200
lstm = LSTMTagger(4, 4, len(vocab)+1, 4, max_sentence_length)
loss_fn = torch.nn.NLLLoss(ignore_index=0)
optimizer = torch.optim.Adam(lstm.parameters(), lr=1e-2)

total_losses = 0
for t in range(1000):


    for i, data in enumerate(training_data): 
        _, tags = data
        lstm.zero_grad()
        output = lstm(torch.tensor(training_sentences[i])).view(max_sentence_length,-1)

        loss = loss_fn(output, torch.tensor(training_tags[i]))
        loss.backward()
        optimizer.step()

        total_losses += loss.item()
    if t%step_size == step_size - 1:
        print t, total_losses/step_size
        total_losses = 0

199 0.435446375627
399 0.00839321744977
599 0.00338593649911
799 0.00184002554597
999 0.00110415017567


In [87]:
with torch.no_grad():
    test_sentence = "Everybody that ate the dog read".lower().split()
    test = [0 for _ in range(max_sentence_length)]
    for i, word in enumerate(test_sentence):
        test[i] = word_to_ix[word]
    output = lstm(torch.tensor(test))
    print torch.argmax(output, dim = 2)

tensor([[2],
        [1],
        [3],
        [1],
        [2],
        [3]])


# On Off Gate

In [33]:
class OOGate(torch.nn.Module):

    def __init__(self, hidden_dim):
        super(OOGate, self).__init__()
        self.hidden_dim = hidden_dim
        #self.lstm = torch.nn.LSTM(1, hidden_dim)
        self.input2hidden = torch.nn.Linear(1+hidden_dim, hidden_dim)
        self.hidden2out = torch.nn.Linear(hidden_dim, 1)
        self.init_hidden()
        
    def forward(self, input):
        input = torch.cat((input.view(1,1,-1), self.hidden), dim = 2)
        self.hidden = torch.tanh(self.input2hidden(input))
        # print input, self.hidden
        #out, self.hidden = self.lstm(input.view(1,1,1) , self.hidden)
        out = self.hidden2out(self.hidden)
        return torch.sigmoid(out)
    
    def init_hidden(self):
        self.hidden = torch.zeros(1, 1, self.hidden_dim)
    
oog = OOGate(2)
oog(torch.tensor([1.]))
oog(torch.tensor([0.]))

tensor([[[0.6945]]], grad_fn=<SigmoidBackward>)

In [65]:
import random
possibilities = [0.0 for i in range(128)]
for i in range(128):
    j = i%4
    if j < 2: 
        possibilities[i] = 1.0
        
step_size = 100
oog = OOGate(4)

loss_fn = torch.nn.BCELoss(reduction="sum")
optimizer = torch.optim.SGD(oog.parameters(), lr=1e-2)

total_losses = 0
for t in range(10000):
    
    while True:
        a = random.randint(0,127)
        b = random.randint(0,126)
        if a < b:
            try:
                example = possibilities[a:b+1]
                break
            except IndexError:
                pass
    # print example
    oog.zero_grad()
    oog.init_hidden()
    loss = torch.tensor([0.])
    # print example
    for i in range(len(example) -1): 
        # print "i", i, data
        oog.zero_grad()
        output = oog(torch.tensor([example[i]]))
        y = example[i+1]
        
        loss += loss_fn(output, torch.tensor([[y]]))
    loss.backward()
    optimizer.step()

    total_losses += loss.item()
    if t%step_size == step_size - 1:
        print t, total_losses/step_size
        total_losses = 0


99 9.48629769504
199 1.26617297471
299 0.994643369913
399 0.876655199528
499 0.858393005729
599 0.80405915767
699 0.804524154067
799 0.803903802931
899 0.766505013257
999 0.772753817141
1099 0.752951097488
1199 0.786715562046
1299 0.681298845261
1399 0.793822412789
1499 0.777228438556
1599 0.745934744179
1699 0.733000632226
1799 0.763534942269
1899 0.741612895131
1999 0.764834470749
2099 0.726180863976
2199 0.757913743854
2299 0.746083149016
2399 0.741460702121
2499 0.745916709602
2599 0.749795624614
2699 0.75549913168
2799 0.754217931032
2899 0.725392879248
2999 0.733314616978
3099 0.742916531265
3199 0.751228345931
3299 0.732059198916
3399 0.73889634937
3499 0.710037486255
3599 0.729008285701
3699 0.740915071666
3799 0.738084877431
3899 0.740829059482
3999 0.737211439013
4099 0.750723884404
4199 0.743137818873
4299 0.728881320953
4399 0.750462110639
4499 0.736233894229
4599 0.701864287108
4699 0.75448761344
4799 0.730128076375
4899 0.739888499677
4999 0.721907474101
5099 0.7562457743

In [66]:
oog.init_hidden()
print 1, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([0.]))
print 1, oog(torch.tensor([0.]))
print 1, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([0.]))
print 1, oog(torch.tensor([0.]))
print 1, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([1.]))
print 0, oog(torch.tensor([0.]))
print 1, oog(torch.tensor([0.]))

1 tensor([[[0.5341]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0011]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0002]]], grad_fn=<SigmoidBackward>)
1 tensor([[[0.9999]]], grad_fn=<SigmoidBackward>)
1 tensor([[[0.9999]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0002]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0002]]], grad_fn=<SigmoidBackward>)
1 tensor([[[0.9999]]], grad_fn=<SigmoidBackward>)
1 tensor([[[0.9999]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0002]]], grad_fn=<SigmoidBackward>)
0 tensor([[[0.0002]]], grad_fn=<SigmoidBackward>)
1 tensor([[[0.9999]]], grad_fn=<SigmoidBackward>)
