# Lstm Model

## Importing packages

In [206]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Sample training data 

In [207]:
training_data = [
    ("The dog ate the apple".split(), torch.tensor([0,0,0,1,1,1],dtype=torch.float32)),
    ("Everybody read that beautiful book".split(), torch.tensor([1,1,1,1,1,1],dtype=torch.float32))
]



In [223]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)



vocab = {}
for sent, tags in training_data:
    for word in sent:
        if word not in vocab:
            vocab[word] = len(vocab)
print(vocab)





{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'beautiful': 8, 'book': 9}


## Hyperparemeters


In [224]:

EMBEDDING_DIM = 10
HIDDEN_DIM = 5


In [227]:
class BiLSTM(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size=6):
        super(BiLSTM, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim,bidirectional =True)

        self.hidden2tag = nn.Linear(hidden_dim, 1)

    def forward(self, sentence):

        
        
        embeds = self.word_embeddings(sentence)
        
        
        
        lstm_out, _ = self.lstm(embeds.unsqueeze(0))
        
        print(embeds.shape)
        print(embeds.unsqueeze(0).shape)
        print(lstm_out.shape)
        
        tag_space = self.hidden2tag(torch.t(lstm_out.squeeze(0)))
        print(tag_space)
        return tag_space

In [228]:
model = BiLSTM(EMBEDDING_DIM, HIDDEN_DIM, len(vocab), 6)
loss_function = torch.nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)


for epoch in range(2):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
       
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, vocab)
        
        
        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        
        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, tags)
        loss.backward()
        optimizer.step()



torch.Size([5, 10])
torch.Size([1, 5, 10])
torch.Size([1, 5, 10])
tensor([[-0.3627],
        [-0.2163],
        [-0.2866],
        [-0.1561],
        [-0.3222],
        [-0.2199],
        [-0.2356],
        [-0.2366],
        [-0.3614],
        [-0.3076]], grad_fn=<AddmmBackward>)
torch.Size([5, 10])
torch.Size([1, 5, 10])
torch.Size([1, 5, 10])
tensor([[-0.0580],
        [-0.2571],
        [-0.0103],
        [-0.2067],
        [-0.0684],
        [-0.0835],
        [-0.1879],
        [-0.0879],
        [-0.2302],
        [-0.1294]], grad_fn=<AddmmBackward>)
torch.Size([5, 10])
torch.Size([1, 5, 10])
torch.Size([1, 5, 10])
tensor([[0.0207],
        [0.1664],
        [0.1031],
        [0.2317],
        [0.0624],
        [0.1687],
        [0.1441],
        [0.1490],
        [0.0269],
        [0.0816]], grad_fn=<AddmmBackward>)
torch.Size([5, 10])
torch.Size([1, 5, 10])
torch.Size([1, 5, 10])
tensor([[0.2464],
        [0.0454],
        [0.3077],
        [0.1009],
        [0.2412],
        