In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 300
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [2]:
class CBOW(nn.Module):

    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)

        self.lnlayer = nn.Linear(embedding_dim, vocab_size)

    def forward(self, inputs):
        embeds = self.embed(inputs)
        embedsum = sum(embeds)
        out = F.log_softmax(self.lnlayer(embedsum))
        return out

def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)



In [3]:
model = CBOW(vocab_size, EMBEDDING_DIM)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [4]:
for epoch in range(10):
    total_loss = torch.Tensor([0])
    for context, target in data:
        # Prepare the inputs
        net_input = make_context_vector(context,word_to_ix)
        # Reset the gradients
        model.zero_grad()

        # Forward pass
        log_probs = model(net_input)
        # Compute loss functions
        loss = loss_function(log_probs.view((1,-1)), autograd.Variable(
            torch.LongTensor([word_to_ix[target]])))
        loss.backward()
        optimizer.step()
        
        total_loss += loss.data
    print(total_loss)


 286.1781
[torch.FloatTensor of size 1]


 220.0313
[torch.FloatTensor of size 1]


 165.8631
[torch.FloatTensor of size 1]


 122.7446
[torch.FloatTensor of size 1]


 89.2427
[torch.FloatTensor of size 1]


 64.3500
[torch.FloatTensor of size 1]


 46.9617
[torch.FloatTensor of size 1]


 35.3020
[torch.FloatTensor of size 1]


 27.4908
[torch.FloatTensor of size 1]


 22.1556
[torch.FloatTensor of size 1]

