In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [14]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 10
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

In [15]:
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

In [16]:
# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

In [17]:
word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
print(data[:5])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]


In [18]:
class CBOW(nn.Module):

    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        out = self.embeddings(inputs).view((2*CONTEXT_SIZE,-1))
        out = torch.sum(out, 0, True)
        out = F.relu(self.linear1(out))
        out = self.linear2(out)
        out = F.log_softmax(out, dim=1)
    
        return out

In [19]:
model = CBOW(vocab_size, EMBEDDING_DIM)
criterion = nn.NLLLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.001)

In [20]:
for epoch in range(1000):
    for txt, target in data:
        v_txt = make_context_vector(txt, word_to_ix)
        output = model(v_txt)
        loss = criterion(output, torch.tensor([word_to_ix[target]], dtype=torch.long))
        optim.zero_grad()
        loss.backward()
        optim.step()
    if epoch%50 == 0:    
        print('{:4d}/{} Loss: {:.4f}'.format(epoch, 1000, loss.item()))
        writer.add_scalar('Train/loss',loss.item(),epoch)

   0/1000 Loss: 3.9112
  50/1000 Loss: 2.6330
 100/1000 Loss: 1.1387
 150/1000 Loss: 0.4726
 200/1000 Loss: 0.2477
 250/1000 Loss: 0.1550
 300/1000 Loss: 0.1083
 350/1000 Loss: 0.0814
 400/1000 Loss: 0.0643
 450/1000 Loss: 0.0526
 500/1000 Loss: 0.0443
 550/1000 Loss: 0.0380
 600/1000 Loss: 0.0332
 650/1000 Loss: 0.0293
 700/1000 Loss: 0.0262
 750/1000 Loss: 0.0237
 800/1000 Loss: 0.0215
 850/1000 Loss: 0.0197
 900/1000 Loss: 0.0182
 950/1000 Loss: 0.0168


In [10]:
# test
a = data[0][0]
output = model(make_context_vector(a ,word_to_ix))
print(word_to_ix[data[0][1]])
print(torch.argmax(output))

33
tensor(33)
