In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. He is a young boy. In effect,
we conjure the spirits of the computer with our spells.""".split()

# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

In [3]:
print(vocab)
print('Vocab size: ', vocab_size)

{'called', 'rules', 'abstract', 'the', 'young', 'evolution', 'about', 'directed', 'computers.', 'processes', 'People', 'manipulate', 'computational', 'process.', 'beings', 'create', 'study', 'of', 'a', 'Computational', 'other', 'to', 'spells.', 'processes.', 'conjure', 'idea', 'is', 'data.', 'by', 'things', 'spirits', 'As', 'are', 'we', 'program.', 'effect,', 'they', 'boy.', 'computer', 'that', 'process', 'The', 'He', 'In', 'our', 'pattern', 'direct', 'programs', 'with', 'evolve,', 'inhabit', 'We'}
Vocab size:  52


In [5]:
word_to_ix = {word: i for i, word in enumerate(vocab)}
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))
data

[(['We', 'are', 'to', 'study'], 'about'),
 (['are', 'about', 'study', 'the'], 'to'),
 (['about', 'to', 'the', 'idea'], 'study'),
 (['to', 'study', 'idea', 'of'], 'the'),
 (['study', 'the', 'of', 'a'], 'idea'),
 (['the', 'idea', 'a', 'computational'], 'of'),
 (['idea', 'of', 'computational', 'process.'], 'a'),
 (['of', 'a', 'process.', 'Computational'], 'computational'),
 (['a', 'computational', 'Computational', 'processes'], 'process.'),
 (['computational', 'process.', 'processes', 'are'], 'Computational'),
 (['process.', 'Computational', 'are', 'abstract'], 'processes'),
 (['Computational', 'processes', 'abstract', 'beings'], 'are'),
 (['processes', 'are', 'beings', 'that'], 'abstract'),
 (['are', 'abstract', 'that', 'inhabit'], 'beings'),
 (['abstract', 'beings', 'inhabit', 'computers.'], 'that'),
 (['beings', 'that', 'computers.', 'As'], 'inhabit'),
 (['that', 'inhabit', 'As', 'they'], 'computers.'),
 (['inhabit', 'computers.', 'they', 'evolve,'], 'As'),
 (['computers.', 'As', 'evol

In [6]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size*2*embedding_dim,\
                                 context_size*2*vocab_size)
        self.linear2 = nn.Linear(context_size*2*vocab_size,\
                                 vocab_size)
    
    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs

In [7]:
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

make_context_vector(data[0][0], word_to_ix)  # example

tensor([51, 32, 21, 16])

In [8]:
EMBEDDING_DIM = 10
losses = []
loss_function = nn.NLLLoss()
model = CBOW(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=1e-2)

In [9]:
for epoch in range(100):
    total_loss = 0
    for context, target in data:
        context_idx = make_context_vector(context, word_to_ix)
        
        model.zero_grad()
        
        log_probs = model(context_idx)
        
        loss = loss_function(log_probs, torch.tensor([word_to_ix[target]],
                                                    dtype=torch.long))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    losses.append(total_loss)

In [10]:
losses

[253.019305229187,
 230.9137463569641,
 210.22014904022217,
 190.29372417926788,
 170.7624912261963,
 151.5070405602455,
 132.65707820653915,
 114.49299466609955,
 97.33777034282684,
 81.55576676130295,
 67.5194634348154,
 55.37784552574158,
 45.191132828593254,
 36.85256762802601,
 30.160841643810272,
 24.88770680874586,
 20.762972980737686,
 17.547911062836647,
 15.026204392313957,
 13.03189954534173,
 11.436624128371477,
 10.142574552446604,
 9.078776236623526,
 8.194835923612118,
 7.451483592391014,
 6.819478992372751,
 6.277120156213641,
 5.807096537202597,
 5.397083749994636,
 5.03612663783133,
 4.717222640290856,
 4.432848459109664,
 4.178182557225227,
 3.9490040857344866,
 3.7420591581612825,
 3.5540504790842533,
 3.3826564829796553,
 3.2258762158453465,
 3.0820603035390377,
 2.949409393593669,
 2.827121267095208,
 2.7137881303206086,
 2.608572226949036,
 2.510677876882255,
 2.4193382915109396,
 2.3340127831324935,
 2.25415594689548,
 2.179088156670332,
 2.108680338598788,
 2.0

In [11]:
ix_to_word = {ix:word for ix, word in enumerate(vocab)}
# print(ix_to_word)
context = ['a', 'young', 'In', 'effect,']
context_vector = make_context_vector(context, word_to_ix)
a = model(context_vector)
# print(a)
#Print result
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {ix_to_word[torch.argmax(a[0]).item()]}')

Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. He is a young boy. In effect, we conjure the spirits of the computer with our spells.

Context: ['a', 'young', 'In', 'effect,']

Prediction: boy.
