In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that randomly initialised
# model parameters are the same on every fresh run of the notebook.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x11844b770>

# Building the model: a CBOW neural network, similar to the n-gram example

In [84]:
# --- Hyper-parameters --------------------------------------------------------
EMBEDDING_DIM_CBOW = 100  # length of each word-embedding vector
# The original, misspelled name is kept as an alias because later cells use it.
EMDEDDING_DIM_CBOW = EMBEDDING_DIM_CBOW
CONTEXT_SIZE_CBOW = 2  # number of context words taken on EACH side of the target

# --- Corpus ------------------------------------------------------------------
# Tokenised on whitespace only: punctuation stays attached to the words and
# case is preserved, so e.g. "process." and "process" are distinct tokens.
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text)
vocab_size = len(vocab)

# Iteration order of a set of strings can change between interpreter runs
# (string hashes are randomised), so enumerate the SORTED vocabulary to give
# every word the same index on every run.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}

# --- Training pairs ----------------------------------------------------------
# Each example is (context, target): the CONTEXT_SIZE_CBOW words before and
# after position i form the context, and the word at position i is the target.
# Positions too close to either end of the text lack a full window and are
# skipped.
data = []
for i in range(CONTEXT_SIZE_CBOW, len(raw_text) - CONTEXT_SIZE_CBOW):
    context = (raw_text[i - CONTEXT_SIZE_CBOW:i]
               + raw_text[i + 1:i + CONTEXT_SIZE_CBOW + 1])
    target = raw_text[i]
    data.append((context, target))
print(data[:2])


class CBOW(nn.Module):
    """Continuous bag-of-words model.

    The embeddings of the context words are summed into one vector, passed
    through a ReLU hidden layer, and projected to one score per vocabulary
    word; the scores are returned as log-probabilities.

    Args:
        vocab_size: number of distinct words (rows of the embedding table and
            size of the output layer).
        embedding_dim_cbow: length of each embedding vector.
        context_size_cbow: accepted for interface compatibility but not used;
            because the context embeddings are summed, the layer sizes do not
            depend on how many context words are supplied.
        hidden_dim: width of the hidden layer (defaults to 128).
    """

    def __init__(self, vocab_size, embedding_dim_cbow, context_size_cbow,
                 hidden_dim=128):
        super().__init__()
        # Layers are created in the same order as before so that a fixed seed
        # yields the same initial parameters.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim_cbow)
        self.linear1 = nn.Linear(embedding_dim_cbow, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary.

        Args:
            inputs: long tensor of word indices, either 1-D
                ``(num_context_words,)`` for a single example or 2-D
                ``(batch, num_context_words)`` for a batch.

        Returns:
            Tensor of shape ``(1, vocab_size)`` for 1-D input or
            ``(batch, vocab_size)`` for 2-D input.
        """
        embeds = self.embeddings(inputs)
        # Sum over the context-word axis (second to last). For 1-D input this
        # is axis 0, exactly as before; for batched input each row is summed
        # independently instead of the batch being collapsed.
        bag = embeds.sum(dim=-2)
        # Ensure a leading batch axis so a single example becomes (1, dim).
        bag = bag.reshape(-1, bag.shape[-1])
        hidden = F.relu(self.linear1(bag))
        scores = self.linear2(hidden)
        return F.log_softmax(scores, dim=1)

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to')]


# Provided functions

In [85]:
def make_context_vector(context, word_to_ix):
    """Convert a sequence of words into a ``torch.long`` tensor of indices.

    Args:
        context: iterable of words.
        word_to_ix: mapping from word to its index; every word in ``context``
            is looked up with ``word_to_ix[word]``, so a missing word raises
            whatever that lookup raises.

    Returns:
        Tensor holding the looked-up indices in the order of ``context``.
    """
    indices = []
    for word in context:
        indices.append(word_to_ix[word])
    return torch.tensor(indices, dtype=torch.long)

def get_index_of_max(input):
    """Return the position of the largest element of ``input``.

    Elements are compared with ``>``, so when several elements share the
    maximum the earliest position is returned. An empty ``input`` yields 0.

    Args:
        input: any object supporting ``len()`` and integer indexing.
    """
    positions = range(len(input))
    # ``max`` keeps the first maximal item, matching a strict ``>`` scan.
    return max(positions, key=lambda pos: input[pos], default=0)

def get_max_prob_result(input, ix_to_word):
    """Return the ``ix_to_word`` entry for the highest-scoring position.

    Args:
        input: indexable sequence of scores, passed to ``get_index_of_max``.
        ix_to_word: lookup from position to word, indexed with the position
            that ``get_index_of_max`` returns.
    """
    best_position = get_index_of_max(input)
    return ix_to_word[best_position]


# Training the model and recording the loss of each epoch

In [87]:
# Training hyper-parameters.
NUM_EPOCHS = 10
LEARNING_RATE = 0.01

losses = []  # one entry per epoch: the sum of the per-example losses
# NLLLoss expects log-probabilities, which is what CBOW.forward returns.
loss_function = nn.NLLLoss()
model = CBOW(len(vocab), EMDEDDING_DIM_CBOW, CONTEXT_SIZE_CBOW)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# The index tensors never change between epochs, so build them once up front
# (using the provided helper) instead of rebuilding them on every pass.
training_pairs = [
    (make_context_vector(context_words, word_to_ix),
     torch.tensor([word_to_ix[target_word]], dtype=torch.long))
    for context_words, target_word in data
]

for epoch in range(NUM_EPOCHS):
    total_loss = 0
    for context_idxs, target_idx in training_pairs:
        # Gradients accumulate across backward() calls; clear them first.
        model.zero_grad()

        # Forward pass: log-probabilities over the vocabulary.
        log_probs = model(context_idxs)
        loss = loss_function(log_probs, target_idx)

        # Backward pass and one parameter update per example.
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is retained.
        total_loss += loss.item()
    losses.append(total_loss)
print(losses)

[233.94756531715393, 175.5988553762436, 128.38265174627304, 86.45967671275139, 51.76750895380974, 28.594298094511032, 16.59512160718441, 10.825176481157541, 7.87549738958478, 6.147348877042532]
