# Continuous Bag-of-Words

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## 1. Data

In [2]:
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

In [3]:
CONTEXT_SIZE = 2
EMBEDDING_DIM = 10

In [4]:
vocab = set(raw_text)
vocab_size = len(vocab)

In [5]:
word_to_index = {word: i for i, word in enumerate(vocab)}

In [6]:
data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
              raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))

In [7]:
 def make_context_vector(context, word_to_index):
    indices = [word_to_index[w] for w in context]
    return torch.tensor(indices, dtype=torch.long)

In [8]:
def make_target_vector(target, word_to_index):
    indices = [word_to_index[target]]
    return torch.tensor(indices, dtype=torch.long)

<font color=red>
    (i) Indices for the look-up table must be placed in a list.
</font>

## 2. Model

In [9]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embed_dim, context_size):
        super(CBOW, self).__init__()
        self.embeddings = torch.nn.Embedding(vocab_size, embed_dim)
        
        self.linear = nn.Linear(embed_dim, vocab_size)
    
    def forward(self, inputs):
        embeds = self.embeddings(inputs)
        
        avg_embed = torch.zeros(1, embeds.size()[1])
        for embed in embeds:
            avg_embed += embed
        avg_embed /= embeds.size()[0]
        
        z2 = self.linear(avg_embed)
        a2 = F.log_softmax(z2)
        
        return a2

In [10]:
model = CBOW(vocab_size, EMBEDDING_DIM, CONTEXT_SIZE)

## 3. Loss function

In [11]:
loss_function = nn.NLLLoss()

## 4. Optimizer

In [12]:
optimizer = optim.SGD(model.parameters(), lr=0.001)

## 5. Train the Model

In [13]:
losses = []
for eposh in range(10):
    total_loss = 0
    for context, target in data:
        optimizer.zero_grad()
        
        # Prepares the data.
        context_indices = make_context_vector(context, word_to_index)
        target_index = make_target_vector(target, word_to_index)
        
        # Forawrd prop.
        log_probs = model(context_indices)
        loss = loss_function(log_probs, target_index)
        
        # Backward prop.
        loss.backward()
        
        # Updates the params.
        optimizer.step()
        
        total_loss += loss.item()
    losses.append(loss)
    
print(losses)



[tensor(3.7327, grad_fn=<NllLossBackward>), tensor(3.7316, grad_fn=<NllLossBackward>), tensor(3.7304, grad_fn=<NllLossBackward>), tensor(3.7292, grad_fn=<NllLossBackward>), tensor(3.7280, grad_fn=<NllLossBackward>), tensor(3.7268, grad_fn=<NllLossBackward>), tensor(3.7256, grad_fn=<NllLossBackward>), tensor(3.7245, grad_fn=<NllLossBackward>), tensor(3.7233, grad_fn=<NllLossBackward>), tensor(3.7221, grad_fn=<NllLossBackward>)]
