In [None]:
# Shell escape: print the NVIDIA GPU status table for this runtime.
# NOTE(review): none of the cells shown here move the model or tensors to a CUDA device,
# so this appears to be informational only — confirm.
!nvidia-smi

In [None]:
import torch 
import torch.nn as nn 

In [None]:
# Toy training corpus: six lines of English text. A later cell tokenises it with
# str.split(), so punctuation stays attached to the neighbouring word.
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""

In [None]:
# Marker tokens (presumably "unknown word" and "end of sequence", going by their names).
# NOTE(review): no cell shown here references this list — the vocabulary is built from
# raw_text only — confirm whether it is still needed.
special_tokens = ["<UNK>", "<END>"]

In [None]:
# vocab class
class Vocab:
  """Bidirectional token <-> integer-id lookup built from a token sequence.

  Ids are assigned in order of first appearance in ``list_of_words``, so the
  mapping is the same on every run. Enumerating a ``set`` of strings instead
  would give an order that depends on Python's per-process string hash seed,
  making ids (and any saved embeddings) non-reproducible across restarts.

  Attributes:
    total_tokens: number of tokens in the input, duplicates included.
    vocab: set of distinct tokens.
    vocab_size: number of distinct tokens.
    token2id: dict mapping token -> id in ``range(vocab_size)``.
    id2token: dict mapping id -> token (inverse of ``token2id``).
  """

  def __init__(self, list_of_words):
    """Build the vocabulary.

    Args:
      list_of_words: sequence of hashable tokens; must support ``len()``.
    """
    self.total_tokens = len(list_of_words)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    ordered_tokens = list(dict.fromkeys(list_of_words))
    self.vocab = set(ordered_tokens)
    self.vocab_size = len(ordered_tokens)
    self.token2id = {token: idx for idx, token in enumerate(ordered_tokens)}
    self.id2token = {idx: token for token, idx in self.token2id.items()}

  def gettoken2id(self, token):
    """Return the id of ``token``; raises KeyError if it is not in the vocabulary."""
    return self.token2id[token]

  def getid2token(self, id):
    """Return the token stored under ``id``; raises KeyError for an unknown id."""
    return self.id2token[id]

In [None]:
# Whitespace tokenisation only: punctuation stays glued to its word,
# so "process." and "process" are distinct tokens.
list_of_words = raw_text.split(sep=None)
vocab = Vocab(list_of_words=list_of_words)

In [None]:
# Construct the dataset - pairs of (context, target word)
context_size = 2  # Number of words to look at on EACH side of the target
dataset = []
# A target at index i needs i - context_size >= 0 and i + context_size <= n - 1,
# so valid targets are context_size .. n - context_size - 1. range() excludes its
# stop value, hence the bound n - context_size (a fixed "n - 3" dropped the last
# valid target and would index past the end for larger context sizes).
for i in range(context_size, len(list_of_words) - context_size):
  target = list_of_words[i]
  context = []
  # Alternate left/right neighbours, nearest first: i-1, i+1, i-2, i+2, ...
  for j in range(1, context_size + 1):
    context.append(list_of_words[i - j])
    context.append(list_of_words[i + j])
  # Tuples of context and target
  dataset.append((context, target))

In [None]:
# Convert the dataset to ids
# Each context becomes a 1-D LongTensor of token ids; each target becomes a
# LongTensor holding a single class index (not a one-hot vector).
inputs = [
  torch.tensor([vocab.gettoken2id(word) for word in context_words], dtype=torch.long)
  for context_words, _ in dataset
]
targets = [
  torch.tensor([vocab.gettoken2id(target_word)], dtype=torch.long)
  for _, target_word in dataset
]

In [None]:
# Define the continuous bag of words model
emb_dim = 32  # width of each word-embedding vector (passed to CBOW as embedding_dim)
hidden_dim = 64  # NOTE(review): never passed to the model in the cells shown; the hidden width actually used is 128
vocab_size = vocab.vocab_size  # number of distinct tokens: embedding rows and output classes

class CBOW(torch.nn.Module):
  """Continuous bag-of-words model: context token ids -> log-probabilities over the vocabulary.

  The context embeddings are summed into a single vector, passed through a
  ReLU hidden layer, and projected to ``vocab_size`` scores that LogSoftmax
  turns into log-probabilities (the input format ``nn.NLLLoss`` expects).

  Args:
    vocab_size: number of distinct tokens (embedding rows and output classes).
    embedding_dim: size of each embedding vector.
    hidden_dim: width of the hidden layer. Defaults to 128, the value that was
      previously hard-coded, so two-argument construction behaves as before.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_dim=128):
    super().__init__()

    # out: ... x embedding_dim
    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.linear1 = nn.Linear(embedding_dim, hidden_dim)
    self.activation_function1 = nn.ReLU()

    # out: ... x vocab_size
    self.linear2 = nn.Linear(hidden_dim, vocab_size)
    self.activation_function2 = nn.LogSoftmax(dim=-1)

  def forward(self, inputs):
    """Score every vocabulary word as the target for the given context(s).

    Args:
      inputs: LongTensor of token ids, shape ``(context_len,)`` for a single
        example or ``(batch, context_len)`` for a batch.

    Returns:
      Log-probabilities of shape ``(1, vocab_size)`` for a single example
      (same as before) or ``(batch, vocab_size)`` for a batch.
    """
    # After the lookup the context axis is second to last; summing over it
    # works for both unbatched and batched inputs, unlike the built-in sum(),
    # which always reduces the first axis.
    embeds = self.embeddings(inputs).sum(dim=-2)
    if embeds.dim() == 1:
      # Single example: keep the historical (1, embedding_dim) row shape.
      embeds = embeds.view(1, -1)
    out = self.linear1(embeds)
    out = self.activation_function1(out)
    out = self.linear2(out)
    out = self.activation_function2(out)
    return out

# Seed PyTorch's global RNG so the random weight initialisation is the same
# after every kernel restart (Restart & Run All reproducibility).
torch.manual_seed(42)
model = CBOW(vocab_size, emb_dim)

In [None]:
# Training setup
# NLLLoss consumes the log-probabilities produced by the model's LogSoftmax output.
loss_fn = nn.NLLLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.001)

# zip() stops at the shorter list, so this is the number of steps per epoch.
num_samples = min(len(inputs), len(targets))
# Fail early with a clear message instead of a ZeroDivisionError when averaging.
if num_samples == 0:
  raise ValueError("No training samples: `inputs`/`targets` is empty")

# train mode
model.train()
# training loop: one (context, target) pair per optimisation step
for epoch in range(50):
  total_loss = 0.0
  # Named `context_ids` rather than `input` so the built-in input() is not
  # shadowed for the rest of the notebook.
  for context_ids, target in zip(inputs, targets):
    model.zero_grad(set_to_none=True)
    output = model(context_ids)
    loss = loss_fn(output, target)
    total_loss += loss.item()
    loss.backward()
    optim.step()
  print("Average Loss: {:.4f}".format(total_loss / num_samples))

In [None]:
#TESTING
# These four words surround "programs" in the corpus ("People create programs to direct").
context = ['People','create','to', 'direct']
# Raises KeyError if any context word is not in the vocabulary.
context_vector = torch.tensor([vocab.gettoken2id(word) for word in context], dtype=torch.long)
# Inference only: leave training mode and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
  a = model(context_vector)
#Print result
print(f'Context: {context}\n')
# a has one row of log-probabilities; the arg-max index is the predicted token id.
print(f'Prediction: {vocab.getid2token(torch.argmax(a[0]).item())}')