In [4]:
import torch
import torch.nn as nn
def make_context_vector(context, word_to_ix):
    """Translate context words into a 1-D ``torch.long`` tensor of indices.

    Args:
        context: iterable of words.
        word_to_ix: mapping from word to integer index.

    Returns:
        A ``torch.long`` tensor holding one index per word, in input order.

    Raises:
        KeyError: if a word is missing from ``word_to_ix`` (plain-dict lookup).
    """
    indices = []
    for token in context:
        indices.append(word_to_ix[token])
    return torch.tensor(indices, dtype=torch.long)
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
# NOTE: the spelling "EMDEDDING" is kept on purpose; the model construction
# later in this cell refers to the constant by exactly this name.
EMDEDDING_DIM = 100
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# Vocabulary: every distinct whitespace-separated token of the text.
vocab = set(raw_text)
vocab_size = len(vocab)
# Enumerate the *sorted* vocabulary: iteration order of a set of strings can
# change between interpreter runs (hash randomisation), which would silently
# re-assign indices on every kernel restart.
word_to_ix = {word: ix for ix, word in enumerate(sorted(vocab))}
# Build the inverse from word_to_ix itself so the two maps cannot disagree.
ix_to_word = {ix: word for word, ix in word_to_ix.items()}

# (context, target) pairs: CONTEXT_SIZE words on each side of every position
# that has a full window available.
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    context = raw_text[i - CONTEXT_SIZE:i] + raw_text[i + 1:i + CONTEXT_SIZE + 1]
    target = raw_text[i]
    data.append((context, target))
class CBOW(nn.Module):
    """Continuous bag-of-words model: predicts a word from its context words.

    The context embeddings are summed, passed through a 128-unit ReLU layer
    and projected to log-probabilities over the vocabulary.

    Args:
        vocab_size: number of rows in the embedding table and size of the
            output distribution.
        embedding_dim: width of each word embedding.
    """

    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        # out: 1 x embedding_dim (after the context embeddings are summed)
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()
        # out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Return log-probabilities of shape ``(1, vocab_size)``.

        Args:
            inputs: 1-D ``torch.long`` tensor of context word indices.
        """
        # torch.sum instead of the built-in sum(): it reduces in one call and
        # still returns a tensor for an empty context (the built-in returns
        # the int 0, which has no .view and crashed).
        embeds = torch.sum(self.embeddings(inputs), dim=0).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_emdedding(self, word):
        """Return the ``(1, embedding_dim)`` embedding of ``word``.

        Looks the word up in the module-level ``word_to_ix`` mapping, so that
        name must exist when this is called; raises ``KeyError`` for a word
        that is not in it.
        """
        word = torch.tensor([word_to_ix[word]])
        return self.embeddings(word).view(1, -1)

    # Correctly spelled alias; the original (misspelled) name stays available
    # so existing callers keep working.
    get_word_embedding = get_word_emdedding
# Seed the global RNG before the model is built so that the randomly
# initialised weights are repeatable from run to run.
torch.manual_seed(0)
model = CBOW(vocab_size, EMDEDDING_DIM)
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# The index tensors are the same in every epoch, so build them once up front
# instead of re-creating them for every sample of every epoch.
training_pairs = [
    (make_context_vector(context, word_to_ix), torch.tensor([word_to_ix[target]]))
    for context, target in data
]

for epoch in range(50):
    total_loss = 0
    for context_vector, target_ix in training_pairs:
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, target_ix)
    # optimize at the end of each epoch: a single step on the summed loss
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

# Prediction example
context = ['People', 'create', 'to', 'direct']
context_vector = make_context_vector(context, word_to_ix)
# Inference only: no autograd graph is needed here.
with torch.no_grad():
    a = model(context_vector)
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {ix_to_word[torch.argmax(a[0]).item()]}')


Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Context: ['People', 'create', 'to', 'direct']

Prediction: programs


In [5]:
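# Optional: tidied-up variant of the CBOW example (mean-pooled context, one SGD step per sample, per-epoch loss printed)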
import torch
import torch.nn as nn

# Helper: convert context words to tensor indices
def make_context_vector(context, word_to_ix):
    """Return the indices of the ``context`` words as a 1-D ``torch.long`` tensor.

    Each word is looked up in ``word_to_ix``; a word that is missing from a
    plain dict raises ``KeyError``. Order of the input words is preserved.
    """
    lookup = word_to_ix.__getitem__
    return torch.tensor(list(map(lookup, context)), dtype=torch.long)

# Parameters
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 100

# Text data
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

# Prepare vocabulary
vocab = set(raw_text)
vocab_size = len(vocab)
# Enumerate the *sorted* vocabulary: iteration order of a set of strings can
# change between interpreter runs (hash randomisation), which would silently
# re-assign indices on every kernel restart.
word_to_ix = {word: ix for ix, word in enumerate(sorted(vocab))}
ix_to_word = {ix: word for word, ix in word_to_ix.items()}

# Prepare (context, target) pairs: CONTEXT_SIZE words on each side of every
# position that has a full window available.
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    context = raw_text[i - CONTEXT_SIZE:i] + raw_text[i + 1:i + CONTEXT_SIZE + 1]
    target = raw_text[i]
    data.append((context, target))

# CBOW model
class CBOW(nn.Module):
    """Continuous bag-of-words network.

    Averages the embeddings of the context words, feeds the result through a
    128-unit ReLU layer and emits log-probabilities over the vocabulary.
    """

    def __init__(self, vocab_size, embedding_dim):
        """Create the embedding table, the two linear layers and activations."""
        super().__init__()
        hidden_units = 128
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, hidden_units)
        self.activation1 = nn.ReLU()
        self.linear2 = nn.Linear(hidden_units, vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        """Map a tensor of context indices to a ``(1, vocab_size)`` row of log-probabilities."""
        # Average over the context dimension, then flatten to a single row.
        pooled = self.embeddings(inputs).mean(dim=0).view(1, -1)
        hidden = self.activation1(self.linear1(pooled))
        return self.log_softmax(self.linear2(hidden))

    def get_word_embedding(self, word):
        """Return the embedding of ``word`` as a ``(1, embedding_dim)`` tensor.

        The index comes from the module-level ``word_to_ix`` mapping.
        """
        index = torch.tensor([word_to_ix[word]])
        vector = self.embeddings(index)
        return vector.view(1, -1)

# Initialize model
# Seed the global RNG before the model is built so that the randomly
# initialised weights are repeatable from run to run.
torch.manual_seed(0)
model = CBOW(vocab_size, EMBEDDING_DIM)
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# The index tensors are the same in every epoch, so build them once up front
# instead of re-creating them for every sample of every epoch.
training_pairs = [
    (make_context_vector(context, word_to_ix), torch.tensor([word_to_ix[target]]))
    for context, target in data
]

# Training loop: one SGD step per (context, target) example
for epoch in range(50):
    total_loss = 0
    for context_vector, target_ix in training_pairs:
        # Forward pass
        log_probs = model(context_vector)
        loss = loss_function(log_probs, target_ix)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the running total keeps no graph alive
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# Prediction example
context = ['People', 'create', 'to', 'direct']
context_vector = make_context_vector(context, word_to_ix)
# Inference only: no autograd graph is needed here.
with torch.no_grad():
    a = model(context_vector)

print(f'\nRaw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {ix_to_word[torch.argmax(a[0]).item()]}\n')


Epoch 1, Loss: 224.8508
Epoch 2, Loss: 224.3493
Epoch 3, Loss: 223.8495
Epoch 4, Loss: 223.3507
Epoch 5, Loss: 222.8527
Epoch 6, Loss: 222.3556
Epoch 7, Loss: 221.8590
Epoch 8, Loss: 221.3633
Epoch 9, Loss: 220.8676
Epoch 10, Loss: 220.3714
Epoch 11, Loss: 219.8744
Epoch 12, Loss: 219.3757
Epoch 13, Loss: 218.8766
Epoch 14, Loss: 218.3762
Epoch 15, Loss: 217.8744
Epoch 16, Loss: 217.3707
Epoch 17, Loss: 216.8652
Epoch 18, Loss: 216.3597
Epoch 19, Loss: 215.8529
Epoch 20, Loss: 215.3449
Epoch 21, Loss: 214.8347
Epoch 22, Loss: 214.3229
Epoch 23, Loss: 213.8102
Epoch 24, Loss: 213.2951
Epoch 25, Loss: 212.7784
Epoch 26, Loss: 212.2601
Epoch 27, Loss: 211.7386
Epoch 28, Loss: 211.2146
Epoch 29, Loss: 210.6884
Epoch 30, Loss: 210.1598
Epoch 31, Loss: 209.6299
Epoch 32, Loss: 209.0956
Epoch 33, Loss: 208.5591
Epoch 34, Loss: 208.0193
Epoch 35, Loss: 207.4751
Epoch 36, Loss: 206.9284
Epoch 37, Loss: 206.3779
Epoch 38, Loss: 205.8228
Epoch 39, Loss: 205.2671
Epoch 40, Loss: 204.7064
Epoch 41,