In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x12784843290>

In [2]:
CONTEXT_SIZE = 2
EMBEDDING_DIM = 10

test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

In [5]:
len(test_sentence)

115

In [6]:
n_grams = [
    (
        [test_sentence[i - j -1] for j in range(CONTEXT_SIZE)],
        test_sentence[i]
    )
    for i in range(CONTEXT_SIZE, len(test_sentence))
]

n_grams[:3]

[(['forty', 'When'], 'winters'),
 (['winters', 'forty'], 'shall'),
 (['shall', 'winters'], 'besiege')]

In [7]:
vocabulary = set(test_sentence)
vocab_dict = {word: i for i, word in enumerate(vocabulary)}

In [8]:
vocab_dict

{'Where': 0,
 'the': 1,
 "totter'd": 2,
 'use,': 3,
 'own': 4,
 'and': 5,
 'Proving': 6,
 'How': 7,
 'on': 8,
 "beauty's": 9,
 'worth': 10,
 'much': 11,
 'make': 12,
 'art': 13,
 'child': 14,
 'Will': 15,
 'an': 16,
 'fair': 17,
 'blood': 18,
 "excuse,'": 19,
 'eyes,': 20,
 'praise.': 21,
 'made': 22,
 'lies,': 23,
 'couldst': 24,
 'be': 25,
 "feel'st": 26,
 'by': 27,
 'deep': 28,
 'being': 29,
 'dig': 30,
 'all': 31,
 'cold.': 32,
 'besiege': 33,
 'thine!': 34,
 "'This": 35,
 'warm': 36,
 'And': 37,
 'thine': 38,
 'asked,': 39,
 'days;': 40,
 'To': 41,
 'so': 42,
 'shame,': 43,
 "youth's": 44,
 'thou': 45,
 'mine': 46,
 'gazed': 47,
 'it': 48,
 "deserv'd": 49,
 'a': 50,
 'see': 51,
 'winters': 52,
 'in': 53,
 'where': 54,
 'beauty': 55,
 'proud': 56,
 'sunken': 57,
 'say,': 58,
 'shall': 59,
 'to': 60,
 'old': 61,
 'Thy': 62,
 'small': 63,
 'all-eating': 64,
 'brow,': 65,
 'old,': 66,
 'lusty': 67,
 'within': 68,
 'field,': 69,
 'my': 70,
 'livery': 71,
 'treasure': 72,
 'his': 73,
 '

In [20]:
class NGramModel(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
        
    def forward(self, x):
        embeds = self.embedding(x).view((1, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs
    
model = NGramModel(len(vocabulary), EMBEDDING_DIM, CONTEXT_SIZE)
print(model)

NGramModel(
  (embedding): Embedding(97, 10)
  (linear1): Linear(in_features=20, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=97, bias=True)
)


In [13]:
losses = []
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [23]:
for epoch in range(10):
    total_loss = 0
    for context, target in n_grams:
        
        context_indexes = torch.tensor([vocab_dict[w] for w in context], dtype=torch.long)
        
        model.zero_grad()
        
        log_probs = model(context_indexes)
        
        loss = loss_function(log_probs, torch.tensor([vocab_dict[target]], dtype=torch.long))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    losses.append(total_loss)
    
print(losses)

[521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418, 521.0756077766418]


In [25]:
print(model.embedding.weight[vocab_dict["beauty"]])

tensor([-1.1496, -1.1600, -0.6482,  1.6195, -0.5876,  0.2096,  0.6177, -1.1770,
        -1.3879, -0.5300], grad_fn=<SelectBackward0>)


In [2]:
CONTEXT_SIZE = 2
EMBEDDING_DIM = 10

test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()

In [4]:
ngrams = []

for i in range(CONTEXT_SIZE, len(test_sentence)-CONTEXT_SIZE):
    context_array = test_sentence[i-CONTEXT_SIZE:i] + test_sentence[i+1:i+1+CONTEXT_SIZE]
    target = test_sentence[i]
    ngrams.append((context_array, target))
    
ngrams[:3]

[(['When', 'forty', 'shall', 'besiege'], 'winters'),
 (['forty', 'winters', 'besiege', 'thy'], 'shall'),
 (['winters', 'shall', 'thy', 'brow,'], 'besiege')]

In [5]:
vocab = set(test_sentence)
vocab_dict = {word: i for i, word in enumerate(vocab)}

In [6]:
vocab_dict

{'say,': 0,
 'praise': 1,
 'lusty': 2,
 'a': 3,
 'all-eating': 4,
 'How': 5,
 'blood': 6,
 'of': 7,
 'If': 8,
 'old,': 9,
 'being': 10,
 'on': 11,
 "totter'd": 12,
 'winters': 13,
 'small': 14,
 'by': 15,
 'mine': 16,
 'make': 17,
 'child': 18,
 'in': 19,
 'thine': 20,
 'an': 21,
 'couldst': 22,
 'it': 23,
 'when': 24,
 'Thy': 25,
 'treasure': 26,
 'livery': 27,
 'within': 28,
 "excuse,'": 29,
 'sunken': 30,
 'use,': 31,
 'count,': 32,
 'made': 33,
 'sum': 34,
 'cold.': 35,
 'forty': 36,
 'proud': 37,
 'Will': 38,
 'much': 39,
 'were': 40,
 "youth's": 41,
 'asked,': 42,
 'shall': 43,
 'succession': 44,
 'and': 45,
 'the': 46,
 'eyes,': 47,
 "deserv'd": 48,
 'When': 49,
 'see': 50,
 'old': 51,
 'where': 52,
 'Proving': 53,
 'lies,': 54,
 'fair': 55,
 'trenches': 56,
 'answer': 57,
 'be': 58,
 'my': 59,
 'to': 60,
 'beauty': 61,
 'This': 62,
 'To': 63,
 'thy': 64,
 'thriftless': 65,
 'held:': 66,
 'all': 67,
 'thou': 68,
 'warm': 69,
 'brow,': 70,
 "'This": 71,
 'weed': 72,
 'shame,': 73

In [13]:
# Model defining

class CBOW(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(CBOW, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(4 * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)
        
    def forward(self, x):
        embeds = self.embedding(x).view((1,-1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1)
        return log_probs
    
model = CBOW(len(vocab), 16, CONTEXT_SIZE)
print(model)     

CBOW(
  (embedding): Embedding(97, 16)
  (linear1): Linear(in_features=64, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=97, bias=True)
)


In [21]:
loss_function = nn.NLLLoss().to("cuda")
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.004)

In [27]:
for epoch in range(25):
    total_loss = 0
    for context, target in ngrams:
        
        context_indexes = torch.tensor([vocab_dict[w] for w in context], dtype=torch.long)
        
        model.zero_grad()
        
        log_probabilities = model(context_indexes)
        
        loss = loss_function(log_probabilities, torch.tensor([vocab_dict[target]], dtype=torch.long))
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
    print(f"epoch: {epoch}, loss: {total_loss/len(ngrams)}")

epoch: 0, loss: 0.11149177812711075
epoch: 1, loss: 0.11026416883409561
epoch: 2, loss: 0.10906095314468886
epoch: 3, loss: 0.10788093629788172
epoch: 4, loss: 0.10672344868113329
epoch: 5, loss: 0.10558738656811886
epoch: 6, loss: 0.10447313071035587
epoch: 7, loss: 0.10337932980074002
epoch: 8, loss: 0.10230545209603267
epoch: 9, loss: 0.1012517719923913
epoch: 10, loss: 0.10021716805997195
epoch: 11, loss: 0.09920044045324798
epoch: 12, loss: 0.09820307688871482
epoch: 13, loss: 0.09722275597353776
epoch: 14, loss: 0.09625994957782127
epoch: 15, loss: 0.09531420835100853
epoch: 16, loss: 0.09438442019326193
epoch: 17, loss: 0.093471972070433
epoch: 18, loss: 0.09257390776330286
epoch: 19, loss: 0.09169150190847414
epoch: 20, loss: 0.09082375319154413
epoch: 21, loss: 0.08997274960416395
epoch: 22, loss: 0.0891333321394684
epoch: 23, loss: 0.08830914575908635
epoch: 24, loss: 0.0874976299468193


In [30]:
model.embedding.weight[vocab_dict["beauty"]]

tensor([-0.5675,  1.4600,  0.8537,  1.5127,  1.0416, -0.5369, -0.0963,  0.8950,
        -0.3495, -3.6296, -0.3141, -1.0946,  0.1261,  2.0901,  0.5000,  0.1557],
       grad_fn=<SelectBackward0>)

In [33]:
ngrams[0][0]

['When', 'forty', 'shall', 'besiege']

In [47]:
log_prob = model(torch.tensor([vocab_dict[w] for w in ngrams[0][0]], dtype=torch.long))
print(f"\033[92m actual answer: {ngrams[0][1]}")
print(f"\033[94m predicted answer: {list(vocab)[log_prob.argmax().item()]}")

[92m actual answer: winters
[94m predicted answer: winters
