In [1]:
# task - given two words of a sent. predict third word using fixed window LM

sentences = ['bob likes sheep','alice is fast','i love you','He is mad']

In [2]:
# Before we start fancy modeling we should convert text we tokenize our inputs and converts words into indices

vocab = {} # map word to index
inputs = [] # stores indexified verison of sentences

for sent in sentences:
  sent_idxes = []
  sent = sent.split()
  for word in sent:
    if word not in vocab:
      vocab[word] = len(vocab)
    sent_idxes.append(vocab[word])
  inputs.append(sent_idxes)

print(vocab)

{'bob': 0, 'likes': 1, 'sheep': 2, 'alice': 3, 'is': 4, 'fast': 5, 'i': 6, 'love': 7, 'you': 8, 'He': 9, 'mad': 10}


In [3]:
inputs

[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 4, 10]]

In [4]:
# Embedding module in pytorch expects long tensors
# 1. Convert input into LongTensors 2. Define inputs and outputs

import torch

prefixes = torch.LongTensor([sent[:-1] for sent in inputs])
labels = torch.LongTensor([sent[-1] for sent in inputs])

print(prefixes,labels)

tensor([[0, 1],
        [3, 4],
        [6, 7],
        [9, 4]]) tensor([ 2,  5,  8, 10])


In [5]:
# defining a n/w

import torch.nn as nn

class NLM(nn.Module):
  # two things that are need to be done -
  # 1. init function - initialize all params of n/w
  # 2. forward network

  def __init__(self, d_embedding, d_hidden, window_size, len_vocab):
    super(NLM, self).__init__()
    self.d_emb = d_embedding
    self.embeddings = nn.Embedding(len_vocab, d_embedding)

    #concateneated embeddings
    self.W_hid = nn.Linear(d_embedding * window_size, d_hidden)

    self.W_out = nn.Linear(d_hidden, len_vocab)

  def forward(self, input): # batch of prefixes

    batch_size, window_size = input.size()
    embs = self.embeddings(input) # 4*2*5
    
    #concatenate embs together
    concat_embs = embs.reshape(batch_size,window_size*self.d_emb) # 4*10

    #now let's project it in to embedding space

    hiddens = self.W_hid(concat_embs) # 4*12
    outs = self.W_out(hiddens) # 4*11 also know as logits

    # probs = nn.functional.softmax(outs, 1)
    return outs
network = NLM(5,12,2,len(vocab))
network(prefixes)

num_epochs = 10
learning_rate = 0.5
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params= network.parameters(),lr=learning_rate)

for i in range(num_epochs):
  logits = network(prefixes)
  loss = loss_function(logits,labels)

  # now let's update params
  # 1. calculate gradient
  loss.backward()
  # 2. update params using grad. descent
  optimizer.step()
  # 3. zero out the gradients for next epoch
  optimizer.zero_grad()

  print('epoch: %d,loss: %.2f' %(i,loss))

epoch: 0,loss: 2.65
epoch: 1,loss: 1.75
epoch: 2,loss: 1.03
epoch: 3,loss: 0.54
epoch: 4,loss: 0.31
epoch: 5,loss: 0.19
epoch: 6,loss: 0.13
epoch: 7,loss: 0.10
epoch: 8,loss: 0.08
epoch: 9,loss: 0.06


In [6]:
rev_vocab = dict((j,i) for (i,j) in vocab.items())

boblikes = prefixes[0]
prediction = network(boblikes.unsqueeze(0))
probs=nn.functional.softmax(prediction,dim=1).squeeze()
argmax_idx = torch.argmax(nn.functional.softmax(prediction,dim=1).squeeze())

# Should not test it this way
print('Prediction of next word for "Bob likes" is %s with a prob. of %f' %(rev_vocab[argmax_idx.item()],probs[argmax_idx.item()]))

Prediction of next word for "Bob likes" is sheep with a prob. of 0.977574
