In [2]:
import torch
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1234)

<torch._C.Generator at 0x1c173d36a90>

In [3]:
# Word2vec and GloVe known frameworks to execute word embeddings

In [4]:
word_to_ix = {"data": 0, "science": 1}

In [5]:
word_to_ix

{'data': 0, 'science': 1}

In [6]:
embeds = nn.Embedding(2, 5) # 2 words in vocab, 5 dimensional embeddings 

In [7]:
embeds

Embedding(2, 5)

In [8]:
lookup_tensor = torch.tensor([word_to_ix["data"]], dtype=torch.long)
lookup_tensor

tensor([0])

In [9]:
# Set up an embedding layer
hello_embed = embeds(lookup_tensor)
print(hello_embed)

tensor([[ 0.0461,  0.4024, -1.0115,  0.2167, -0.6123]],
       grad_fn=<EmbeddingBackward0>)


In [10]:
CONTEXT_SIZE = 2

In [11]:
EMBEDDING_DIM = 10

In [12]:
# Apply word embedding on these two paragraphs and get real vectors as features
test_sentence = """Data science combines math and statistics, specialized programming, advanced analytics, artificial intelligence (AI), and machine learning with specific subject matter expertise to uncover actionable insights hidden in an organization’s data. These insights can be used to guide decision making and strategic planning.
"""

test_sentence = test_sentence.split()

In [13]:
# Tokenization to small tokens (N-grams); Unigram - single word, bigram - two words, trigram
# Tokenize the input and build a list of tuples ([word - 2], [word - 1], target word)
trigrams = [([test_sentence[i], test_sentence[i + 1], test_sentence[i + 2]]) for i in range(len(test_sentence) - 2)]

# print(len(test_sentence))

# Print a chunk of triagrams 
print(trigrams[:3])

# Put all iterable objects list tuple into a set
vocab = set(test_sentence)
# print(vocab)

word_to_ix = {word: i for i, word in enumerate(vocab)}

# print(word_to_ix)

[['Data', 'science', 'combines'], ['science', 'combines', 'math'], ['combines', 'math', 'and']]


In [14]:
# Implement n-gram language modeler to extract relevant key words
class NGramLangModeler(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLangModeler, self).__init__()  #??
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)  #??
        self.linear1 = nn.Linear(context_size * embedding_dim, 128) #??
        self.linear2 = nn.Linear(128, vocab_size)
        
    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = F.log_softmax(out, dim=1) #?
        return log_probs
    
losses = []
loss_function = nn.NLLLoss()
model = NGramLangModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE) # (722, 10, 2)
optimizer = optim.SGD(model.parameters(), lr = 0.001) # Stochastic gradient descent

In [15]:
losses

[]

In [16]:
loss_function

NLLLoss()

In [17]:
model

NGramLangModeler(
  (embeddings): Embedding(38, 10)
  (linear1): Linear(in_features=20, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=38, bias=True)
)

In [18]:
# Apply adam optimizer
optimizer

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [19]:
for epoch in range(10):
    total_loss = 0
    
    for abc, context, target in trigrams:
        print(abc, context, target)
        # 1. prepare the input to be passed to the model (i.e. turn the words into integer indices and wrap them in tensors)
        context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype = torch.long)
        
        # 2. recall that torch "accumulates" gradients. Before passing in a new instance, zero out the gradients from the old instance
        model.zero_grad()
        
        # 3. Run the forward pass, getting log probabilities over the next words
        log_probs = model(context_idxs)
        
        # 4. Compute your loss function (Torch wants the target word wrapped in a tensor)
        loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))
        
        # 5. Do the backward pass and update the gradient
        loss.backward()
        optimizer.step()
        
        # Get the python number from a 1-element tensor by calling tensor.item()
        total_loss += loss.item()
    
    losses.append(total_loss)
print(losses)

Data science combines


KeyError: 's'