In [None]:
import numpy as np
import config
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from split_functions import string_to_equipment
# Load the stored array "v21D10_m1.npy" from the directory configured in `config`.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code while loading -- only point this at trusted files.
dataset = np.load(config.DATA_DIRECTORY/"v21D10_m1.npy", allow_pickle=True)

In [None]:
# Convert the loaded dataset with string_to_equipment and preview the first
# three results (only the last expression of a cell is displayed).
sentences = string_to_equipment(dataset)
sentences[:3]

In [None]:
class NGramLanguageModeler(nn.Module):
    """Feed-forward n-gram language model.

    Each context token id is embedded, the embeddings are concatenated,
    passed through one hidden ReLU layer and projected to log-probabilities
    over the vocabulary.

    Args:
        vocab_size: Number of distinct token ids (rows of the embedding table
            and width of the output layer).
        embedding_dim: Size of each token embedding.
        context_size: Number of context tokens per example.
        hidden_dim: Width of the hidden layer. Defaults to 128.
    """

    def __init__(self, vocab_size, embedding_dim, context_size, hidden_dim=128):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inputs):
        """Return log-probabilities over the vocabulary.

        Args:
            inputs: Long tensor of token ids. Either a single context of
                ``context_size`` ids, or a batch shaped
                ``(batch, context_size)``.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` holding log-probabilities;
            ``batch`` is 1 for a single un-batched context.
        """
        # Flatten each example's embeddings into one row. Using the layer's
        # own input width (instead of a fixed leading 1) keeps the single
        # context case identical while also accepting batched input.
        embeds = self.embeddings(inputs).view(-1, self.linear1.in_features)
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)
        return F.log_softmax(out, dim=1)

# Hyperparameters and training objects for the n-gram model.
SEED = 0                # fixed so weight initialisation is repeatable on Restart & Run All
VOCAB_SIZE = 10         # rows in the embedding table; every token id must be < VOCAB_SIZE
                        # NOTE(review): confirm this matches the ids string_to_equipment produces
CONTEXT_SIZE = 3        # number of preceding tokens used to predict the next one
EMBEDDING_DIM = 10
LEARNING_RATE = 0.001

# Seed before the model is built: its parameters are randomly initialised.
torch.manual_seed(SEED)

losses = []             # one summed loss value is appended per training epoch
loss_function = nn.NLLLoss()   # expects log-probabilities, which the model's forward returns
model = NGramLanguageModeler(VOCAB_SIZE, EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [None]:

# Build (context, target) pairs from every sentence. The first and last
# element of each sentence are dropped before windowing.
ngrams = []
for sentence in sentences:
    tokens = sentence[1:-1]
    for target_pos in range(CONTEXT_SIZE, len(tokens)):
        # Context is ordered nearest-first: tokens[pos-1], tokens[pos-2], ...
        preceding = [
            tokens[target_pos - offset]
            for offset in range(1, CONTEXT_SIZE + 1)
        ]
        ngrams.append((preceding, tokens[target_pos]))
    

In [None]:
# Train for 10 passes over the n-gram pairs, one example per optimiser step.
for epoch in range(10):
    epoch_loss = 0
    for context, target in ngrams:
        # Wrap the context ids and the target id in long tensors; the target
        # is given a leading dimension of 1 to match the model's (1, vocab) output.
        context_tensor = torch.tensor(context, dtype=torch.long)
        target_tensor = torch.tensor([target], dtype=torch.long)

        # Gradients accumulate across backward() calls, so clear them
        # before processing each new example.
        model.zero_grad()

        # Forward pass: log-probabilities over the next token, then the loss.
        log_probs = model(context_tensor)
        loss = loss_function(log_probs, target_tensor)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # .item() extracts the Python number from the 1-element loss tensor.
        epoch_loss += loss.item()
    losses.append(epoch_loss)
print(losses)  # summed loss per epoch