In [92]:
from src.data import math_tags
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging

In [88]:
_LOGGER = logging.getLogger('test')
_LOGGER.setLevel(logging.DEBUG)

In [89]:
tag_sequences = math_tags.get_sequences()

In [4]:
#getting 1 hot representation of sequences
#for each sequence with length N, you will have a corresponding tensor with shaep N by N_ELEMENTS representing set of sequences
one_hot_sequences = []
for sequence in tag_sequences:
    one_hot_sequences.append(torch.cat([torch.zeros(1, math_tags.N_ELEMENTS, dtype=torch.float).\
     scatter(1, torch.LongTensor(elm_set).view(1,-1), 1) for elm_set in sequence]))

In [90]:
n_epoch = 50
n_seq = 100

split = int(n_seq*0.8)
train_sequences = tag_sequences[:split]
train_targets =  one_hot_sequences[:split]

test_sequences = tag_sequences[split:n_seq]
test_targets = one_hot_sequences[split:n_seq]

In [95]:
from src.model.set_sequence import SetSequenceModel

tag_embedding = torch.load('data/processed/math_tags_embedding_normalized.pt')
HIDDEN_DIM = 100
EMBEDDING_DIM = 10
model = SetSequenceModel(hidden_dim=HIDDEN_DIM,
                         n_class=math_tags.N_ELEMENTS,
                         embedding_dim=EMBEDDING_DIM)

In [97]:
test_losses = []
for sequence, target in zip(test_sequences, test_targets):
    model.hidden = model.init_hidden()
    logits = model(sequence)
    loss = loss_fn(logits[1:].view(-1),target[1:].view(-1))
    test_losses.append(loss.data)
_LOGGER.info("Validation Loss: {}".format(np.mean(test_losses)))

INFO:test:Validation Loss: 0.6952255964279175


In [98]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=.9)
losses = []

for epoch in range(n_epoch):
    curr_losses = []
    for sequence, target in zip(train_sequences,
                                train_targets):
        model.zero_grad()
        model.hidden = model.init_hidden()
        
        logits = model(sequence)
        loss = loss_fn(logits[1:].view(-1),target[1:].view(-1))
        curr_losses.append(loss.data)
        loss.backward()
        optimizer.step()
    mean_loss = np.mean(curr_losses)
    losses.append(mean_loss)
    _LOGGER.debug("epoch {}: {}".format(epoch, mean_loss))

DEBUG:test:epoch 0: 0.6885996460914612
DEBUG:test:epoch 1: 0.6672942638397217
DEBUG:test:epoch 2: 0.6097691655158997
DEBUG:test:epoch 3: 0.3967171311378479
DEBUG:test:epoch 4: 0.19278714060783386
DEBUG:test:epoch 5: 0.11599986255168915
DEBUG:test:epoch 6: 0.08228222280740738
DEBUG:test:epoch 7: 0.06395518034696579
DEBUG:test:epoch 8: 0.05256105586886406
DEBUG:test:epoch 9: 0.044831085950136185
DEBUG:test:epoch 10: 0.03926101326942444
DEBUG:test:epoch 11: 0.03506654500961304
DEBUG:test:epoch 12: 0.03180012106895447
DEBUG:test:epoch 13: 0.02918834611773491
DEBUG:test:epoch 14: 0.027055028825998306
DEBUG:test:epoch 15: 0.025281619280576706
DEBUG:test:epoch 16: 0.023785516619682312
DEBUG:test:epoch 17: 0.02250741980969906
DEBUG:test:epoch 18: 0.02140369452536106
DEBUG:test:epoch 19: 0.020441533997654915
DEBUG:test:epoch 20: 0.019595813006162643
DEBUG:test:epoch 21: 0.018846968188881874
DEBUG:test:epoch 22: 0.018179554492235184
DEBUG:test:epoch 23: 0.017581215128302574
DEBUG:test:epoch 24: 

In [99]:
test_losses = []
for sequence, target in zip(test_sequences, test_targets):
    model.hidden = model.init_hidden()
    logits = model(sequence)
    loss = loss_fn(logits[1:].view(-1),target[1:].view(-1))
    test_losses.append(loss.data)
_LOGGER.info("Validation Loss: {}".format(np.mean(test_losses)))

INFO:test:Validation Loss: 0.012037409469485283


In [130]:
i = 0
logits = model(test_sequences[i])
prediction = torch.sigmoid(model(test_sequences[i]))

[[37], [136, 154, 32], [129, 136, 32], [189, 16], [189, 71, 16, 89], [42, 343, 289, 665], [602, 489], [69], [136, 16], [136, 1044]]
[[ 788   93  551 ...  136   52    4]
 [ 788   93  551 ...  136   52    4]
 [ 788  551   93 ...  136   52    4]
 ...
 [ 221  788 1439 ...  136   52    4]
 [ 688  956 1601 ...  136   52    4]
 [ 178    8  828 ...  136   52    4]]


In [135]:
print(test_sequences[i])

[[37], [136, 154, 32], [129, 136, 32], [189, 16], [189, 71, 16, 89], [42, 343, 289, 665], [602, 489], [69], [136, 16], [136, 1044]]


In [133]:
np.argsort(prediction.data.numpy(),axis=1)[:,:1650:-1]

array([[  4,  52, 136,  16,  81,  37, 132,   5,  21, 216,  71,  15, 171],
       [  4,  52, 136,  15,  16,  38,  81, 112,  85, 132,  55,  71,  21],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55,  85,  71, 216, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55,  85, 216,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55,  85, 216,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55,  85, 216,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55, 216,  85,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55,  85, 216,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55, 216,  85,  71, 132],
       [  4,  52, 136,  15,  38,  16, 112,  81,  55, 216,  85,  71, 132]])

In [139]:
np.max(prediction.data.numpy())

0.41964766