In [1]:
import torch
import torch.nn as nn
import random
import numpy as np
import pickle
from gru_model import Model

In [10]:
def load_dataset(path="./prepared_dataset.p"):
    """Load the test sessions and the vocabulary from a pickled dataset.

    Args:
        path: Location of the pickle file. Defaults to the file this
            notebook has always read, so existing calls are unaffected.

    Returns:
        A ``(x_test, vocab)`` tuple taken from the ``'x_test'`` and
        ``'vocab'`` entries of the unpickled dictionary.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the pickle lacks an ``'x_test'`` or ``'vocab'`` entry.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at dataset files produced by a trusted preprocessing step.
    # The context manager closes the handle; the original left it open.
    with open(path, "rb") as handle:
        d = pickle.load(handle)
    return d['x_test'], d['vocab']

# Unpack the test sessions and the vocabulary; both are reused by the cells below.
x_test, vocab = load_dataset()

In [11]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
# Rebuild the model and restore its trained weights from disk.
# NOTE(review): the meaning of the positional arguments (100, 100, 1, flag,
# 0.0) is defined in gru_model.Model and is not visible here — confirm they
# match the configuration the checkpoint was trained with.
# The fifth argument used to be written as `not '--untied'`; a non-empty
# string is always truthy, so that expression was simply False.
model = Model(len(vocab), 100, 100, 1, False, 0.0).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU run can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('./state_dict.pth', map_location=device))
loss_func = nn.CrossEntropyLoss()

In [13]:
def batches(data, batch_size):
    """Yield shuffled batches of sentences, each batch sorted longest-first.

    The sentences are shuffled, cut into consecutive chunks of at most
    ``batch_size`` items, and each chunk is ordered by descending length
    (the order ``nn.utils.rnn.pack_sequence`` requires by default).

    Args:
        data: Sequence of sentences, each something ``torch.LongTensor``
            accepts. The sequence itself is not modified.
        batch_size: Maximum number of sentences per batch; must be >= 1.

    Yields:
        A list of ``torch.LongTensor``, one per sentence in the batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    # Shuffle a copy: shuffling `data` itself silently reordered the caller's
    # dataset for every later use.
    shuffled = list(data)
    random.shuffle(shuffled)
    for start in range(0, len(shuffled), batch_size):
        chunk = sorted(shuffled[start:start + batch_size], key=len, reverse=True)
        yield [torch.LongTensor(s) for s in chunk]

def step(model, sents, loss_func, device, verbose=True):
    """Run the model on one batch and return its output with the targets.

    Each sentence is split into an input (all tokens but the last) and a
    target (all tokens but the first), so position ``t`` is paired with
    token ``t + 1``. Both are packed with ``pack_sequence``, which by default
    requires ``sents`` to be ordered longest-first.

    Args:
        model: Callable applied to the packed input batch.
        sents: List of 1-D token tensors, sorted by descending length.
            NOTE(review): every sentence presumably needs at least two
            tokens, otherwise its input slice is empty — confirm upstream.
        loss_func: Unused; accepted so existing call sites keep working.
        device: ``torch.device`` the packed batches are moved to.
        verbose: When true (the default, matching the original behaviour),
            print the sentences and the flattened packed input and target.

    Returns:
        ``(out, y)``: whatever ``model`` returns for the packed input, and
        the packed targets as a ``PackedSequence``. No loss is computed here.
    """
    x = nn.utils.rnn.pack_sequence([s[:-1] for s in sents])
    y = nn.utils.rnn.pack_sequence([s[1:] for s in sents])

    if verbose:
        print('sessions')
        for p in sents:
            print(p)
        print('packed x')
        print(x.data.tolist())
        print('packed y')
        print(y.data.tolist())

    # .to(device) honours the exact device that was passed in (including a
    # specific CUDA index); .cuda() always targeted the default CUDA device.
    x, y = x.to(device), y.to(device)
    out = model(x)
    return out, y

def calc_accuracy(output_distribution, targets, verbose=True):
    """Return the fraction of positions whose arg-max prediction is correct.

    Args:
        output_distribution: Tensor of scores with the classes along
            dimension 1; the predicted class is the arg-max over that
            dimension.
        targets: Tensor of expected class indices, one per row of
            ``output_distribution``.
        verbose: When true (the default, matching the original behaviour),
            print the predictions and the targets as Python lists.

    Returns:
        Number of matching positions divided by ``targets.shape[0]`` as a
        Python float, or ``0.0`` when there are no targets.
    """
    num_targets = targets.shape[0]
    if num_targets == 0:
        # Nothing to score; avoid the ZeroDivisionError the division below
        # would otherwise raise.
        return 0.0
    prediction = torch.argmax(output_distribution, dim=1)
    if verbose:
        print('prediction')
        print(prediction.tolist())
        print('targets')
        print(targets.tolist())
    num_correct_prediction = (prediction == targets).float().sum()
    return num_correct_prediction.item() / num_targets

def test_accuracy(test_data, model, loss_func, device, batch_size=100):
    """Print the model's overall next-token accuracy on ``test_data``.

    The model is switched to evaluation mode and run batch by batch with
    gradient tracking disabled. Accuracy is aggregated over all target
    positions, so a batch contributes in proportion to the number of targets
    it holds; averaging the per-batch fractions directly would over-weight
    batches with fewer targets.

    Args:
        test_data: Sequence of sentences, forwarded to ``batches``.
        model: Model to evaluate; must provide ``eval()`` and be callable on
            a packed batch.
        loss_func: Forwarded to ``step``; not used for the accuracy itself.
        device: ``torch.device`` forwarded to ``step``.
        batch_size: Number of sentences per batch (defaults to the previous
            hard-coded value of 100).

    Returns:
        None. The accuracy is printed; ``nan`` is printed when there is
        nothing to evaluate.
    """
    model.eval()
    num_correct = 0
    num_targets = 0
    with torch.no_grad():
        for sents in batches(test_data, batch_size):
            out, y = step(model, sents, loss_func, device)
            batch_targets = y.data.shape[0]
            # calc_accuracy returns a fraction; convert it back into an
            # integer count so batches of different sizes can be pooled.
            num_correct += round(calc_accuracy(out, y.data) * batch_targets)
            num_targets += batch_targets
    print(num_correct / num_targets if num_targets else float('nan'))

In [19]:
# Evaluate on a single session: the one-element slice holding x_test[10].
test_accuracy(x_test[10:11], model, loss_func, device)

sessions
tensor([129, 155, 176, 148, 114,  32,  96, 147,  32,  86,   5,  91,  99,  70,
        145, 128, 106, 152,  79, 152, 147, 152, 147, 125])
packed x
[129, 155, 176, 148, 114, 32, 96, 147, 32, 86, 5, 91, 99, 70, 145, 128, 106, 152, 79, 152, 147, 152, 147]
packed y
[155, 176, 148, 114, 32, 96, 147, 32, 86, 5, 91, 99, 70, 145, 128, 106, 152, 79, 152, 147, 152, 147, 125]
prediction
[155, 176, 148, 114, 32, 96, 147, 32, 86, 5, 91, 99, 70, 145, 128, 106, 152, 79, 152, 147, 152, 147, 152]
targets
[155, 176, 148, 114, 32, 96, 147, 32, 86, 5, 91, 99, 70, 145, 128, 106, 152, 79, 152, 147, 152, 147, 125]
0.9565217391304348


In [8]:
# Sort into a new list instead of in place, so `x_test` keeps its original
# order and index-based cells (e.g. x_test[10:11]) show the same session
# regardless of which cells have already been run.
x_test_by_length = sorted(x_test, key=len)
for x in x_test_by_length[10:12]:
    print(x)

tensor([129, 171])
tensor([129,  50])


In [18]:
# Print every token id of the selected session next to its vocabulary entry.
for session in x_test[10:11]:
    print()
    for token in session:
        print(token, vocab[token])


tensor(129) load_homepage
tensor(155) click_on_recommendations
tensor(176) click_on_menu_subscriptions
tensor(148) click_on_not_relevant
tensor(114) click_on_subscription
tensor(32) open_invoice_account
tensor(96) scroll_on_homepage
tensor(147) click_on_number_details
tensor(32) open_invoice_account
tensor(86) click_on_agreements
tensor(5) see_agreements
tensor(91) click_on_agreement_detail
tensor(99) click_on_company
tensor(70) click_on_sim_card
tensor(145) click_on_APIs
tensor(128) click_on_cloud
tensor(106) click_on_order_overview
tensor(152) click_on_path
tensor(79) click_on_expand
tensor(152) click_on_path
tensor(147) click_on_number_details
tensor(152) click_on_path
tensor(147) click_on_number_details
tensor(125) click_log_out
