In [15]:
import pickle
import torch
import torch.nn as nn
import random
import numpy as np
from gru_model import Model

def load_dataset(path):
    d = pickle.load( open( path, "rb" ) )
    return d['x_test'], d['vocab']

def batches(data, batch_size):
    """ Yields batches of sentences from 'data', ordered on length. """
    random.shuffle(data)
    for i in range(0, len(data), batch_size):
        sentences = data[i:i + batch_size]
        sentences.sort(key=lambda l: len(l), reverse=True)
        yield [torch.LongTensor(s) for s in sentences]

def step(model, sents, device, n):
    """ Performs a model inference for the given model and sentence batch.
    Returns the model otput, total loss and target outputs. """
    if n == 0:
        x = nn.utils.rnn.pack_sequence([s[0].unsqueeze(0) for s in sents])
        y = nn.utils.rnn.pack_sequence([s[0].unsqueeze(0) for s in sents])
    else:
        print([s[n] for s in sents])
        x = nn.utils.rnn.pack_sequence([s[:n] for s in sents])
        y = nn.utils.rnn.pack_sequence([s[n].unsqueeze(0) for s in sents])

    if device.type == 'cuda':
        x, y = x.cuda(), y.cuda()
    out = model(x)
    return out, y

def calc_accuracy(output_distribution, targets , n):
    
    # extract item index of largest item prob
    prediction = torch.argmax(output_distribution, dim=1)
    
    # extract every n element
    if n > 1:
        prediction = prediction[n-1::n] 
    
    num_correct_prediction = (prediction == targets).float().sum()
    return num_correct_prediction.item()/targets.shape[0]

def topk_accuracy(output_distribution, targets, k, n):
    _, pred = torch.topk(input=output_distribution, k=k, dim=1)
    if n > 1:
        pred = pred[n-1::n] 
    pred = pred.t()
    correct = pred.eq(targets.expand_as(pred))
    return correct.sum().item() / targets.shape[0]

def test_accuracy(state_dict,data):
    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
    x_test, vocab = load_dataset(data)
    model = Model(vocab_size=214, embedding_dim=20, hidden_dim=100, gru_layers=1, dropout=0.0)
    batch_size = 200
    model.load_state_dict(torch.load(state_dict))
    model.eval()
    model.to(device)
    n = 3
    test_accuracies = []
    top2_accuracies = []
    top3_accuracies = []
    with torch.no_grad():
        for sents in batches(x_test, batch_size):
            out, y = step(model, sents, device, n)
            test_accuracies.append(calc_accuracy(out,y.data, n))
            top2_accuracies.append(topk_accuracy(out,y.data, 2, n))
            top3_accuracies.append(topk_accuracy(out,y.data, 3, n))
    return np.mean(test_accuracies), np.mean(top2_accuracies), np.mean(top3_accuracies)

def roundoff(num):
    return "{:.5f}".format(num)

def test_run():
    state_dicts = [
        'state_dicts/state_dict_short_lstm.pth'
    ]
    data = [
        './data/short_sessions.p'
    ]
    p1, p2, p3 = [], [], []
    for i in range(3):
        a, b, c = test_accuracy(state_dicts[0],data[0])
        p1.append(a)
        p2.append(b)
        p3.append(c)
    
    print(roundoff(np.mean(p1)),'$\pm$', roundoff(np.std(p1)), '&', roundoff(np.mean(p2)),'$\pm$', roundoff(np.std(p2)), '&', roundoff(np.mean(p3)),'$\pm$', roundoff(np.std(p3)), '\\\\') 
    
    print('test accuracy',np.mean(p1),np.std(p1))
    print('test top 2 accuracy',np.mean(p2),np.std(p2))
    print('test top 3 accuracy',np.mean(p3),np.std(p3))

In [17]:
test_accuracy('./data/short_sessions.p','state_dicts/state_dict_short_lstm.pth')

TypeError: 'int' object is not subscriptable