In [None]:
import time

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

from qanta.guesser.torch.dan import *
from qanta.datasets.quiz_bowl import QuizBowlDataset

In [None]:
# Construct the quiz bowl dataset wrapper and fetch its training data.
# NOTE(review): the positional argument 1 is presumably a minimum-examples-per-answer
# threshold -- confirm against QuizBowlDataset's signature.
dataset = QuizBowlDataset(1, guesser_train=True)
# training_data is indexable; element [1] is what is later handed to
# compute_n_classes, so it presumably holds the answer labels -- TODO confirm.
training_data = dataset.training_data()

In [None]:
# Split/prepare the raw training data. preprocess_dataset returns seven values;
# judging by the names they are the train/test texts and labels, the vocabulary,
# and the class <-> index mappings (verify against preprocess_dataset itself).
x_train_text, y_train, x_test_text, y_test, vocab, class_to_i, i_to_class = preprocess_dataset(
    training_data
)

In [None]:
# Load the embedding matrix together with its lookup table for this vocabulary.
# `embedding_lookup` is a mapping whose items are (word, index) pairs (it is
# inverted into `i_to_word` further down); `embeddings.shape[0]` is later used
# as the first constructor argument of the model.
# NOTE(review): expand_glove=True presumably adds vocabulary words missing from
# the pretrained vectors -- confirm in load_embeddings.
embeddings, embedding_lookup = load_embeddings(vocab=vocab, expand_glove=True)

In [None]:
def _ragged_rows(rows):
    """Pack ``rows`` into a 1-D object ndarray, one untouched row per element.

    Each question maps to an index sequence of its own length (the batching
    code below computes ``len(r)`` per row), so the collection is ragged.
    ``np.array(rows)`` on ragged input raises ``ValueError`` on NumPy >= 1.24,
    and produces a 2-D array instead whenever all rows happen to share a
    length. Filling a pre-allocated object array element by element always
    yields shape ``(len(rows),)`` on every NumPy version.
    """
    packed = np.empty(len(rows), dtype=object)
    for position, row in enumerate(rows):
        packed[position] = row
    return packed


# Convert every question text into its sequence of embedding indices.
x_train = _ragged_rows(
    [convert_text_to_embeddings_indices(q, embedding_lookup) for q in x_train_text]
)
y_train = np.array(y_train)

x_test = _ragged_rows(
    [convert_text_to_embeddings_indices(q, embedding_lookup) for q in x_test_text]
)
y_test = np.array(y_test)

In [None]:
# Number of output classes for the classifier, derived from training_data[1].
# NOTE(review): training_data[1] is presumably the list of answer labels -- confirm.
n_classes = compute_n_classes(training_data[1])

In [None]:
# Invert the word -> index lookup so index sequences can be decoded back to words.
i_to_word = dict(zip(embedding_lookup.values(), embedding_lookup.keys()))

In [None]:
# Sanity check: decode the first five training examples back into words and
# show the class each one is labelled with, separated by blank lines.
for i in range(5):
    print(*(i_to_word[ind] for ind in x_train[i]))
    print(i_to_class[y_train[i]], end='\n\n')

In [None]:
# Shuffle the training examples once and pre-build every mini-batch as CUDA
# tensors, so the training loop only has to index into ready-made batches.
n_examples = x_train.shape[0]
batch_size = 512
# Floor division: trailing examples that do not fill a whole batch are dropped.
n_batches = n_examples // batch_size
random_order = np.random.permutation(n_examples)
x_train = x_train[random_order]
y_train = y_train[random_order]


def _as_object_array(items):
    """Pack ``items`` into a 1-D object ndarray without converting the elements.

    ``np.array(list_of_tensors)`` makes NumPy try to interpret every tensor as
    nested numeric data, which fails for CUDA tensors and for tensors of
    different lengths. Assigning element by element stores each tensor as-is
    while still giving an ndarray that can be reordered with a permutation
    index array.
    """
    packed = np.empty(len(items), dtype=object)
    for position, item in enumerate(items):
        packed[position] = item
    return packed


t_x_batches = []
t_offset_batches = []
t_y_batches = []

for b in range(n_batches):
    batch_slice = slice(b * batch_size, (b + 1) * batch_size)
    x_batch = x_train[batch_slice]
    y_batch = y_train[batch_slice]

    # Concatenate the index sequences of all examples in the batch into one
    # flat vector; `offsets` records where each example starts inside it.
    # These two tensors are what the model is later called with.
    flat_x_batch = np.array([ind for r in x_batch for ind in r])
    x_lengths = [len(r) for r in x_batch]
    offsets = np.cumsum([0] + x_lengths[:-1])

    t_x_batches.append(torch.from_numpy(flat_x_batch).long().cuda())
    t_offset_batches.append(torch.from_numpy(offsets).long().cuda())
    t_y_batches.append(torch.from_numpy(y_batch).long().cuda())

# Object arrays (one tensor per slot) so whole batches can be shuffled with
# NumPy fancy indexing.
t_x_batches = _as_object_array(t_x_batches)
t_offset_batches = _as_object_array(t_offset_batches)
t_y_batches = _as_object_array(t_y_batches)

In [None]:
# Build the classifier sized by the embedding table's row count and the number
# of classes, seed it with the loaded embeddings, then move it to the GPU in
# training mode.
model = DanModel(embeddings.shape[0], n_classes)
model.init_weights(initial_embeddings=embeddings)
model.cuda()
model.train()

# Cross-entropy objective optimised with Adam; the optimizer is created only
# after the parameters have been moved to the GPU.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train the model for a fixed number of epochs over the pre-built batches,
# recording the mean training accuracy and loss of every epoch.
n_epochs = 100
# Opt-in diagnostic: track the largest/smallest gradient entry seen so far.
# Off by default, matching the previously commented-out code path.
track_gradient_range = False

max_gradient = float('-inf')
min_gradient = float('inf')
accuracies = []
losses = []


def _to_python_scalar(value):
    """Return the single number held by a one-element tensor/Variable.

    Older PyTorch returns 1-element tensors from reductions, read with
    ``.data[0]``; newer versions return 0-dim tensors, where ``[0]`` raises
    and ``.item()`` is the accessor. Use whichever the installed version has.
    """
    data = value.data
    if hasattr(data, 'item'):
        return data.item()
    return data[0]


for epoch in range(n_epochs):
    # Visit the batches in a fresh random order every epoch. The three arrays
    # are reordered with the same permutation so they stay aligned.
    random_batch_order = np.random.permutation(n_batches)
    t_x_batches = t_x_batches[random_batch_order]
    t_offset_batches = t_offset_batches[random_batch_order]
    t_y_batches = t_y_batches[random_batch_order]

    batch_accuracies = []
    batch_losses = []
    epoch_start = time.time()
    print('Starting epoch: ', epoch)
    for batch in range(n_batches):
        # The batch tensors already live on the GPU; Variable() is kept for
        # compatibility with the older PyTorch API this notebook targets.
        t_x_batch = Variable(t_x_batches[batch])
        t_offsets = Variable(t_offset_batches[batch])
        t_y_batch = Variable(t_y_batches[batch])

        model.zero_grad()
        out = model(t_x_batch, t_offsets)
        _, preds = torch.max(out, 1)

        accuracy = _to_python_scalar(torch.mean(torch.eq(preds, t_y_batch).float()))
        batch_loss = criterion(out, t_y_batch)

        batch_accuracies.append(accuracy)
        batch_losses.append(_to_python_scalar(batch_loss))

        batch_loss.backward()
        if track_gradient_range:
            for p in model.parameters():
                if p.grad is None:
                    continue
                max_gradient = max(max_gradient, _to_python_scalar(p.grad.max()))
                min_gradient = min(min_gradient, _to_python_scalar(p.grad.min()))

        optimizer.step()

    epoch_accuracy = np.mean(batch_accuracies)
    epoch_loss = np.mean(batch_losses)

    accuracies.append(epoch_accuracy)
    losses.append(epoch_loss)

    epoch_end = time.time()
    print('Epoch: time={} accuracy={} loss={}'.format(epoch_end - epoch_start, epoch_accuracy, epoch_loss))
    if track_gradient_range:
        print(max_gradient, min_gradient)
