In [1]:
from typing import List, Tuple, Optional
import shutil
import os
import pickle
import time

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

from qanta import logging
from qanta.datasets.quiz_bowl import QuizBowlDataset
from qanta.preprocess import preprocess_dataset, tokenize_question
from qanta.guesser.nn import create_load_embeddings_function, convert_text_to_embeddings_indices, compute_n_classes
from qanta.manager import (
    BaseLogger, TerminateOnNaN, EarlyStopping, ModelCheckpoint, MaxEpochStopping, TrainingManager)
from qanta.guesser.torch.dan import flatten_and_offset, batchify, create_save_model, DanModel

In [2]:
# Logger for this notebook; it is also handed to the embedding-loader factory below.
log = logging.get(__name__)
# The two file locations given to the loader factory: an absolute path under /tmp
# and a bare file name.
# NOTE(review): presumably a local tmp cache and its persistent counterpart --
# confirm in qanta.guesser.nn.create_load_embeddings_function.
PTDAN_WE_TMP = '/tmp/qanta/deep/pt_dan_we.pickle'
PTDAN_WE = 'pt_dan_we.pickle'
# Build the loader that a later cell calls to obtain the embedding matrix and the
# word -> index lookup.
load_embeddings = create_load_embeddings_function(PTDAN_WE_TMP, PTDAN_WE, log)

In [3]:
# Load the quiz bowl dataset in guesser-training configuration and pull its training data.
# NOTE(review): the meaning of the positional `1` is not visible in this notebook --
# confirm against the QuizBowlDataset constructor.
dataset = QuizBowlDataset(1, guesser_train=True)
training_data = dataset.training_data()

In [4]:
# Preprocess the raw training data. Judging by the names it is unpacked into, the
# call yields train/test question text with their labels, the vocabulary, and the
# two mappings between class names and integer class ids.
# NOTE(review): the train/test split policy lives inside preprocess_dataset -- confirm there.
x_train_text, y_train, x_test_text, y_test, vocab, class_to_i, i_to_class = preprocess_dataset(
    training_data
)

In [5]:
# Load the embedding matrix together with the word -> row-index lookup for this vocabulary.
# NOTE(review): `expand_glove=True` presumably adds rows for vocabulary words that the
# pretrained vectors lack -- confirm in qanta.guesser.nn.
embeddings, embedding_lookup = load_embeddings(vocab=vocab, expand_glove=True)

2017-09-19 16:38:33,842 - __main__ - INFO - Loading word embeddings from tmp cache


In [7]:
# Sanity check: index assigned to the 'UNK' token (used later as the filler for empty questions).
embedding_lookup['UNK']

183067

In [8]:
# Sanity check: dimensions of the embedding matrix.
embeddings.shape

(183068, 300)

In [9]:
# Sanity check: the largest index in the lookup should be the last valid row of
# `embeddings`, i.e. embeddings.shape[0] - 1.
max(embedding_lookup.values())

183067

In [None]:
def _index_rows(texts, lookup):
    """Convert question texts into a 1-D object array of embedding-index lists.

    Args:
        texts: iterable of question texts, passed one by one to
            ``convert_text_to_embeddings_indices``.
        lookup: word -> embedding-index mapping; must contain ``'UNK'`` if any
            text converts to an empty index list.

    Returns:
        ``np.ndarray`` of dtype ``object`` and shape ``(len(texts),)`` whose
        elements are the per-question index lists.
    """
    rows = []
    for text in texts:
        indices = convert_text_to_embeddings_indices(text, lookup)
        # A question with no known tokens would give an empty row; represent it
        # by a single 'UNK' index so that no row is empty.
        if len(indices) == 0:
            indices.append(lookup['UNK'])
        rows.append(indices)

    # Rows have different lengths. Calling np.array() on such a ragged list relies
    # on implicit object-array creation, which recent NumPy versions reject with
    # ValueError. Filling a preallocated object array element by element yields
    # the same 1-D array of lists on every NumPy version.
    packed = np.empty(len(rows), dtype=object)
    for position, row in enumerate(rows):
        packed[position] = row
    return packed


# The same conversion is applied to both splits.
x_train = _index_rows(x_train_text, embedding_lookup)
y_train = np.array(y_train)

x_test = _index_rows(x_test_text, embedding_lookup)
y_test = np.array(y_test)

n_classes = compute_n_classes(training_data[1])
# Reverse lookup (embedding index -> word), handy when inspecting batches.
i_to_word = {ind: word for word, ind in embedding_lookup.items()}

In [None]:
# Training hyperparameters.
# Number of examples per batch handed to batchify.
batch_size = 512
# Learning rate given to the Adam optimizer.
learning_rate = .001
# NOTE(review): not passed to DanModel in the visible cells -- confirm whether it is still needed.
non_linearity = 'elu'
# Intended cap on the number of training epochs.
max_epochs = 100

In [None]:
# Pre-batch both splits once. Each call returns the batch count plus three per-batch
# containers (inputs, offsets, labels) that run_epoch indexes by batch number.
# NOTE(review): `truncate` presumably controls whether a trailing partial batch is
# dropped (train) or kept (test) -- confirm in qanta.guesser.torch.dan.batchify.
n_batches_train, t_x_train, t_offset_train, t_y_train = batchify(batch_size, x_train, y_train, truncate=True)
n_batches_test, t_x_test, t_offset_test, t_y_test = batchify(batch_size, x_test, y_test, truncate=False)

In [None]:
def run_epoch(model, n_batches, t_x_array, t_offset_array, t_y_array, evaluate=False):
    """Run one pass over pre-batched data and collect per-batch metrics.

    Relies on the notebook-level globals ``criterion`` (always) and
    ``optimizer`` (only when ``evaluate`` is false); both must be defined
    before this function is called.

    Args:
        model: callable as ``model(inputs, offsets)``; must also provide
            ``zero_grad()`` and ``parameters()``.
        n_batches: number of batches to process.
        t_x_array, t_offset_array, t_y_array: per-batch inputs, offsets and
            labels. Each must support indexing by a batch number and, in
            training mode, by an index array.
        evaluate: when true, batches are visited in their given order, the
            inputs are wrapped as volatile Variables and no parameter update
            is performed. When false, batch order is shuffled and each batch
            is followed by backward pass, gradient clipping and an optimizer
            step.

    Returns:
        Tuple ``(accuracies, losses, seconds)``: two NumPy arrays with one
        entry per batch, and the wall-clock duration of the pass.

    NOTE(review): ``volatile=``, ``.data[0]`` and ``clip_grad_norm`` are
    legacy PyTorch idioms; when the project moves to a newer PyTorch they
    need porting (``torch.no_grad()``, ``.item()``, ``clip_grad_norm_``).
    """
    training = not evaluate
    if training:
        # One permutation is applied to all three containers so that inputs,
        # offsets and labels stay aligned while the batch order changes.
        order = np.random.permutation(n_batches)
        t_x_array, t_offset_array, t_y_array = (
            t_x_array[order],
            t_offset_array[order],
            t_y_array[order],
        )

    accuracies, losses = [], []
    started = time.time()
    for index in range(n_batches):
        inputs = Variable(t_x_array[index], volatile=evaluate)
        offsets = Variable(t_offset_array[index], volatile=evaluate)
        labels = Variable(t_y_array[index], volatile=evaluate)

        model.zero_grad()
        out = model(inputs, offsets)

        # Predicted class = position of the largest score along dimension 1.
        predictions = torch.max(out, 1)[1]
        hits = torch.eq(predictions, labels).float()
        accuracy = torch.mean(hits).data[0]

        batch_loss = criterion(out, labels)
        if training:
            batch_loss.backward()
            # Clip the gradient norm to 0.25 before the parameter update.
            torch.nn.utils.clip_grad_norm(model.parameters(), .25)
            optimizer.step()

        accuracies.append(accuracy)
        losses.append(batch_loss.data[0])
    elapsed = time.time() - started

    return np.array(accuracies), np.array(losses), elapsed

In [None]:
# Drop any model/optimizer/criterion left over from an earlier run of this cell
# before building new ones, so the old objects are no longer referenced from the
# notebook namespace. Each name is removed independently: a missing name is not
# an error and does not prevent the others from being removed. Nothing else is
# swallowed (the previous bare `except:` hid every exception).
for _stale_name in ('model', 'optimizer', 'criterion'):
    globals().pop(_stale_name, None)

# Model sized to the embedding matrix and the number of answer classes,
# initialised from the loaded embeddings and moved to the GPU.
model = DanModel(embeddings.shape[0], n_classes)
model.init_weights(initial_embeddings=embeddings)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
# Callbacks consulted after each epoch via manager.instruct(...). The epoch cap
# comes from the `max_epochs` hyperparameter instead of a duplicated literal.
manager = TrainingManager([
    BaseLogger(log_func=log.info), TerminateOnNaN(),
    EarlyStopping(patience=10, verbose=1), MaxEpochStopping(max_epochs),
    ModelCheckpoint(create_save_model(model), '/tmp/dan.pt')
])

In [None]:
# Put the model in training mode and run a single shuffled training epoch.
model.train()
train_acc, train_loss, train_time = run_epoch(
    model,
    n_batches_train,
    t_x_train,
    t_offset_train,
    t_y_train,
    evaluate=False,
)
# Mean per-batch accuracy, mean per-batch loss, and epoch duration in seconds.
print(np.mean(train_acc), np.mean(train_loss), train_time)

In [None]:
# Put the model in evaluation mode and score the test batches without updating parameters.
model.eval()
test_acc, test_loss, test_time = run_epoch(
    model,
    n_batches_test,
    t_x_test,
    t_offset_test,
    t_y_test,
    evaluate=True,
)
# Mean per-batch accuracy, mean per-batch loss, and pass duration in seconds.
print(np.mean(test_acc), np.mean(test_loss), test_time)

In [None]:
# Debugging aid: push test batch 2 through the model by hand, outside run_epoch,
# so the intermediate values can be inspected in the cells below.
# NOTE(review): this switches the model back to training mode, whereas test_loss
# was recorded after model.eval(); if the model has mode-dependent layers the two
# losses will not match -- confirm that this is intended.
model.train()
t_x_batch, t_offset_batch, t_y_batch = (
    Variable(batches[2], volatile=False)
    for batches in (t_x_test, t_offset_test, t_y_test)
)
out = model(t_x_batch, t_offset_batch)
loss = criterion(out, t_y_batch)

In [None]:
# Debug: vocabulary size, for comparison with the number of embedding rows.
len(vocab)

In [None]:
# Debug: embedding matrix dimensions; the first entry is the size passed to DanModel.
embeddings.shape

In [None]:
# Debug: largest input index in the batch; it must stay below embeddings.shape[0].
t_x_batch.max()

In [None]:
# Debug: sum of the embedding layer's output for the debug batch
# (presumably a quick check for NaN/inf values -- a non-finite sum reveals them).
model.embeddings(t_x_batch, t_offset_batch).sum()

In [None]:
# Debug: total softmax mass over the batch.
# NOTE(review): this should equal the number of rows in `out` if softmax
# normalises each row -- confirm which dimension the implicit default uses.
F.softmax(out).sum()

In [None]:
# Debug: recompute the loss by hand as negative log-likelihood of log-softmax
# scores, summed over the batch rather than averaged (size_average=False), for
# comparison with `loss` from the criterion.
F.nll_loss(F.log_softmax(out), t_y_batch, size_average=False)

In [None]:
# Debug: predicted class per example = index of the largest score along dimension 1
# (the maximum values themselves are discarded).
_, preds = torch.max(out, 1)

In [None]:
# Debug: number of output classes the model was built with.
n_classes

In [None]:
# Debug: largest label in the batch; it must stay below n_classes.
t_y_batch.max()

In [None]:
# Debug: loss recorded for batch 2 during the evaluation pass (evaluation does not
# shuffle, so index 2 is the same batch as the debug batch above).
test_loss[2]

In [None]:
# Debug: raw labels of test batch 2.
t_y_test[2]

In [None]:
# Hand the latest train/test timings, losses and accuracies to the training
# manager; it answers with a stop flag and the reasons behind it.
# NOTE(review): which callback produced which reason is decided inside
# TrainingManager -- confirm there.
stop_training, reasons = manager.instruct(
    train_time, train_loss, train_acc,
    test_time, test_loss, test_acc
)

In [None]:
# Show the manager's decision for this epoch.
print(stop_training, reasons)