In [1]:
import os
import os
import sys
import random
import numpy as np
import argparse
import chainer
import pickle
from typing import List, Dict, Tuple, Optional

from qanta import logging
from qanta.config import conf
from qanta.guesser.abstract import AbstractGuesser

from qanta.buzzer import configs
from qanta.buzzer.progress import ProgressBar
from qanta.buzzer.trainer import Trainer
from qanta.buzzer.iterator import QuestionIterator
from qanta.buzzer.util import load_quizbowl, GUESSERS
from qanta.buzzer.models import MLP, RNN
from qanta.buzzer import constants as bc
from qanta.util import constants as c


log = logging.get(__name__)

N_GUESSERS = len(GUESSERS)
N_GUESSES = conf['buzzer']['n_guesses']

ERROR (theano.gpuarray): pygpu was configured but could not be imported or is too old (version 0.6 or higher required)
NoneType: None


In [2]:
def new_dense_vector(dicts: List[List[Dict[str, float]]],
        wordvecs: List[List[np.ndarray]], step_size=1) -> List[List[float]]:

    length = len(dicts)
    prev_vec = [0.02 for _ in range(N_GUESSERS * N_GUESSES)]
    vecs = []
    for i in range(length):
        if len(dicts[i]) != N_GUESSERS:
            raise ValueError("Inconsistent number of guessers ({0}, {1}).".format(
                N_GUESSERS, len(dicts)))
        vec = []
        diff_vec = []
        isnew_vec = []
        for j in range(N_GUESSERS):
            dic = sorted(dicts[i][j].items(), key=lambda x: x[1], reverse=True)
            for guess, score in dic:
                vec.append(score)
                if i > 0 and guess in dicts[i-1][j]:
                    diff_vec.append(score - dicts[i-1][j][guess])
                    isnew_vec.append(0)
                else:
                    diff_vec.append(score) 
                    isnew_vec.append(1)
            if len(dic) < N_GUESSES:
                for k in range(max(N_GUESSES - len(dic), 0)):
                    vec.append(0)
                    diff_vec.append(0)
                    isnew_vec.append(0)
        features = [vec[0], vec[1], vec[2],
                    np.average(vec[:10]), np.average(prev_vec[:10]),
                    np.var(vec[:10]), np.var(prev_vec[:10]),
                    sum(isnew_vec[:10]),
                    isnew_vec[0], isnew_vec[1], isnew_vec[2],
                    diff_vec[0], diff_vec[1],
                    vec[0] - vec[1], vec[1] - vec[2], 
                    vec[0] / vec[1], vec[0] / prev_vec[0],
                    vec[0] - prev_vec[0], vec[1] - prev_vec[1]
                    ]

        vecs.append(features)
        prev_vec = vec
    return vecs

In [3]:
option2id, all_guesses = load_quizbowl(normalize=True)
iterators = dict()
for fold in c.BUZZER_INPUT_FOLDS:
    iterators[fold] = QuestionIterator(all_guesses[fold], option2id,
        batch_size=128, make_vector=new_dense_vector)

2017-05-26 13:18:00,145 - qanta.buzzer.util - INFO - Merging guesser DataFrames.
2017-05-26 13:18:00,146 - qanta.buzzer.util - INFO - Merged buzzertrain exists, skipping.
2017-05-26 13:18:00,147 - qanta.buzzer.util - INFO - Merged buzzerdev exists, skipping.
2017-05-26 13:18:00,148 - qanta.buzzer.util - INFO - Merged dev exists, skipping.
2017-05-26 13:18:00,150 - qanta.buzzer.util - INFO - Merged test exists, skipping.
2017-05-26 13:18:00,151 - qanta.buzzer.util - INFO - Merged expo exists, skipping.
2017-05-26 13:18:00,152 - qanta.buzzer.util - INFO - Loading data
2017-05-26 13:18:06,976 - qanta.buzzer.util - INFO - Number of options 8247
2017-05-26 13:18:27,531 - qanta.buzzer.util - INFO - Loading buzzertrain guesses
2017-05-26 13:18:32,471 - qanta.buzzer.util - INFO - Loading buzzerdev guesses
2017-05-26 13:18:38,994 - qanta.buzzer.util - INFO - Loading dev guesses
2017-05-26 13:18:39,762 - qanta.buzzer.util - INFO - Loading test guesses
2017-05-26 13:18:39,778 - qanta.buzzer.util 

In [4]:
iterators['expo'].n_input

19

In [5]:
for n_hidden in [200, 400]:
    cfg = configs.rnn()
    cfg.n_hidden = n_hidden
    cfg.model_name = 'rnn.{}'.format(n_hidden, 'dense_vector_1')
    cfg.model_dir = 'output/buzzer/{}.npz'.format(cfg.model_name)

    model = RNN(iterators[c.BUZZER_TRAIN_FOLD].n_input, cfg.n_hidden, N_GUESSERS + 1)
    chainer.cuda.get_device(0).use()
    model.to_gpu(0)

    trainer = Trainer(model, cfg.model_dir)
    trainer.run(iterators[c.BUZZER_TRAIN_FOLD], iterators[c.BUZZER_DEV_FOLD], 25)

    for fold in c.BUZZER_GENERATION_FOLDS:
        test_iter = iterators[fold]
        buzzes = trainer.test(test_iter)
        log.info('{0} buzzes generated. Size {1}.'.format(fold, len(buzzes)))
        buzzes_dir = bc.BUZZES_DIR.format(fold, cfg.model_name)
        with open(buzzes_dir, 'wb') as outfile:
            pickle.dump(buzzes, outfile)
        log.info('Buzzes saved to {0}.'.format(buzzes_dir))
        if fold == 'expo':
            guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])
            buzzer2vwexpo(guesses_df, buzzes, fold)

2017-05-26 13:30:31,787 - qanta.buzzer.trainer - INFO - epoch 0
2017-05-26 13:31:18,785 - qanta.buzzer.trainer - INFO - train loss: nan  acc: 0.56  
2017-05-26 13:31:32,194 - qanta.buzzer.trainer - INFO - eval loss: nan  acc: 0.61  
2017-05-26 13:31:32,211 - qanta.buzzer.trainer - INFO - epoch 1
2017-05-26 13:32:01,036 - qanta.buzzer.trainer - INFO - train loss: nan  acc: 0.55  
2017-05-26 13:32:10,760 - qanta.buzzer.trainer - INFO - eval loss: nan  acc: 0.61  
2017-05-26 13:32:10,771 - qanta.buzzer.trainer - INFO - epoch 2
2017-05-26 13:32:53,690 - qanta.buzzer.trainer - INFO - train loss: nan  acc: 0.55  
2017-05-26 13:33:01,315 - qanta.buzzer.trainer - INFO - eval loss: nan  acc: 0.61  
2017-05-26 13:33:01,326 - qanta.buzzer.trainer - INFO - epoch 3
2017-05-26 13:33:33,181 - qanta.buzzer.trainer - INFO - train loss: nan  acc: 0.55  
2017-05-26 13:33:43,310 - qanta.buzzer.trainer - INFO - eval loss: nan  acc: 0.61  
2017-05-26 13:33:43,324 - qanta.buzzer.trainer - INFO - epoch 4
2017

NameError: name 'buzzer2vwexpo' is not defined

In [None]:
option2id, all_guesses = load_quizbowl(['expo'], normalize=True)
iterators = dict()
iterators['expo'] = QuestionIterator(all_guesses['expo'], option2id,
    batch_size=128, make_vector=old_dense_vector )

In [None]:
iterators['expo'].n_input

In [None]:
from qanta.buzzer.interface import buzzer2vwexpo

n_hidden = 100
cfg = configs.rnn()
cfg.n_hidden = n_hidden
cfg.model_name = 'rnn_{}'.format(n_hidden)
cfg.model_dir = 'output/buzzer/{}.npz'.format(cfg.model_name)

model = RNN(iterators['expo'].n_input, cfg.n_hidden, N_GUESSERS + 1)
chainer.cuda.get_device(0).use()
model.to_gpu(0)

log.info('Loading model {0}'.format(cfg.model_dir))
chainer.serializers.load_npz(cfg.model_dir, model)

trainer = Trainer(model, cfg.model_dir)

fold = 'expo'

test_iter = iterators[fold]
buzzes = trainer.test(test_iter)
log.info('{0} buzzes generated. Size {1}.'.format(fold, len(buzzes)))
buzzes_dir = bc.BUZZES_DIR.format(fold, cfg.model_name)
with open(buzzes_dir, 'wb') as outfile:
    pickle.dump(buzzes, outfile)
log.info('Buzzes saved to {0}.'.format(buzzes_dir))

guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])
buzzer2vwexpo(guesses_df, buzzes, fold)