In [3]:
import os
import os
import sys
import random
import numpy as np
import argparse
import chainer
import pickle
from typing import List, Dict, Tuple, Optional

from qanta import logging
from qanta.config import conf
from qanta.guesser.abstract import AbstractGuesser

from qanta.buzzer import configs
from qanta.buzzer.progress import ProgressBar
from qanta.buzzer.trainer import Trainer
from qanta.buzzer.iterator import QuestionIterator
from qanta.buzzer.util import load_quizbowl, GUESSERS
from qanta.buzzer.models import MLP, RNN
from qanta.buzzer import constants as bc
from qanta.util import constants as c


log = logging.get(__name__)

N_GUESSERS = len(GUESSERS)
N_GUESSES = conf['buzzer']['n_guesses']

In [9]:
def dense_vector(dicts: List[List[Dict[str, float]]],
        wordvecs: List[List[np.ndarray]], step_size=1) -> List[List[float]]:

    length = len(dicts)
    prev_vec = [0.02 for _ in range(N_GUESSERS * N_GUESSES)]
    vecs = []
    for i in range(length):
        if len(dicts[i]) != N_GUESSERS:
            raise ValueError("Inconsistent number of guessers ({0}, {1}).".format(
                N_GUESSERS, len(dicts)))
        vec = []
        diff_vec = []
        isnew_vec = []
        for j in range(N_GUESSERS):
            dic = sorted(dicts[i][j].items(), key=lambda x: x[1], reverse=True)
            for guess, score in dic:
                vec.append(score)
                if i > 0 and guess in dicts[i-1][j]:
                    diff_vec.append(score - dicts[i-1][j][guess])
                    isnew_vec.append(0)
                else:
                    diff_vec.append(score) 
                    isnew_vec.append(1)
            if len(dic) < N_GUESSES:
                for k in range(max(N_GUESSES - len(dic), 0)):
                    vec.append(0)
                    diff_vec.append(0)
                    isnew_vec.append(0)
        features = [vec[0], vec[1], vec[2],
                    np.average(vec[:10]), np.average(prev_vec[:10]),
                    np.var(vec[:10]), np.var(prev_vec[:10]),
                    sum(isnew_vec[:10]),
                    isnew_vec[0], isnew_vec[1], isnew_vec[2],
                    diff_vec[0], diff_vec[1]
                    vec[0] - vec[1], vec[1] - vec[2], 
                    vec[0] / vec[1], vec[0] / prev_vec[0],
                    vec[0] - prev_vec[0], vec[1] - prev_vec[1]
                    ]

        vecs.append(features)
        prev_vec = vec
    return vecs

option2id, all_guesses = load_quizbowl()
iterators = dict()
for fold in c.BUZZER_INPUT_FOLDS:
    iterators[fold] = QuestionIterator(all_guesses[fold], option2id,
        batch_size=128, make_vector=dense_vector)

2017-05-26 02:06:19,269 - qanta.buzzer.util - INFO - Merging guesser DataFrames.
2017-05-26 02:06:19,271 - qanta.buzzer.util - INFO - Merged buzzertrain exists, skipping.
2017-05-26 02:06:19,272 - qanta.buzzer.util - INFO - Merged buzzerdev exists, skipping.
2017-05-26 02:06:19,273 - qanta.buzzer.util - INFO - Merged dev exists, skipping.
2017-05-26 02:06:19,274 - qanta.buzzer.util - INFO - Merged test exists, skipping.
2017-05-26 02:06:19,274 - qanta.buzzer.util - INFO - Merged expo exists, skipping.
2017-05-26 02:06:19,275 - qanta.buzzer.util - INFO - Loading data
2017-05-26 02:06:26,631 - qanta.buzzer.util - INFO - Number of options 8247
2017-05-26 02:06:46,684 - qanta.buzzer.util - INFO - Loading buzzertrain guesses
2017-05-26 02:06:49,779 - qanta.buzzer.util - INFO - Loading buzzerdev guesses
2017-05-26 02:06:50,894 - qanta.buzzer.util - INFO - Loading dev guesses
2017-05-26 02:06:51,484 - qanta.buzzer.util - INFO - Loading test guesses
2017-05-26 02:06:51,506 - qanta.buzzer.util 

In [17]:
iterators1 = dict()
for fold in c.BUZZER_INPUT_FOLDS:
    iterators1[fold] = QuestionIterator(all_guesses[fold], option2id,
        batch_size=128)

2017-05-26 05:11:41,438 - qanta.buzzer.iterator - INFO - Creating batches
[creat batches] (1) done: 23210/23211
2017-05-26 05:16:14,828 - qanta.buzzer.iterator - INFO - Finish creating batches
2017-05-26 05:16:14,829 - qanta.buzzer.iterator - INFO - Creating batches
[creat batches] (1) done: 7586/7587
2017-05-26 05:17:43,460 - qanta.buzzer.iterator - INFO - Finish creating batches
2017-05-26 05:17:43,461 - qanta.buzzer.iterator - INFO - Creating batches
[creat batches] (1) done: 2088/2089
2017-05-26 05:18:12,804 - qanta.buzzer.iterator - INFO - Finish creating batches
2017-05-26 05:18:12,805 - qanta.buzzer.iterator - INFO - Creating batches
[creat batches] (1) done: 1411/1412
2017-05-26 05:18:31,469 - qanta.buzzer.iterator - INFO - Finish creating batches
2017-05-26 05:18:31,470 - qanta.buzzer.iterator - INFO - Creating batches
[creat batches] (1) done: 68/69
2017-05-26 05:18:31,916 - qanta.buzzer.iterator - INFO - Finish creating batches


In [None]:
for n_hidden in [10, 25, 50, 100, 200]:
    n_hidden = 200
    cfg = configs.rnn()
    cfg.n_hidden = n_hidden
    cfg.model_name = 'rnn_1_{}'.format(n_hidden)
    cfg.model_dir = 'output/buzzer/{}.npz'.format(cfg.model_name)

    model = RNN(iterators[c.BUZZER_TRAIN_FOLD].n_input, cfg.n_hidden, N_GUESSERS + 1)
    chainer.cuda.get_device(0).use()
    model.to_gpu(0)

    trainer = Trainer(model, cfg.model_dir)
    trainer.run(iterators[c.BUZZER_TRAIN_FOLD], iterators[c.BUZZER_DEV_FOLD], 25)

    for fold in c.BUZZER_GENERATION_FOLDS:
        test_iter = iterators[fold]
        buzzes = trainer.test(test_iter)
        log.info('{0} buzzes generated. Size {1}.'.format(fold, len(buzzes)))
        buzzes_dir = bc.BUZZES_DIR.format(fold, cfg.model_name)
        with open(buzzes_dir, 'wb') as outfile:
            pickle.dump(buzzes, outfile)
        log.info('Buzzes saved to {0}.'.format(buzzes_dir))

2017-05-26 05:21:40,561 - qanta.buzzer.trainer - INFO - epoch 0
2017-05-26 05:22:05,112 - qanta.buzzer.trainer - INFO - train loss: 0.00  acc: 0.71  
2017-05-26 05:22:10,283 - qanta.buzzer.trainer - INFO - eval loss: 0.01  acc: 0.76  
2017-05-26 05:22:10,318 - qanta.buzzer.trainer - INFO - epoch 1
2017-05-26 05:22:41,995 - qanta.buzzer.trainer - INFO - train loss: 0.00  acc: 0.80  
2017-05-26 05:22:50,567 - qanta.buzzer.trainer - INFO - eval loss: 0.01  acc: 0.82  
2017-05-26 05:22:50,605 - qanta.buzzer.trainer - INFO - epoch 2
2017-05-26 05:23:19,943 - qanta.buzzer.trainer - INFO - train loss: 0.00  acc: 0.82  
2017-05-26 05:23:34,080 - qanta.buzzer.trainer - INFO - eval loss: 0.00  acc: 0.83  
2017-05-26 05:23:34,125 - qanta.buzzer.trainer - INFO - epoch 3
2017-05-26 05:24:05,451 - qanta.buzzer.trainer - INFO - train loss: 0.00  acc: 0.83  
2017-05-26 05:24:14,694 - qanta.buzzer.trainer - INFO - eval loss: 0.00  acc: 0.84  
2017-05-26 05:24:14,733 - qanta.buzzer.trainer - INFO - epoc