# Relation Ranking Yu et al. 2017 Model

Our goal here to to reimplement Yu et al. 2017 93% relation model. 

First things first, set up the initial configuration.

In [None]:
import sys
sys.path.insert(0, '../../')

In [None]:
from tqdm import tqdm_notebook
from functools import partial
tqdm_notebook = partial(tqdm_notebook, leave=False)
tqdm_notebook().pandas()

In [None]:
import logging

from lib.utils import config_logging
from lib.utils import new_experiment_folder
from lib.utils import seed
from lib.utils import save_standard_streams
from lib.utils import device_default

experiment_folder = new_experiment_folder(label='relation_ranking', parent_directory='../../experiments/')
print('Experiment Folder: %s' % experiment_folder)
save_standard_streams(experiment_folder) # Copy Stdout and Stderr to experiments folder

config_logging()
logger = logging.getLogger(__name__)

device = device_default()
logger.info('Device: %d', device)
is_cuda = device >= 0
cuda = lambda t: t.cuda(device=device) if is_cuda else t

random_seed = 1212212
seed(random_seed, is_cuda=is_cuda)

# Async minibatch allocation for speed
# Reference: http://timdettmers.com/2015/03/09/deep-learning-hardware-guide/
cuda_async = lambda t: t.cuda(device=device, async=True) if is_cuda else t

## Dataset

Load our dataset. Log a couple rows.

In [None]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def yu_dataset(directory='../../data/yu/',
               train=False,
               dev=False,
               test=False,
               train_filename='train.replace_ne.withpool',
               dev_filename='valid.replace_ne.withpool',
               test_filename='test.replace_ne.withpool',
               vocab_filename='relation.2M.list'):
    """
    Example line example: 40	61 40 117	which genre of album is #head_entity# ?
    Vocab example: /film/film/genre
    
    Sample Data:
        Question: 'which genre of album is #head_entity# ?'
        True Relation: '/music/album/genre'
        False Relation Pool: ['/music/album/release_type', '/music/album/genre', '/music/album/artist']
    """
    vocab_path = os.path.join(directory, vocab_filename)
    vocab = [l.strip() for l in open(vocab_path, 'r')]
    
    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue
            
        file_path = os.path.join(directory, filename)
        data = pd.read_table(file_path, header=None, names=['True Relation', 'Relation Pool', 'Question'])
        rows = []
        for i, row in tqdm_notebook(data.iterrows(), total=data.shape[0]):
            if row['Relation Pool'].strip() == 'noNegativeAnswer':
                continue
            relation_pool = [vocab[int(i) - 1].strip('/') for i in row['Relation Pool'].split()]
            true_relation = vocab[int(row['True Relation']) - 1].strip('/')
            question = row['Question'].strip()
            # Development and test set may or may not have the True relation based on our predicted pool
            if filename == train_filename:
                assert true_relation not in relation_pool
                
            for relation in relation_pool:
                if filename == train_filename:
                    rows.append({'Question': question,
                                 'True Relation': true_relation,
                                 'False Relation': relation,
                                 'Example ID': i})
                else:
                    rows.append({'Question': question,
                                 'True Relation': true_relation,
                                 'Relation': relation,
                                 'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [None]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def relation_ranking_dataset(directory='../../data/relation_ranking/',
               train=False,
               dev=False,
               test=False,
               train_filename='train.txt',
               dev_filename='dev.txt',
               test_filename=''):
    """
    Example line example: 
        film/film/country	film/film/country film/film/genre film/film/language	what country is <e> from ?
    Vocab example: 
        /film/film/genre
    
    Sample Data:
        Question: 'which genre of album is #head_entity# ?'
        True Relation: '/music/album/genre'
        False Relation Pool: ['/music/album/release_type', '/music/album/genre', '/music/album/artist']
    """
    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue
            
        file_path = os.path.join(directory, filename)
        data = pd.read_table(file_path, header=None, names=['True Relation', 'Relation Pool', 'Question', 'Entity'])
        rows = []
        for i, row in tqdm_notebook(data.iterrows(), total=data.shape[0]):
            relation_pool = list(set(row['Relation Pool'].split()))
            true_relation = row['True Relation']
            question = row['Question'].strip()
            entity = row['Entity'].strip()
                
            for relation in relation_pool:
                if filename == train_filename:
                    rows.append({'Question': question,
                                 'Entity': entity,
                                 'True Relation': true_relation,
                                 'False Relation': relation,
                                 'Example ID': i})
                else:
                    rows.append({'Question': question,
                                 'Entity': entity,
                                 'True Relation': true_relation,
                                 'Relation': relation,
                                 'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [None]:
from IPython.display import display
import pandas as pd

train_dataset, dev_dataset = relation_ranking_dataset(train=True, dev=True)

print('Num Training Data: %d' % len(train_dataset))
print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('\nNum Development Data: %d' % len(dev_dataset))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))

## Encode Text

Here we encode our data into a numerical format. 

In [None]:
from IPython.display import display
from tqdm import tqdm_notebook
import re

from lib.text_encoders import StaticTokenizerEncoder
from lib.text_encoders import DelimiterEncoder
from lib.text_encoders import WordEncoder

# We add development dataset to text_encoder for embeddings
# We make sure not to use the the development dataset to provide us with any vocab optimizations or learning
text_encoder = WordEncoder(train_dataset['Question'] + dev_dataset['Question'], lower=True, append_eos=False)
print('Text encoder vocab size: %d' % text_encoder.vocab_size)

relations = set(train_dataset['True Relation'] + train_dataset['False Relation'])
relation_word_encoder = StaticTokenizerEncoder(relations, tokenize=lambda s: re.split('/|_', s))
print('Relation word encoder vocab size: %d' % relation_word_encoder.vocab_size)

relation_encoder = DelimiterEncoder('/', relations)
print('Relation encoder vocab size: %d' % relation_encoder.vocab_size)
relations = None # Clear memory

# TODO: Lots of repeat computations... do this at the level of the dataset before it's split apart?
for dataset in [train_dataset, dev_dataset]:
    for row in tqdm_notebook(dataset):
        row['Question'] = text_encoder.encode(row['Question'])
        row['True Relation Word'] = relation_word_encoder.encode(row['True Relation'])
        row['True Relation'] = relation_encoder.encode(row['True Relation'])
        
        if 'False Relation' in row:
            row['False Relation Word'] = relation_word_encoder.encode(row['False Relation'])
            row['False Relation'] = relation_encoder.encode(row['False Relation'])

        if 'Relation' in row:
            row['Relation Word'] = relation_word_encoder.encode(row['Relation'])
            row['Relation'] = relation_encoder.encode(row['Relation'])
            

print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))

## Dataset Iterators

Define functions to create iterators over the development and the train dataset for each epoch.

In [None]:
import torch
from torch.autograd import Variable
from lib.utils import pad_batch

# Defines how to combine a batch of rows into a tensor
def collate_fn(batch, train=True):
    """ list of tensors to a batch variable """
    question_batch, _ = pad_batch([row['Question'] for row in batch])

    # PyTorch RNN requires batches to be transposed for speed and integration with CUDA
    to_variable = (lambda b: Variable(torch.stack(b).t_().contiguous(), volatile=not train))

    if train:
        true_relation_word_batch, _ = pad_batch([row['True Relation Word'] for row in batch])
        true_relation_batch, _ = pad_batch([row['True Relation'] for row in batch])
        false_relation_word_batch, _ = pad_batch([row['False Relation Word'] for row in batch])
        false_relation_batch, _ = pad_batch([row['False Relation'] for row in batch])
        return (to_variable(question_batch), to_variable(true_relation_batch),
                to_variable(true_relation_word_batch), to_variable(false_relation_batch),
                to_variable(false_relation_word_batch))
    else:
        relation_word_batch, _ = pad_batch([row['Relation Word'] for row in batch])
        relation_batch, _ = pad_batch([row['Relation'] for row in batch])
        return (to_variable(question_batch), to_variable(relation_batch),
                to_variable(relation_word_batch), batch)

In [None]:
from functools import partial

from torch.utils.data import DataLoader

from lib.samplers import BucketBatchSampler
from lib.samplers import SortedSampler

sort_key = lambda r: r['Question'].size()[0]

def get_iterator(dataset, batch_size, train=False):
    # Use bucket sampling to group similar sized text but with noise + random
    batch_sampler = BucketBatchSampler(dataset, sort_key, batch_size, sort_key_noise=0.5)
    return DataLoader(
        dataset,
        batch_sampler=batch_sampler,
        collate_fn=partial(collate_fn, train=train),
        pin_memory=is_cuda,
        num_workers=0)

# Model

Instantiate the model.

In [None]:
import torch
from lib.pretrained_embeddings import FastText

unk_init = lambda t: torch.FloatTensor(t).uniform_(-0.1, 0.1)
pretrained_embedding = FastText(language='en', cache='./../../.pretrained_embeddings_cache')
text_embedding_weights = torch.Tensor(text_encoder.vocab_size, pretrained_embedding.dim)
for i, token in enumerate(text_encoder.vocab):
    text_embedding_weights[i] = pretrained_embedding[token]
pretrained_embedding = None # Clear memory

In [None]:
import copy
from lib.nn import YuModel

def make_model(**kwargs):
    model = YuModel(relation_encoder.vocab_size, relation_word_encoder.vocab_size,
                    text_encoder.vocab_size, **kwargs)
    for param in model.parameters():
        param.data.uniform_(-0.1, 0.1)

    freeze_embeddings = True
    model.text_embedding.weight.data.copy_(text_embedding_weights)
    model.text_embedding.weight.requires_grad = not freeze_embeddings

    cuda(model)
    return model

## Evaluate

In [12]:
from collections import defaultdict 

def evaluate(dataset, model, batch_size=1):
    model.train(mode=False)
    examples = defaultdict(list)
    iterator = get_iterator(dataset, batch_size)
    for (question, relation, relation_word, batch) in tqdm_notebook(iterator):
        output = model(cuda_async(question), cuda_async(relation), cuda_async(relation_word))
        output = output.data.cpu()

        for i, row in enumerate(batch):
            examples[row['Example ID']].append({
                'Score': output[i],
                'Question': row['Question'],
                'True Relation': row['True Relation'],
                'Relation': row['Relation']
            })
    model.train(True) # No side affects

    # Print metrics
    correct = 0
    for pool in examples.values():
        max_relation = max(pool, key=lambda p: p['Score'])
        if max_relation['Relation'].tolist() == max_relation['True Relation'].tolist():
            correct += 1
    accuracy = correct / len(examples)
    logger.info('Accuracy: %.03f', accuracy)
    return accuracy

## Hyperparameters

In [13]:
from lib.optim import Adam
from lib.configurable import configurable
from lib.configurable import add_config
from lib.configurable import log_config

Adam.__init__ = configurable(Adam.__init__)

add_config({
    'lib': {
        'nn.yu_model.YuModel.__init__': {
            'embedding_size': 300,
            'hidden_size': 200,
            'dropout_relation': 0.25,
            'dropout_text': 0.25,
            'n_layers_relation': 1
        },
        'optimizer.Optimizer.__init__.max_grad_norm': 1,
    },
    'lib.optim.adam.Adam.__init__': {
        'amsgrad': True
    }
})

log_config()

[2018-02-12 08:44:55,985][MainProcess][lib.configurable][INFO] Checking configuration...
[2018-02-12 08:44:55,987][MainProcess][lib.configurable][INFO] Configuration checked.
[2018-02-12 08:44:55,988][MainProcess][lib.configurable][INFO] Global configuration:
[2018-02-12 08:44:55,989][MainProcess][root][INFO] {   'lib': {   'nn': {   'yu_model': {   'YuModel': {   '__init__': {   'dropout_relation': 0.25,
                                                                        'dropout_text': 0.25,
                                                                        'embedding_size': 300,
                                                                        'hidden_size': 200,
                                                                        'n_layers_relation': 1}}}},
               'optim': {'adam': {'Adam': {'__init__': {'amsgrad': True}}}},
               'optimizer': {'Optimizer': {'__init__': {'max_grad_norm': 1}}}}}


## Training Loop

Below here, we do a training loop over a number of epochs.

In [14]:
from tqdm import tqdm_notebook
import random

from torch.utils.data import DataLoader
from torch.nn.modules.loss import MarginRankingLoss
from lib.optim import Adam
from functools import partial

from lib.checkpoint import Checkpoint
from lib.utils import get_total_parameters
from lib.utils import resplit_datasets
from lib.optimizer import Optimizer

def train(resources=30, checkpoint=None, margin=0.5, **kwargs):
    
    if isinstance(checkpoint, str):
        checkpoint = Checkpoint(checkpoint)
        model = checkpoint.model
        train_batch_size = checkpoint.train_batch_size
        optimizer = checkpoint.optimizer
        n_bad_epochs = checkpoint.n_bad_epochs
        max_score = checkpoint.max_score
    else:
        model = make_model()
        train_batch_size = 512
        # NOTE: https://github.com/pytorch/pytorch/issues/679
        params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = Optimizer(Adam(params=params))
        n_bad_epochs = 0
        max_score = 0

    # NOTE: Because the training dataset was used to train the subject recongition, its better; therefore,
    # we cannot mix them
    epochs = max(round(resources), 1)
    train_max_batch_size = 2048
    patience = 3
    criterion = cuda(MarginRankingLoss(margin=margin))
    logger.info('Epochs: %d', epochs)
    logger.info('Train Dataset Size: %d', len(train_dataset))
    logger.info('Dev Dataset Size: %d', len(dev_dataset))
    logger.info('Train Batch Size: %d', train_batch_size)
    logger.info('Train Max Batch Size: %d', train_max_batch_size)
    logger.info('Total Parameters: %d', get_total_parameters(model))
    logger.info('Model:\n%s' % model)
    
    # Train!
    for epoch in range(epochs):
        print('Epoch %d' % epoch)

        # Iterate over the training data
        model.train(mode=True)
        train_iterator = get_iterator(train_dataset, train_batch_size, True)
        for (question, true_relation, true_relation_word, false_relation,
             false_relation_word) in tqdm_notebook(train_iterator):
            optimizer.zero_grad()
            output_true = model(
                cuda_async(question), cuda_async(true_relation), cuda_async(true_relation_word))
            output_false = model(
                cuda_async(question), cuda_async(false_relation), cuda_async(false_relation_word))
            labels = cuda(Variable(torch.ones(output_true.size()[0])))
            loss = criterion(output_true, output_false, labels)

            # Backward propagation
            loss.backward()
            optimizer.step()

        # Save checkpoint
        checkpoint_path = Checkpoint.save(
            experiment_folder, {
                'model': model,
                'optimizer': optimizer,
                'relation_word_encoder': relation_word_encoder,
                'relation_encoder': relation_encoder,
                'text_encoder': text_encoder,
                'train_batch_size': train_batch_size,
                'n_bad_epochs': n_bad_epochs,
                'max_score': max_score
            },
            device=device)

        # Evaluate
        score = evaluate(dev_dataset, model, 4096)

        # Scheduler for increasing batch_size inspired by this paper:
        # https://openreview.net/forum?id=B1Yy1BxCZ
        if max_score > score:
            n_bad_epochs += 1
        else:
            n_bad_epochs = 0
            max_score = score

        if n_bad_epochs >= patience:
            train_batch_size = min(train_max_batch_size, train_batch_size * 2)
            logger.info('Ran out of patience, increasing train batch size to: %d', train_batch_size)
            
        print('–' * 100)
        
    return -max_score, checkpoint_path

In [15]:
train()

[2018-02-12 08:44:56,168][MainProcess][lib.configurable][INFO] yu_model.YuModel.__init__ was configured with:
{   'dropout_relation': 0.25,
    'dropout_text': 0.25,
    'embedding_size': 300,
    'hidden_size': 200,
    'n_layers_relation': 1,
    'relation_vocab_size': 5267,
    'relation_word_vocab_size': 3386,
    'text_vocab_size': 6693}
[2018-02-12 08:44:58,667][MainProcess][lib.configurable][INFO] adam.Adam.__init__ was configured with:
{'amsgrad': True, 'params': <filter object at 0x7f26e1848588>}
[2018-02-12 08:44:58,669][MainProcess][lib.configurable][INFO] optimizer.Optimizer.__init__ was configured with:
{'max_grad_norm': 1, 'optim': <lib.optim.adam.Adam object at 0x7f26e1848208>}
[2018-02-12 08:44:58,670][MainProcess][__main__][INFO] Epochs: 30
[2018-02-12 08:44:58,671][MainProcess][__main__][INFO] Train Dataset Size: 1378892
[2018-02-12 08:44:58,673][MainProcess][__main__][INFO] Dev Dataset Size: 194798
[2018-02-12 08:44:58,675][MainProcess][__main__][INFO] Train Batch Si


[2018-02-12 08:47:28,977][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/152.pt



[2018-02-12 08:47:52,073][MainProcess][__main__][INFO] Accuracy: 0.844
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 1



[2018-02-12 08:50:43,832][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/347.pt



[2018-02-12 08:50:59,995][MainProcess][__main__][INFO] Accuracy: 0.862
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 2



[2018-02-12 08:53:49,833][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/533.pt



[2018-02-12 08:54:05,514][MainProcess][__main__][INFO] Accuracy: 0.855
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 3



[2018-02-12 08:56:54,579][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/718.pt



[2018-02-12 08:57:09,948][MainProcess][__main__][INFO] Accuracy: 0.867
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 4



[2018-02-12 08:59:59,633][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/903.pt



[2018-02-12 09:00:14,626][MainProcess][__main__][INFO] Accuracy: 0.864
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 5



[2018-02-12 09:03:00,759][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1084.pt



[2018-02-12 09:03:16,472][MainProcess][__main__][INFO] Accuracy: 0.865
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 6



[2018-02-12 09:06:03,267][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1267.pt



[2018-02-12 09:06:17,915][MainProcess][__main__][INFO] Accuracy: 0.860
[2018-02-12 09:06:19,170][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 1024
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 7



[2018-02-12 09:09:04,694][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1448.pt



[2018-02-12 09:09:19,758][MainProcess][__main__][INFO] Accuracy: 0.859
[2018-02-12 09:09:20,953][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 8



[2018-02-12 09:12:02,867][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1626.pt



[2018-02-12 09:12:18,605][MainProcess][__main__][INFO] Accuracy: 0.863
[2018-02-12 09:12:19,878][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 9



[2018-02-12 09:14:59,826][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1803.pt



[2018-02-12 09:15:15,012][MainProcess][__main__][INFO] Accuracy: 0.858
[2018-02-12 09:15:16,160][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 10



[2018-02-12 09:17:57,309][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/1981.pt



[2018-02-12 09:18:12,821][MainProcess][__main__][INFO] Accuracy: 0.853
[2018-02-12 09:18:14,520][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 11



[2018-02-12 09:20:54,509][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/2158.pt



[2018-02-12 09:21:09,807][MainProcess][__main__][INFO] Accuracy: 0.853
[2018-02-12 09:21:10,972][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 12



[2018-02-12 09:23:52,005][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/2335.pt



[2018-02-12 09:24:07,676][MainProcess][__main__][INFO] Accuracy: 0.864
[2018-02-12 09:24:08,839][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 13



[2018-02-12 09:26:49,442][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/2513.pt



[2018-02-12 09:27:04,660][MainProcess][__main__][INFO] Accuracy: 0.859
[2018-02-12 09:27:05,821][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 14



[2018-02-12 09:29:47,018][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/2690.pt



[2018-02-12 09:30:02,638][MainProcess][__main__][INFO] Accuracy: 0.855
[2018-02-12 09:30:03,853][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 15



[2018-02-12 09:32:47,035][MainProcess][lib.checkpoint][INFO] Saving checkpoint: ../../experiments/relation_ranking.02_12_08:43:59/2870.pt



[2018-02-12 09:33:02,442][MainProcess][__main__][INFO] Accuracy: 0.856
[2018-02-12 09:33:03,608][MainProcess][__main__][INFO] Ran out of patience, increasing train batch size to: 2048
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
Epoch 16


KeyboardInterrupt: 

## Hyperparameter Optimization

In [None]:
import torch
import random

from skopt.space import Real, Integer, Categorical

from lib.hyperparameter_optimization import hyperband
from lib.configurable import add_config
from lib.configurable import log_config

space  = [Real(0, 0.9, name='dropout_text'),
          Real(0, 0.9, name='dropout_relation'),
          Real(0.1, 1.9, name='margin'), # 1 == 90 degrees in consine distance
          Integer(50, 250, name='hidden_size'), # We multiply this hyperparameter by two to allow for bidirectional
          Real(0, 5, name='max_grad_norm')]

def objective(hidden_size, dropout_relation, dropout_text, max_grad_norm, margin, 
              *args, **kwargs):

    add_config({
        'lib': {
            'nn.yu_model.YuModel.__init__': {
                'embedding_size': 300,
                'hidden_size': int(hidden_size * 2),
                'dropout_relation': float(dropout_relation),
                'dropout_text': float(dropout_text),
                'n_layers_relation': 1
            },
            'optimizer.Optimizer.__init__.max_grad_norm': max_grad_norm,
        },
        'lib.optim.adam.Adam.__init__': {
            'amsgrad': True
        }
    })

    ret = train(margin=margin, *args, **kwargs)
    print('=' * 100)
    torch.cuda.empty_cache()
    return ret

scores, hyperparameters = hyperband(objective, space, max_resources_per_model=10, total_resources=150)
print('Best Accuracy: %.4f' % min(scores))