# Relation Ranking Yu et al. 2017 Model

Our goal here to to reimplement Yu et al. 2017 93% relation model. 

First things first, set up the initial configuration.

In [5]:
import sys
print('Python Version:', sys.version)
import pandas as pd
import logging
sys.path.insert(0, '../../')

from lib.utils import setup_training

# Create root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 80)

random_seed = 12930129
device = 0
is_cuda, _ = setup_training(device, random_seed) 
# Async minibatch allocation for speed
# Reference: http://timdettmers.com/2015/03/09/deep-learning-hardware-guide/
# TODO: look into cuda_async device=device
cuda_async = lambda t: t.cuda(device=device, async=True) if is_cuda else t  # Use with tensors
cuda = lambda t: t.cuda(device=device) if is_cuda else t  # Use with nn.modules

Python Version: 3.6.4 (default, Dec 19 2017, 17:29:45) 
[GCC 5.4.0 20160609]


## Dataset

Load our dataset. Log a couple rows.

In [6]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def yu_dataset(directory='../../data/yu/',
               train=False,
               dev=False,
               test=False,
               train_filename='train.replace_ne.withpool',
               dev_filename='valid.replace_ne.withpool',
               test_filename='test.replace_ne.withpool',
               vocab_filename='relation.2M.list'):
    """
    Example line example: 40	61 40 117	which genre of album is #head_entity# ?
    Vocab example: /film/film/genre
    
    Sample Data:
        Question: 'which genre of album is #head_entity# ?'
        True Relation: '/music/album/genre'
        False Relation Pool: ['/music/album/release_type', '/music/album/genre', '/music/album/artist']
    """
    vocab_path = os.path.join(directory, vocab_filename)
    vocab = [l.strip() for l in open(vocab_path, 'r')]
    
    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue
            
        file_path = os.path.join(directory, filename)
        data = pd.read_table(file_path, header=None, names=['True Relation', 'Relation Pool', 'Question'])
        rows = []
        for i, row in tqdm_notebook(data.iterrows(), total=data.shape[0]):
            if row['Relation Pool'].strip() == 'noNegativeAnswer':
                continue
            relation_pool = [vocab[int(i) - 1].strip('/') for i in row['Relation Pool'].split()]
            true_relation = vocab[int(row['True Relation']) - 1].strip('/')
            question = row['Question'].strip()
            # Development and test set may or may not have the True relation based on our predicted pool
            if filename == train_filename:
                assert true_relation not in relation_pool
                
            for relation in relation_pool:
                if filename == train_filename:
                    rows.append({'Question': question,
                                 'True Relation': true_relation,
                                 'False Relation': relation,
                                 'Example ID': i})
                else:
                    rows.append({'Question': question,
                                 'True Relation': true_relation,
                                 'Relation': relation,
                                 'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [7]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def relation_ranking_dataset(directory='../../data/relation_ranking/',
               train=False,
               dev=False,
               test=False,
               train_filename='train.txt',
               dev_filename='dev.txt',
               test_filename=''):
    """
    Example line example: 
        film/film/country	film/film/country film/film/genre film/film/language	what country is <e> from ?
    Vocab example: 
        /film/film/genre
    
    Sample Data:
        Question: 'which genre of album is #head_entity# ?'
        True Relation: '/music/album/genre'
        False Relation Pool: ['/music/album/release_type', '/music/album/genre', '/music/album/artist']
    """
    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue
            
        file_path = os.path.join(directory, filename)
        data = pd.read_table(file_path, header=None, names=['True Relation', 'Relation Pool', 'Question', 'Entity'])
        rows = []
        for i, row in tqdm_notebook(data.iterrows(), total=data.shape[0]):
            relation_pool = set(row['Relation Pool'].split())
            true_relation = row['True Relation']
            question = row['Question'].strip()
            entity = row['Entity'].strip()
            
            # Development and test set may or may not have the True relation based on our predicted pool
            if filename == train_filename:
                relation_pool.remove(true_relation)
                relation_pool = list(relation_pool)
                
            for relation in relation_pool:
                if filename == train_filename:
                    rows.append({'Question': question,
                                 'Entity': entity,
                                 'True Relation': true_relation,
                                 'False Relation': relation,
                                 'Example ID': i})
                else:
                    rows.append({'Question': question,
                                 'Entity': entity,
                                 'True Relation': true_relation,
                                 'Relation': relation,
                                 'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [8]:
from IPython.display import display

train_dataset, dev_dataset = relation_ranking_dataset(train=True, dev=True)

print('Num Training Data: %d' % len(train_dataset))
print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('\nNum Development Data: %d' % len(dev_dataset))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))





Num Training Data: 1295041
Train Sample:


Unnamed: 0,Entity,Example ID,False Relation,Question,True Relation
0,e,0,user/coco/science/concepts_theories/parent_concept,what is the book <e> about,book/written_work/subjects
1,e,0,base/rosetta/languoid/languoid_class,what is the book <e> about,book/written_work/subjects
2,e,0,people/person/profession,what is the book <e> about,book/written_work/subjects
3,e,0,music/album/artist,what is the book <e> about,book/written_work/subjects
4,e,0,music/album/album_content_type,what is the book <e> about,book/written_work/subjects



Num Development Data: 196297
Development Sample:


Unnamed: 0,Entity,Example ID,Question,Relation,True Relation
0,american,0,name an <e> thoroughbread racehorse,people/ethnicity/geographic_distribution,biology/organism_classification/organisms_of_this_type
1,american,0,name an <e> thoroughbread racehorse,dining/cuisine/chefs,biology/organism_classification/organisms_of_this_type
2,american,0,name an <e> thoroughbread racehorse,common/topic/image,biology/organism_classification/organisms_of_this_type
3,american,0,name an <e> thoroughbread racehorse,dining/cuisine/dishes,biology/organism_classification/organisms_of_this_type
4,american,0,name an <e> thoroughbread racehorse,symbols/namesake/named_after,biology/organism_classification/organisms_of_this_type


## Load Checkpoint

In [9]:
from lib.checkpoint import Checkpoint

# Load a checkpoint
checkpoint_path = None # 'logs/0000.01-29_13:18:52.yu_relation_model/01m_29d_15h_00m_22s.pt'
if checkpoint_path is not None:
    checkpoint = Checkpoint(checkpoint_path, device=0)
else:
    checkpoint = None

## Encode Text

Here we encode our data into a numerical format. 

In [10]:
from IPython.display import display
from tqdm import tqdm_notebook
import re

from lib.text_encoders import StaticTokenizerEncoder
from lib.text_encoders import DelimiterEncoder
from lib.text_encoders import WordEncoder

# We add development dataset to text_encoder for embeddings
# We make sure not to use the the development dataset to provide us with any vocab optimizations or learning
if checkpoint is None:
    text_encoder = WordEncoder(train_dataset['Question'] + dev_dataset['Question'], lower=True, append_eos=False)
    print('Text encoder vocab size: %d' % text_encoder.vocab_size)

    relations = set(train_dataset['True Relation'] + train_dataset['False Relation'])
    relation_word_encoder = StaticTokenizerEncoder(relations, tokenize=lambda s: re.split('/|_', s))
    print('Relation word encoder vocab size: %d' % relation_word_encoder.vocab_size)

    relation_encoder = DelimiterEncoder('/', relations)
    print('Relation encoder vocab size: %d' % relation_encoder.vocab_size)
    relations = None # Clear memory
else:
    relation_word_encoder = checkpoint.relation_word_encoder
    relation_encoder = checkpoint.relation_encoder
    text_encoder = checkpoint.text_encoder

# TODO: Lots of repeat computations... do this at the level of the dataset before it's split apart?
for dataset in [train_dataset, dev_dataset]:
    for row in tqdm_notebook(dataset):
        row['Question'] = text_encoder.encode(row['Question'])
        row['True Relation Word'] = relation_word_encoder.encode(row['True Relation'])
        row['True Relation'] = relation_encoder.encode(row['True Relation'])
        
        if 'False Relation' in row:
            row['False Relation Word'] = relation_word_encoder.encode(row['False Relation'])
            row['False Relation'] = relation_encoder.encode(row['False Relation'])

        if 'Relation' in row:
            row['Relation Word'] = relation_word_encoder.encode(row['Relation'])
            row['Relation'] = relation_encoder.encode(row['Relation'])
            

print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))

Text encoder vocab size: 6389
Relation word encoder vocab size: 3382
Relation encoder vocab size: 5254






Train Sample:


Unnamed: 0,Entity,Example ID,False Relation,False Relation Word,Question,True Relation,True Relation Word
0,e,0,"[4913, 2748, 1063, 948, 23]","[3157, 1769, 701, 263, 3329, 266, 662]","[2571, 3768, 5576, 567, 5114, 1240]","[1890, 1070, 3739]","[1225, 3023, 2780, 2431]"
1,e,0,"[5192, 508, 4825, 1240]","[3348, 335, 3102, 3102, 2681]","[2571, 3768, 5576, 567, 5114, 1240]","[1890, 1070, 3739]","[1225, 3023, 2780, 2431]"
2,e,0,"[3934, 1831, 4220]","[2569, 1190, 2737]","[2571, 3768, 5576, 567, 5114, 1240]","[1890, 1070, 3739]","[1225, 3023, 2780, 2431]"
3,e,0,"[4017, 2154, 4992]","[2627, 1389, 3209]","[2571, 3768, 5576, 567, 5114, 1240]","[1890, 1070, 3739]","[1225, 3023, 2780, 2431]"
4,e,0,"[4017, 2154, 4745]","[2627, 1389, 1389, 122, 1666]","[2571, 3768, 5576, 567, 5114, 1240]","[1890, 1070, 3739]","[1225, 3023, 2780, 2431]"


Development Sample:


Unnamed: 0,Entity,Example ID,Question,Relation,Relation Word,True Relation,True Relation Word
0,american,0,"[364, 6053, 5114, 4170, 917]","[3934, 2215, 34]","[2569, 1428, 2047, 2991]","[262, 879, 4475]","[181, 3318, 2201, 1609, 2507, 2914, 1666]"
1,american,0,"[364, 6053, 5114, 4170, 917]","[1455, 4352, 877]","[952, 2823, 576]","[262, 879, 4475]","[181, 3318, 2201, 1609, 2507, 2914, 1666]"
2,american,0,"[364, 6053, 5114, 4170, 917]","[1530, 247, 1214]","[1004, 173, 794]","[262, 879, 4475]","[181, 3318, 2201, 1609, 2507, 2914, 1666]"
3,american,0,"[364, 6053, 5114, 4170, 917]","[1455, 4352, 4400]","[952, 2823, 2851]","[262, 879, 4475]","[181, 3318, 2201, 1609, 2507, 2914, 1666]"
4,american,0,"[364, 6053, 5114, 4170, 917]","[4575, 1717, 4570]","[2950, 1116, 2341, 1943]","[262, 879, 4475]","[181, 3318, 2201, 1609, 2507, 2914, 1666]"


## Dataset Iterators

Define functions to create iterators over the development and the train dataset for each epoch.

In [11]:
from functools import partial

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

from lib.utils import pad_batch
from lib.samplers import BucketBatchSampler
from lib.samplers import SortedSampler


# Defines how to combine a batch of rows into a tensor
def collate_fn(batch, train=True):
    """ list of tensors to a batch variable """
    question_batch, _ = pad_batch([row['Question'] for row in batch])

    # PyTorch RNN requires batches to be transposed for speed and integration with CUDA
    to_variable = (lambda b: Variable(torch.stack(b).t_().contiguous(), volatile=not train))

    if train:
        true_relation_word_batch, _ = pad_batch([row['True Relation Word'] for row in batch])
        true_relation_batch, _ = pad_batch([row['True Relation'] for row in batch])
        false_relation_word_batch, _ = pad_batch([row['False Relation Word'] for row in batch])
        false_relation_batch, _ = pad_batch([row['False Relation'] for row in batch])
        return (to_variable(question_batch), to_variable(true_relation_batch),
                to_variable(true_relation_word_batch), to_variable(false_relation_batch),
                to_variable(false_relation_word_batch))
    else:
        relation_word_batch, _ = pad_batch([row['Relation Word'] for row in batch])
        relation_batch, _ = pad_batch([row['Relation'] for row in batch])
        return (to_variable(question_batch), to_variable(relation_batch),
                to_variable(relation_word_batch), batch)


def make_train_iterator(train_dataset, train_batch_size):
    # Use bucket sampling to group similar sized text but with noise + random
    sort_key = lambda r: r['Question'].size()[0]
    batch_sampler = BucketBatchSampler(train_dataset, sort_key, train_batch_size)
    return DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        collate_fn=collate_fn,
        pin_memory=is_cuda,
        num_workers=0)


def make_dev_iterator(dev_dataset, dev_batch_size):
    # Group together all examples for metrics and sort questions of similar sizes for speed
    sort_key = lambda r: (r['Question'].size()[0], r['Example ID'])
    return DataLoader(
        dev_dataset,
        batch_size=dev_batch_size,
        sampler=SortedSampler(dev_dataset, sort_key, sort_noise=0.0),
        collate_fn=partial(collate_fn, train=False),
        pin_memory=is_cuda,
        num_workers=0)


# Just to make sure everything runs
train_iterator_test = make_train_iterator(train_dataset, 512)
dev_iterator_test = make_dev_iterator(dev_dataset, 512)
# Clear memory
train_iterator_test = None
dev_iterator_test = None

# Model

Instantiate the model.

In [12]:
import torch
from lib.pretrained_embeddings import FastText

# Load embeddings
if checkpoint is None:
    unk_init = lambda t: torch.FloatTensor(t).uniform_(-0.1, 0.1)
    pretrained_embedding = FastText(language='en', cache='./../../.pretrained_embeddings_cache')
    text_embedding_weights = torch.Tensor(text_encoder.vocab_size, pretrained_embedding.dim)
    for i, token in enumerate(text_encoder.vocab):
        text_embedding_weights[i] = pretrained_embedding[token]
    relation_word_embedding_weights = torch.Tensor(relation_word_encoder.vocab_size, pretrained_embedding.dim)
    for i, token in enumerate(relation_word_encoder.vocab):
        relation_word_embedding_weights[i] = pretrained_embedding[token]
    pretrained_embedding = None # Clear memory

In [13]:
import copy
from lib.nn import YuModel

def make_model(**kwargs):
    if checkpoint is None:
        model = YuModel(relation_encoder.vocab_size, relation_word_encoder.vocab_size,
                        text_encoder.vocab_size, **kwargs)
        for param in model.parameters():
            param.data.uniform_(-0.1, 0.1)

        model.text_embedding.weight.data.copy_(text_embedding_weights)
        model.relation_word_embedding.weight.data.copy_(relation_word_embedding_weights)
        
        freeze_embeddings = True
        model.relation_word_embedding.weight.requires_grad = not freeze_embeddings
        model.text_embedding.weight.requires_grad = not freeze_embeddings
        
        cuda(model)
        return model
    else:
        model = checkpoint.model
        model = copy.deepcopy(model)
        cuda(model)
        model.relation_word_rnn.flatten_parameters()
        model.text_rnn_layer_one.flatten_parameters()
        model.text_rnn_layer_two.flatten_parameters()
        model.relation_rnn.flatten_parameters()
        return model

# Test that making the model works
model_test = make_model()
model_test = None # Clear memory

## Gradient Descent Optimizer 

Instantiate the gradient descent optimizer.

In [14]:
from torch.optim import SGD

from lib.optim import Optimizer

# https://github.com/pytorch/pytorch/issues/679
def make_optimizer(model, learning_rate=1):
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = Optimizer(SGD(params=params, lr=learning_rate))
    return optimizer

## Training Loop

Below here, we do a training loop over a number of epochs.

In [15]:
import torch
from tqdm import tqdm_notebook
from torch.nn.modules.loss import MarginRankingLoss
from collections import defaultdict
from lib.utils import get_total_parameters
from lib.utils import get_log_directory_path

from lib.checkpoint import Checkpoint

def train(hidden_size=300, margin=0.3, learning_rate=1,
          dropout_relation=0.0, dropout_text=0.0, n_layers_relation=2):
    log_directory = get_log_directory_path('yu_relation_model')
    model = make_model(dropout_relation=dropout_relation, dropout_text=dropout_text,
                       n_layers_relation=n_layers_relation, hidden_size=hidden_size)
    optimizer = make_optimizer(model, learning_rate=learning_rate)

    # QUESTION: Is there a better margin? or wrose?
    criterion = cuda(MarginRankingLoss(margin=margin))
    epochs = 30
    patience = 3
    train_batch_size = 128
    train_max_batch_size = 2048
    dev_batch_size = 4096

    print('Devevelopment Batch Size: %s' % dev_batch_size)
    print('Train Batch Size: %s' % train_batch_size)
    print('Epochs: %s' % epochs)
    print('Log Directory: %s' % log_directory)
    print('Total Parameters: %d' % get_total_parameters(model))
    print('Model:\n%s' % model)

    n_bad_epochs = 0
    last_accuracy = 0
    max_accuracy = 0
    
    # Train!
    for epoch in range(epochs):
        print('Epoch %d' % epoch)

        # Iterate over the training data
        model.train(mode=True)
        train_iterator = make_train_iterator(train_dataset, train_batch_size)
        for (question, true_relation, true_relation_word, false_relation,
             false_relation_word) in tqdm_notebook(train_iterator):
            optimizer.zero_grad()
            output_true = model(
                cuda_async(question), cuda_async(true_relation), cuda_async(true_relation_word))
            output_false = model(
                cuda_async(question), cuda_async(false_relation), cuda_async(false_relation_word))
            labels = cuda(Variable(torch.ones(output_true.size()[0])))
            loss = criterion(output_true, output_false, labels)

            # Backward propagation
            loss.backward()
            optimizer.step()

        # Save checkpoint
        print('Saved Checkpoint:', Checkpoint.save(
            log_directory, {
                'model': model,
                'relation_word_encoder': relation_word_encoder,
                'relation_encoder': relation_encoder,
                'text_encoder': text_encoder
            },
            device=device))

        # Evaluate
        model.train(mode=False)
        examples = defaultdict(list)
        dev_iterator = make_dev_iterator(dev_dataset, dev_batch_size)
        for (question, relation, relation_word, batch) in tqdm_notebook(dev_iterator):
            output = model(cuda_async(question), cuda_async(relation), cuda_async(relation_word))
            output = output.data.cpu()

            for i, row in enumerate(batch):
                examples[row['Example ID']].append({
                    'Score': output[i],
                    'Question': row['Question'],
                    'True Relation': row['True Relation'],
                    'Relation': row['Relation']
                })

        # Print metrics
        correct = 0
        for pool in examples.values():
            max_relation = max(pool, key=lambda p: p['Score'])
            if max_relation['Relation'].tolist() == max_relation['True Relation'].tolist():
                correct += 1
        accuracy = correct / len(examples)
        print('Accuracy: %f [%d of %d]' % (accuracy, correct, len(examples)))
        print('-------------------------------------------------------------------------')

        # Scheduler for increasing batch_size inspired by this paper:
        # https://openreview.net/forum?id=B1Yy1BxCZ
        if max_accuracy > accuracy:
            n_bad_epochs += 1
        else:
            n_bad_epochs = 0

        if n_bad_epochs >= patience:
            train_batch_size = min(train_max_batch_size, train_batch_size * 2)
            print('Ran out of patience, increasing train batch size to:', train_batch_size)

        if accuracy > max_accuracy:
            max_accuracy = accuracy
    
    criterion = None
    model = None
    optimizer = None
    torch.cuda.empty_cache()
    return -max_accuracy

train()

Devevelopment Batch Size: 4096
Train Batch Size: 128
Epochs: 30
Log Directory: logs/yu_relation_model.01-29_17:00:01
Total Parameters: 12405000
Model:
YuModel(
  (relation_embedding): Embedding(5254, 300, padding_idx=0)
  (relation_word_embedding): Embedding(3382, 300, padding_idx=0)
  (relation_word_rnn): LSTM(300, 300, num_layers=2, bidirectional=True)
  (relation_rnn): LSTM(300, 300, num_layers=2, bidirectional=True)
  (text_embedding): Embedding(6389, 300, padding_idx=0)
  (text_rnn_layer_one): LSTM(300, 300, bidirectional=True)
  (text_rnn_layer_two): LSTM(600, 300, bidirectional=True)
  (distance): CosineSimilarity(
  )
)
Epoch 0



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/366.pt



Accuracy: 0.851568 [9231 of 10840]
-------------------------------------------------------------------------
Epoch 1



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/685.pt



Accuracy: 0.864391 [9370 of 10840]
-------------------------------------------------------------------------
Epoch 2



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/997.pt



Accuracy: 0.869649 [9427 of 10840]
-------------------------------------------------------------------------
Epoch 3



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/1308.pt



Accuracy: 0.870018 [9431 of 10840]
-------------------------------------------------------------------------
Epoch 4



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/1616.pt



Accuracy: 0.874354 [9478 of 10840]
-------------------------------------------------------------------------
Epoch 5



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/1925.pt



Accuracy: 0.876845 [9505 of 10840]
-------------------------------------------------------------------------
Epoch 6



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/2232.pt



Accuracy: 0.876476 [9501 of 10840]
-------------------------------------------------------------------------
Epoch 7



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/2541.pt



Accuracy: 0.876845 [9505 of 10840]
-------------------------------------------------------------------------
Epoch 8



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/2850.pt



Accuracy: 0.877583 [9513 of 10840]
-------------------------------------------------------------------------
Epoch 9



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/3158.pt



Accuracy: 0.877491 [9512 of 10840]
-------------------------------------------------------------------------
Epoch 10



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/3466.pt



Accuracy: 0.879705 [9536 of 10840]
-------------------------------------------------------------------------
Epoch 11



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/3775.pt



Accuracy: 0.878321 [9521 of 10840]
-------------------------------------------------------------------------
Epoch 12



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/4082.pt



Accuracy: 0.878598 [9524 of 10840]
-------------------------------------------------------------------------
Epoch 13



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/4391.pt



Accuracy: 0.880351 [9543 of 10840]
-------------------------------------------------------------------------
Epoch 14



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/4699.pt



Accuracy: 0.879889 [9538 of 10840]
-------------------------------------------------------------------------
Epoch 15



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/5007.pt



Accuracy: 0.877491 [9512 of 10840]
-------------------------------------------------------------------------
Epoch 16



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/5315.pt



Accuracy: 0.879613 [9535 of 10840]
-------------------------------------------------------------------------
Ran out of patience, increasing train batch size to: 256
Epoch 17



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/5590.pt



Accuracy: 0.880904 [9549 of 10840]
-------------------------------------------------------------------------
Epoch 18



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/5865.pt



Accuracy: 0.882288 [9564 of 10840]
-------------------------------------------------------------------------
Epoch 19



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/6140.pt



Accuracy: 0.878782 [9526 of 10840]
-------------------------------------------------------------------------
Epoch 20



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/6414.pt



Accuracy: 0.880720 [9547 of 10840]
-------------------------------------------------------------------------
Epoch 21



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/6688.pt



Accuracy: 0.880904 [9549 of 10840]
-------------------------------------------------------------------------
Ran out of patience, increasing train batch size to: 512
Epoch 22



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/6943.pt



Accuracy: 0.878782 [9526 of 10840]
-------------------------------------------------------------------------
Ran out of patience, increasing train batch size to: 1024
Epoch 23



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/7196.pt



Accuracy: 0.884041 [9583 of 10840]
-------------------------------------------------------------------------
Epoch 24



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/7447.pt



Accuracy: 0.882749 [9569 of 10840]
-------------------------------------------------------------------------
Epoch 25



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/7697.pt



Accuracy: 0.882288 [9564 of 10840]
-------------------------------------------------------------------------
Epoch 26



Saved Checkpoint: logs/yu_relation_model.01-29_17:00:01/7947.pt



Accuracy: 0.882934 [9571 of 10840]
-------------------------------------------------------------------------
Ran out of patience, increasing train batch size to: 2048
Epoch 27


RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCStorage.cu:58

In [None]:
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.utils import use_named_args

# The list of hyper-parameters we want to optimize. For each one we define the bounds,
# the corresponding scikit-learn parameter name, as well as how to sample values
# from that dimension (`'log-uniform'` for the learning rate)
space  = [Real(.1, 1, 'log-uniform', name='learning_rate'),
          Real(.1, 1, name='margin'),
          Real(0, 0.9, name='dropout_relation'),
          Real(0, 0.9, name='dropout_text'),
          Integer(1, 2, name='n_layers_relation')]

train = use_named_args(space)(train)

results_gp = gp_minimize(train, space, n_calls=50, random_state=123)
print('Best Accuracy: %.4f' % results_gp.fun)

# Evaluate

## Baseline Evaluation

Play around with the evaluation of the model.

In [None]:
from collections import defaultdict
from lib.checkpoint import Checkpoint

model = make_model()
model.train(mode=False)
examples = defaultdict(list)
dev_iterator = make_dev_iterator()
for (question, relation, relation_word, batch) in tqdm_notebook(dev_iterator):
    output = model(cuda_async(question), cuda_async(relation), cuda_async(relation_word))
    output = output.data.cpu()

    for i, row in enumerate(batch):
        examples[row['Example ID']].append({
            'Score': output[i],
            'Question': text_encoder.decode(row['Question']),
            'True Relation': relation_encoder.decode(row['True Relation']),
            'Relation': relation_encoder.decode(row['Relation'])
        })

# Print metrics
correct = 0
for id_, pool in sorted(examples.items(), key=lambda item: item[0]):
    max_relation = max(pool, key=lambda p: p['Score'])
    if max_relation['Relation'] == max_relation['True Relation']:
        correct += 1
print('Accuracy: %f [%d of %d]' % (correct / len(examples), correct, len(examples)))
print()

## End-to-End Evaluation

Evaluate similar to our end-to-end implementation. This shows that the order of execution matters.

In [26]:
import torch
from torch.autograd import Variable
from lib.utils import pad_batch

cuda = lambda v: v.cuda() if torch.cuda.is_available() else t
to_variable = lambda b: cuda(Variable(torch.stack(b).t_().contiguous(), volatile=True))
    
def get_relation_scores(model, questions, relations):
    questions = [q for q in questions]
    questions_encoded, _ = pad_batch([text_encoder.encode(q) for q in questions])
    relations_encoded, _ = pad_batch([relation_encoder.encode(r) for r in relations])
    relations_word_encoded, _ = pad_batch([relation_word_encoder.encode(r) for r in relations])

    questions_encoded = to_variable(questions_encoded)
    relations_encoded = to_variable(relations_encoded)
    relations_word_encoded = to_variable(relations_word_encoded)

    ret = model(questions_encoded, relations_encoded, relations_word_encoded)
    ret = ret.data.cpu().tolist()
    return ret

# To test this cell
question = 'where was Obama born ?'
print('Question:', question)
# print('Scores:')
model = make_model()
print(get_relation_scores(model, [question], ['people/person/place_of_birth']))
print(get_relation_scores(model, [question], ['location/location/people_born_here']))
model = None

Question: where was Obama born ?


RuntimeError: cuda runtime error (59) : device-side assert triggered at /pytorch/torch/lib/THC/generic/THCTensorCopy.c:20

In [None]:
from tqdm import tqdm_notebook
import pandas as pd

relation_correct = 0
df = pd.read_table('../../data/relation_ranking/dev.txt', 
                     header=None, names=['True Relation', 'Relation Pool', 'Question', 'Entity'])
model = make_model()
for i, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    candidate_relations = set(row['Relation Pool'].split())
    true_relation = row['True Relation']
    question = row['Question'].strip()
    questions, relations = zip(*[(question, r) for r in candidate_relations])
    scores = get_relation_scores(model, questions, relations)
    max_relation = max([(r, i) for i, r in enumerate(candidate_relations)],
                       key=lambda item: float(scores[item[1]]))[0]
    
    if max_relation == true_relation:
        relation_correct += 1

print('Relation Accuracy (SOTA 88.4%%): %f [%d of %d]' %
          (relation_correct / df.shape[0], relation_correct, df.shape[0]))