# Relation Ranking Yu et al. 2017 Model

Our goal here is to reimplement the 93% relation model from Yu et al. 2017.

First things first, set up the initial configuration.

In [1]:
import sys
print('Python Version:', sys.version)
import pandas as pd
import logging
sys.path.insert(0, '../../')

from lib.utils import setup_training

# Create root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Keep wide DataFrames on a single line and cap the rendered width of each column
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 80)

random_seed = 123
device = 0
is_cuda, _ = setup_training(device, random_seed)


# Async minibatch allocation for speed
# Reference: http://timdettmers.com/2015/03/09/deep-learning-hardware-guide/
# TODO: look into cuda_async device=device
def cuda_async(t):
    """ Copy `t` to GPU `device` without blocking the host; no-op when CUDA is off. Use with tensors.

    `async` became a reserved word in Python 3.7, so it can no longer be written as a literal
    keyword argument. PyTorch >= 0.4 names the flag `non_blocking`; older releases only accept
    `async`, which is therefore passed through a dict as a fallback.
    """
    if not is_cuda:
        return t
    try:
        return t.cuda(device=device, non_blocking=True)
    except TypeError:
        # Legacy PyTorch (< 0.4) rejects `non_blocking` as an unexpected keyword
        return t.cuda(device=device, **{'async': True})


def cuda(t):
    """ Move `t` to GPU `device` (blocking); no-op when CUDA is off. Use with nn.modules. """
    return t.cuda(device=device) if is_cuda else t

Python Version: 3.6.4 (default, Dec 19 2017, 17:29:45) 
[GCC 5.4.0 20160609]


## Dataset

Load our dataset. Log a couple rows.

In [2]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def yu_dataset(directory='../../data/yu/',
               train=False,
               dev=False,
               test=False,
               train_filename='train.replace_ne.withpool',
               dev_filename='valid.replace_ne.withpool',
               test_filename='test.replace_ne.withpool',
               vocab_filename='relation.2M.list'):
    """
    Load the requested splits of the Yu et al. relation data as one row per (question, relation) pair.

    Each data line has three tab-separated columns: the true relation id, a space-separated pool
    of relation ids and the question. Ids are 1-based line numbers into the vocab file.

    Data line example: 40	61 40 117	which genre of album is #head_entity# ?
    Vocab line example: /film/film/genre

    Sample Data (after id lookup; leading and trailing '/' are stripped):
        Question: 'which genre of album is #head_entity# ?'
        True Relation: 'music/album/genre'
        Relation Pool: ['music/album/release_type', 'music/album/artist']

    Lines whose pool is the literal 'noNegativeAnswer' are skipped.

    Args:
        directory (str): Directory holding the data files and the vocab file.
        train (bool): Load the training split; rows carry a 'False Relation' key.
        dev (bool): Load the development split; rows carry a 'Relation' key.
        test (bool): Load the test split; rows carry a 'Relation' key.
        train_filename (str): Training file name inside `directory`.
        dev_filename (str): Development file name inside `directory`.
        test_filename (str): Test file name inside `directory`.
        vocab_filename (str): Relation vocab file name inside `directory`.

    Returns:
        A single `Dataset` when exactly one split is requested, otherwise a tuple of `Dataset`
        in (train, dev, test) order restricted to the requested splits.

    Raises:
        AssertionError: If a training line lists its true relation inside its pool.
    """
    vocab_path = os.path.join(directory, vocab_filename)
    # Context manager so the vocab file handle is closed once it has been read
    with open(vocab_path, 'r') as vocab_file:
        vocab = [l.strip() for l in vocab_file]

    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue

        file_path = os.path.join(directory, filename)
        data = pd.read_table(file_path, header=None, names=['True Relation', 'Relation Pool', 'Question'])
        rows = []
        for i, row in tqdm_notebook(data.iterrows(), total=data.shape[0]):
            if row['Relation Pool'].strip() == 'noNegativeAnswer':
                continue
            # Relation ids are 1-based, hence the `- 1` when indexing the vocab
            relation_pool = [
                vocab[int(relation_id) - 1].strip('/') for relation_id in row['Relation Pool'].split()
            ]
            true_relation = vocab[int(row['True Relation']) - 1].strip('/')
            question = row['Question'].strip()
            # Development and test set may or may not have the True relation based on our predicted pool
            if filename == train_filename:
                assert true_relation not in relation_pool

            # Training rows name the candidate 'False Relation'; evaluation rows name it 'Relation'
            relation_key = 'False Relation' if filename == train_filename else 'Relation'
            for relation in relation_pool:
                rows.append({'Question': question,
                             'True Relation': true_relation,
                             relation_key: relation,
                             'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [3]:
import os
from tqdm import tqdm_notebook

from lib.datasets.dataset import Dataset

def relation_ranking_dataset(directory='../../data/relation_ranking/',
                             train=False,
                             dev=False,
                             test=False,
                             train_filename='train.txt',
                             dev_filename='dev.txt',
                             test_filename=''):
    """
    Load the requested relation ranking splits as one row per (question, candidate relation) pair.

    Each line holds four tab-separated fields: the true relation, a whitespace-separated
    relation pool, the question and the entity. Duplicate relations in a pool are collapsed.

    Sample Data:
        True Relation: 'film/film/country'
        Relation Pool: 'film/film/country film/film/genre film/film/language'
        Question: 'what country is <e> from ?'

    For the training split the true relation is removed from the pool, so every remaining
    relation is a negative and rows carry a 'False Relation' key. For other splits the pool is
    kept as is and rows carry a 'Relation' key.

    Args:
        directory (str): Directory holding the split files.
        train (bool): Load the training split.
        dev (bool): Load the development split.
        test (bool): Load the test split. NOTE(review): `test_filename` defaults to an empty
            string, so a real file name must be supplied along with `test=True`.
        train_filename (str): Training file name inside `directory`.
        dev_filename (str): Development file name inside `directory`.
        test_filename (str): Test file name inside `directory`.

    Returns:
        A single `Dataset` when exactly one split is requested, otherwise a tuple of `Dataset`
        in (train, dev, test) order restricted to the requested splits.

    Raises:
        ValueError: If a line does not split into exactly four tab-separated fields.
        KeyError: If a training line's pool does not contain its true relation.
    """
    ret = []
    datasets = [(train, train_filename), (dev, dev_filename), (test, test_filename)]
    for is_requested, filename in datasets:
        if not is_requested:
            continue

        file_path = os.path.join(directory, filename)
        # Context manager so the file handle is closed once every line has been read
        with open(file_path) as file:
            lines = [tuple(l.split('\t')) for l in file]
        rows = []
        for i, (true_relation, relation_pool, question, entity) in enumerate(lines):
            relation_pool = set(relation_pool.split())
            question = question.strip()
            entity = entity.strip()

            # Development and test set may or may not have the True relation based on our predicted pool
            if filename == train_filename:
                relation_pool.remove(true_relation)
                relation_pool = list(relation_pool)

            # Training rows name the candidate 'False Relation'; evaluation rows name it 'Relation'
            relation_key = 'False Relation' if filename == train_filename else 'Relation'
            for relation in relation_pool:
                rows.append({'Question': question,
                             'Entity': entity,
                             'True Relation': true_relation,
                             relation_key: relation,
                             'Example ID': i})
        ret.append(Dataset(rows))

    if len(ret) == 1:
        return ret[0]
    else:
        return tuple(ret)

In [4]:
from IPython.display import display

# Load the train and development splits; every row pairs one question with one candidate relation
train_dataset, dev_dataset = relation_ranking_dataset(train=True, dev=True)

# Log the number of rows in each split and render its first five rows as a table
print('Num Training Data: %d' % len(train_dataset))
print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('\nNum Development Data: %d' % len(dev_dataset))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))

Num Training Data: 1312665
Train Sample:


Unnamed: 0,Entity,Example ID,False Relation,Question,True Relation
0,e,0,freebase/valuenotation/has_no_value,what is the book <e> about,book/written_work/subjects
1,e,0,music/album/release_type,what is the book <e> about,book/written_work/subjects
2,e,0,film/music_contributor/film,what is the book <e> about,book/written_work/subjects
3,e,0,award/award_nominee/award_nominations,what is the book <e> about,book/written_work/subjects
4,e,0,music/artist/origin,what is the book <e> about,book/written_work/subjects



Num Development Data: 196297
Development Sample:


Unnamed: 0,Entity,Example ID,Question,Relation,True Relation
0,american,0,name an <e> thoroughbread racehorse,book/book_subject/works,biology/organism_classification/organisms_of_this_type
1,american,0,name an <e> thoroughbread racehorse,dining/cuisine/chefs,biology/organism_classification/organisms_of_this_type
2,american,0,name an <e> thoroughbread racehorse,dining/cuisine/ingredients,biology/organism_classification/organisms_of_this_type
3,american,0,name an <e> thoroughbread racehorse,dataworld/gardening_hint/split_to,biology/organism_classification/organisms_of_this_type
4,american,0,name an <e> thoroughbread racehorse,people/ethnicity/geographic_distribution,biology/organism_classification/organisms_of_this_type


## Load Checkpoint

In [5]:
from lib.checkpoint import Checkpoint

# Load a checkpoint
# Set `checkpoint_path` to a saved `.pt` file to resume from it; `None` starts from scratch
checkpoint_path = None # '../../results/0000.01-22_09:00:26.yu_relation_model/01m_22d_09h_35m_44s.pt'
if checkpoint_path is not None:
    # Load onto the same device the rest of the notebook is configured with
    checkpoint = Checkpoint(checkpoint_path, device=device)
else:
    checkpoint = None

## Encode Text

Here we encode our data into a numerical format. 

In [6]:
from IPython.display import display
import re

from lib.text_encoders import StaticTokenizerEncoder
from lib.text_encoders import DelimiterEncoder
from lib.text_encoders import WordEncoder

# We add development dataset to text_encoder for embeddings
# We make sure not to use the development dataset to provide us with any vocab optimizations or learning
if checkpoint is None:
    # Question and entity vocabularies cover both the train and development text
    text_encoder = WordEncoder(train_dataset['Question'] + dev_dataset['Question'], lower=True, append_eos=False)
    print('Text encoder vocab size: %d' % text_encoder.vocab_size)
    
    entity_encoder = WordEncoder(train_dataset['Entity'] + dev_dataset['Entity'], lower=True, append_eos=False)
    print('Entity encoder vocab size: %d' % entity_encoder.vocab_size)

    # Relation vocabularies are built from the training relations only
    relations = set(train_dataset['True Relation'] + train_dataset['False Relation'])
    # Word-level view of a relation: tokens are the pieces between '/' and '_'
    relation_word_encoder = StaticTokenizerEncoder(relations, tokenize=lambda s: re.split('/|_', s))
    print('Relation word encoder vocab size: %d' % relation_word_encoder.vocab_size)

    # Coarser view of a relation; presumably one token per '/'-separated segment -- TODO confirm
    relation_encoder = DelimiterEncoder('/', relations)
    print('Relation encoder vocab size: %d' % relation_encoder.vocab_size)
else:
    # Reuse the encoders stored in the checkpoint so ids stay aligned with the saved model
    relation_word_encoder = checkpoint.relation_word_encoder
    relation_encoder = checkpoint.relation_encoder
    text_encoder = checkpoint.text_encoder
    entity_encoder = checkpoint.entity_encoder

# NOTE(review): rows are overwritten in place with their encoded form, so this cell is not
# idempotent -- reload the datasets before running it a second time.
for dataset in [train_dataset, dev_dataset]:
    for row in dataset:
        row['Question'] = text_encoder.encode(row['Question'])
        row['Entity'] = entity_encoder.encode(row['Entity'])
        # The word-level encoding must be computed before 'True Relation' itself is overwritten
        row['True Relation Word'] = relation_word_encoder.encode(row['True Relation'])
        row['True Relation'] = relation_encoder.encode(row['True Relation'])
        
        # Training rows carry a 'False Relation'
        if 'False Relation' in row:
            row['False Relation Word'] = relation_word_encoder.encode(row['False Relation'])
            row['False Relation'] = relation_encoder.encode(row['False Relation'])

        # Development rows carry a candidate 'Relation'
        if 'Relation' in row:
            row['Relation Word'] = relation_word_encoder.encode(row['Relation'])
            row['Relation'] = relation_encoder.encode(row['Relation'])
            

print('Train Sample:')
display(pd.DataFrame(train_dataset[:5]))
print('Development Sample:')
display(pd.DataFrame(dev_dataset[:5]))

Text encoder vocab size: 6428
Entity encoder vocab size: 48079
Relation word encoder vocab size: 3390
Relation encoder vocab size: 5268
Train Sample:


Unnamed: 0,Entity,Example ID,False Relation,False Relation Word,Question,True Relation,True Relation Word
0,[28515],0,"[2855, 1716, 1447]","[1828, 1111, 1707, 1560, 2850]","[6319, 4426, 4139, 4484, 6352, 1412]","[3970, 2064, 2126]","[2529, 2891, 646, 1368]"
1,[28515],0,"[3403, 1470, 2342]","[2185, 942, 3357, 3049]","[6319, 4426, 4139, 4484, 6352, 1412]","[3970, 2064, 2126]","[2529, 2891, 646, 1368]"
2,[28515],0,"[5106, 2782, 5106]","[3281, 2185, 1125, 3281]","[6319, 4426, 4139, 4484, 6352, 1412]","[3970, 2064, 2126]","[2529, 2891, 646, 1368]"
3,[28515],0,"[1194, 4742, 1416]","[751, 751, 3111, 751, 2584]","[6319, 4426, 4139, 4484, 6352, 1412]","[3970, 2064, 2126]","[2529, 2891, 646, 1368]"
4,[28515],0,"[3403, 2668, 1325]","[2185, 1725, 835]","[6319, 4426, 4139, 4484, 6352, 1412]","[3970, 2064, 2126]","[2529, 2891, 646, 1368]"


Development Sample:


Unnamed: 0,Entity,Example ID,Question,Relation,Relation Word,True Relation,True Relation Word
0,[47311],0,"[458, 3728, 6352, 4706, 5671]","[3970, 4887, 1203]","[2529, 2529, 2029, 759]","[3366, 2803, 1887]","[2155, 542, 1676, 1510, 2275, 2618, 3049]"
1,[47311],0,"[458, 3728, 6352, 4706, 5671]","[2825, 2939, 125]","[1810, 1882, 74]","[3366, 2803, 1887]","[2155, 542, 1676, 1510, 2275, 2618, 3049]"
2,[47311],0,"[458, 3728, 6352, 4706, 5671]","[2825, 2939, 1544]","[1810, 1882, 984]","[3366, 2803, 1887]","[2155, 542, 1676, 1510, 2275, 2618, 3049]"
3,[47311],0,"[458, 3728, 6352, 4706, 5671]","[2405, 3670, 1927]","[1552, 589, 1704, 618, 1332]","[3366, 2803, 1887]","[2155, 542, 1676, 1510, 2275, 2618, 3049]"
4,[47311],0,"[458, 3728, 6352, 4706, 5671]","[4675, 4928, 919]","[2991, 3169, 1108, 2670]","[3366, 2803, 1887]","[2155, 542, 1676, 1510, 2275, 2618, 3049]"


## Dataset Iterators

Define functions to create iterators over the development and the train dataset for each epoch.

In [7]:
from functools import partial

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

from lib.utils import pad_batch
from lib.samplers import BucketBatchSampler
from lib.samplers import SortedSampler


# Defines how to combine a batch of rows into a tensor
def collate_fn(batch, train=True):
    """ Pad and stack the encoded columns of `batch` (a list of rows) into batch variables.

    Args:
        batch (list of dict): Encoded dataset rows.
        train (bool): Selects which relation columns are read. When False the variables are
            created with `volatile=True` and the raw rows are returned as the last element.

    Returns:
        train=True: (question, entity, true relation, true relation word, false relation,
            false relation word) variables.
        train=False: (question, entity, relation, relation word) variables followed by `batch`.
    """

    def padded(column):
        # Only the first element of the pair returned by pad_batch is used
        tensors, _ = pad_batch([example[column] for example in batch])
        return tensors

    def to_variable(tensors):
        # PyTorch RNN requires batches to be transposed for speed and integration with CUDA
        return Variable(torch.stack(tensors).t_().contiguous(), volatile=not train)

    question = padded('Question')
    entity = padded('Entity')

    if not train:
        relation_word = padded('Relation Word')
        relation = padded('Relation')
        return (to_variable(question), to_variable(entity), to_variable(relation),
                to_variable(relation_word), batch)

    true_relation_word = padded('True Relation Word')
    true_relation = padded('True Relation')
    false_relation_word = padded('False Relation Word')
    false_relation = padded('False Relation')
    return (to_variable(question), to_variable(entity), to_variable(true_relation),
            to_variable(true_relation_word), to_variable(false_relation),
            to_variable(false_relation_word))


def make_train_iterator(train_batch_size):
    """ Build a DataLoader over `train_dataset` with batches of `train_batch_size` rows.

    Batches come from a BucketBatchSampler keyed on the length of each row's encoded entity.
    """

    def entity_length(row):
        return row['Entity'].size()[0]

    # Use bucket sampling to group similar sized text but with noise + random
    bucket_sampler = BucketBatchSampler(train_dataset, entity_length, train_batch_size)
    loader_options = {
        'batch_sampler': bucket_sampler,
        'collate_fn': collate_fn,
        'pin_memory': is_cuda,
        'num_workers': 0,
    }
    return DataLoader(train_dataset, **loader_options)


def make_dev_iterator(dev_batch_size):
    """ Build a DataLoader over `dev_dataset` with batches of `dev_batch_size` rows.

    Rows are visited in sorted order (no sort noise) by encoded entity length, then example id,
    and collated in evaluation mode (`collate_fn` with `train=False`).
    """

    def entity_length_then_example(row):
        return (row['Entity'].size()[0], row['Example ID'])

    # Group together all examples for metrics and sort questions of similar sizes for speed
    ordered_sampler = SortedSampler(dev_dataset, entity_length_then_example, sort_noise=0.0)
    eval_collate = partial(collate_fn, train=False)
    return DataLoader(
        dev_dataset,
        batch_size=dev_batch_size,
        sampler=ordered_sampler,
        collate_fn=eval_collate,
        pin_memory=is_cuda,
        num_workers=0)


# Smoke test: build both iterators once (batch size 512) so construction errors surface before training
train_iterator_test = make_train_iterator(512)
dev_iterator_test = make_dev_iterator(512)
# Clear memory: drop the references so the test iterators can be garbage collected
train_iterator_test = None
dev_iterator_test = None

# Loss

Instantiate the loss.

In [8]:
from torch.nn.modules.loss import MarginRankingLoss

# QUESTION: Is there a better margin? Or a worse one?
# Pairwise margin ranking loss with margin 0.5; moved to the GPU when CUDA is enabled
criterion = cuda(MarginRankingLoss(margin=0.5))

# Model

Instantiate the model.

In [9]:
import torch
from lib.pretrained_embeddings import FastText

# Load embeddings
# Only needed for a fresh model; a checkpointed model already carries its embedding weights
if checkpoint is None:
    # NOTE(review): `unk_init` is defined but never passed to anything in this cell -- confirm intent
    unk_init = lambda t: torch.FloatTensor(t).uniform_(-0.1, 0.1)
    pretrained_embedding = FastText(language='en', cache='./../../.pretrained_embeddings_cache')

    def _pretrained_weights(encoder):
        """ One pretrained vector per vocab entry of `encoder`, in vocab order. """
        weights = torch.Tensor(encoder.vocab_size, pretrained_embedding.dim)
        for index, token in enumerate(encoder.vocab):
            weights[index] = pretrained_embedding[token]
        return weights

    text_embedding_weights = _pretrained_weights(text_encoder)
    relation_word_embedding_weights = _pretrained_weights(relation_word_encoder)
    entity_embedding_weights = _pretrained_weights(entity_encoder)
    pretrained_embedding = None # Clear memory

In [10]:
import copy
from lib.nn import YuModelEntity

def make_model():
    """ Return a model placed on the configured device.

    With a checkpoint loaded, a deep copy of the checkpointed model is returned. Otherwise a new
    `YuModelEntity` is built, every parameter is drawn from U(-0.1, 0.1), and the text, relation
    word and entity embeddings are overwritten with the pretrained weights and frozen.
    """
    if checkpoint is not None:
        model = copy.deepcopy(checkpoint.model)
        cuda(model)
        recurrent_layers = (model.relation_word_rnn, model.text_rnn_layer_one, model.text_rnn_layer_two,
                            model.relation_rnn, model.entity_rnn)
        for rnn in recurrent_layers:
            rnn.flatten_parameters()
        return model

    model = YuModelEntity(relation_encoder.vocab_size,
                          relation_word_encoder.vocab_size, entity_encoder.vocab_size,
                          text_encoder.vocab_size)
    for parameter in model.parameters():
        parameter.data.uniform_(-0.1, 0.1)

    # Frozen embeddings have requires_grad=False
    freeze_embeddings = True
    pretrained = ((model.text_embedding, text_embedding_weights),
                  (model.relation_word_embedding, relation_word_embedding_weights),
                  (model.entity_embedding, entity_embedding_weights))
    for embedding, weights in pretrained:
        embedding.weight.data.copy_(weights)
        embedding.weight.requires_grad = not freeze_embeddings

    cuda(model)
    return model

# Test that making the model works: build it once so construction errors surface before training
model_test = make_model()
model_test = None # Clear memory

## Gradient Descent Optimizer 

Instantiate the gradient descent optimizer.

In [11]:
from torch.optim import SGD

from lib.optim import Optimizer

# https://github.com/pytorch/pytorch/issues/679
def make_optimizer(model):
    """ Wrap plain SGD (lr=1) over the trainable parameters of `model` in the project `Optimizer`. """
    # Parameters with requires_grad=False (e.g. frozen embeddings) are left out
    trainable = (parameter for parameter in model.parameters() if parameter.requires_grad)
    return Optimizer(SGD(params=trainable, lr=1))

## Training Loop

Below here, we do a training loop over a number of epochs.

In [16]:
from lib.utils import get_total_parameters
from lib.utils import get_log_directory_path

# Upper bound on the number of passes over the training data
epochs = 1000
# Number of accuracy drops tolerated by the training loop before it doubles the train batch size
patience = 2
# Starting train batch size and the cap it may grow to
train_batch_size = 128
train_max_batch_size = 2048
dev_batch_size = 4096
# Checkpoints written by the training loop are saved under this directory
log_directory = get_log_directory_path('yu_relation_model')
model = make_model()
optimizer = make_optimizer(model)

# Log the run configuration
print('Log Directory: %s' % log_directory)
print('Devevelopment Batch Size: %s' % dev_batch_size)
print('Train Batch Size: %s' % train_batch_size)
print('Epochs: %s' % epochs)
print('Total Parameters: %d' % get_total_parameters(model))
print('Model:\n%s' % model)

Log Directory: logs/0000.01-22_22:10:38.yu_relation_model
Devevelopment Batch Size: 4096
Train Batch Size: 128
Epochs: 1000
Total Parameters: 5756400
Model:
YuModelEntity(
  (relation_embedding): Embedding(5268, 300, padding_idx=0)
  (relation_word_embedding): Embedding(3390, 300, padding_idx=0)
  (relation_word_rnn): LSTM(300, 200, bidirectional=True)
  (relation_rnn): LSTM(300, 200, bidirectional=True)
  (entity_embedding): Embedding(48079, 300, padding_idx=0)
  (entity_rnn): LSTM(300, 200, bidirectional=True)
  (text_embedding): Embedding(6428, 300, padding_idx=0)
  (text_rnn_layer_one): LSTM(300, 200, bidirectional=True)
  (text_rnn_layer_two): LSTM(400, 200, bidirectional=True)
  (distance): CosineSimilarity(
  )
)


In [17]:
from tqdm import tqdm_notebook
from collections import defaultdict

from lib.checkpoint import Checkpoint

# State for the batch size scheduler at the bottom of the loop
n_bad_epochs = 0
last_accuracy = 0

# Train!
for epoch in range(epochs):
    print('Epoch %d' % epoch)

    # Iterate over the training data
    model.train(mode=True)
    # Rebuilt every epoch so a changed train_batch_size takes effect
    train_iterator = make_train_iterator(train_batch_size)
    for (question, entity, true_relation, true_relation_word, false_relation,
         false_relation_word) in tqdm_notebook(train_iterator):
        optimizer.zero_grad()
        # Score the same question/entity against the true and the false relation
        output_true = model(
            cuda_async(question), cuda_async(entity), cuda_async(true_relation), cuda_async(true_relation_word))
        output_false = model(
            cuda_async(question), cuda_async(entity), cuda_async(false_relation), cuda_async(false_relation_word))
        # A target of +1 asks the ranking loss to score output_true above output_false
        labels = cuda(Variable(torch.ones(output_true.size()[0])))
        loss = criterion(output_true, output_false, labels)

        # Backward propagation
        loss.backward()
        optimizer.step()

    # Save checkpoint
    # Every encoder read back when resuming must be stored; the encode cell also reads
    # `checkpoint.entity_encoder`, so it is saved alongside the other three.
    print('Saved Checkpoint:', Checkpoint.save(
        log_directory, {
            'model': model,
            'relation_word_encoder': relation_word_encoder,
            'relation_encoder': relation_encoder,
            'text_encoder': text_encoder,
            'entity_encoder': entity_encoder
        },
        device=device))

    # Evaluate
    model.train(mode=False)
    # Example ID -> scored candidate relations of that example
    examples = defaultdict(list)
    dev_iterator = make_dev_iterator(dev_batch_size)
    for (question, entity, relation, relation_word, batch) in tqdm_notebook(dev_iterator):
        output = model(cuda_async(question), cuda_async(entity), cuda_async(relation), cuda_async(relation_word))
        output = output.data.cpu()

        for i, row in enumerate(batch):
            examples[row['Example ID']].append({
                'Score': output[i],
                'Question': row['Question'],
                'True Relation': row['True Relation'],
                'Relation': row['Relation']
            })

    # Print metrics
    # An example counts as correct when its highest scoring candidate equals the true relation
    correct = 0
    for pool in examples.values():
        max_relation = max(pool, key=lambda p: p['Score'])
        if max_relation['Relation'].tolist() == max_relation['True Relation'].tolist():
            correct += 1
    accuracy = correct / len(examples)
    print('Accuracy: %f [%d of %d]' % (accuracy, correct, len(examples)))
    print()

    # Scheduler for increasing batch_size inspired by this paper:
    # https://openreview.net/forum?id=B1Yy1BxCZ
    # Accuracy is compared with the previous epoch; an unchanged accuracy leaves the counter as is
    if last_accuracy > accuracy:
        n_bad_epochs += 1
    elif accuracy > last_accuracy:
        n_bad_epochs = 0

    if n_bad_epochs > patience:
        train_batch_size = min(train_max_batch_size, train_batch_size * 2)
        print('Ran out of patience, increasing train batch size to:', train_batch_size)
        n_bad_epochs = 0

    last_accuracy = accuracy

Epoch 0



Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_15m_43s.pt


Accuracy: 0.850185 [9216 of 10840]

Epoch 1


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_21m_11s.pt


Accuracy: 0.860886 [9332 of 10840]

Epoch 2


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_26m_24s.pt


Accuracy: 0.870387 [9435 of 10840]

Epoch 3


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_31m_54s.pt


Accuracy: 0.869742 [9428 of 10840]

Epoch 4


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_37m_11s.pt


Accuracy: 0.872878 [9462 of 10840]

Epoch 5


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_42m_21s.pt


Accuracy: 0.873708 [9471 of 10840]

Epoch 6


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_48m_36s.pt


Accuracy: 0.875830 [9494 of 10840]

Epoch 7


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_22h_55m_01s.pt


Accuracy: 0.874446 [9479 of 10840]

Epoch 8


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_00m_16s.pt


Accuracy: 0.876753 [9504 of 10840]

Epoch 9


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_05m_19s.pt


Accuracy: 0.874354 [9478 of 10840]

Epoch 10


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_10m_20s.pt


Accuracy: 0.875923 [9495 of 10840]

Epoch 11


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_15m_22s.pt


Accuracy: 0.878690 [9525 of 10840]

Epoch 12


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_20m_23s.pt


Accuracy: 0.880351 [9543 of 10840]

Epoch 13


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_25m_22s.pt


Accuracy: 0.878137 [9519 of 10840]

Epoch 14


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_30m_21s.pt


Accuracy: 0.877491 [9512 of 10840]

Epoch 15


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_35m_21s.pt


Accuracy: 0.880443 [9544 of 10840]

Epoch 16


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_40m_20s.pt


Accuracy: 0.879151 [9530 of 10840]

Epoch 17


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_45m_19s.pt


Accuracy: 0.878690 [9525 of 10840]

Epoch 18


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_50m_17s.pt


Accuracy: 0.880443 [9544 of 10840]

Epoch 19


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_22d_23h_55m_16s.pt


Accuracy: 0.879244 [9531 of 10840]

Epoch 20


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_00m_15s.pt


Accuracy: 0.878967 [9528 of 10840]

Epoch 21


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_05m_13s.pt


Accuracy: 0.878967 [9528 of 10840]

Epoch 22


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_10m_13s.pt


Accuracy: 0.880720 [9547 of 10840]

Epoch 23


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_15m_12s.pt


Accuracy: 0.881273 [9553 of 10840]

Epoch 24


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_20m_10s.pt


Accuracy: 0.879244 [9531 of 10840]

Epoch 25


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_25m_08s.pt


Accuracy: 0.878875 [9527 of 10840]

Epoch 26


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_30m_07s.pt


Accuracy: 0.878782 [9526 of 10840]

Ran out of patience, increasing train batch size to: 256
Epoch 27


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_33m_24s.pt


Accuracy: 0.879059 [9529 of 10840]

Epoch 28


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_36m_41s.pt


Accuracy: 0.879889 [9538 of 10840]

Epoch 29


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_39m_59s.pt


Accuracy: 0.879889 [9538 of 10840]

Epoch 30


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_43m_17s.pt


Accuracy: 0.880074 [9540 of 10840]

Epoch 31


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_46m_33s.pt


Accuracy: 0.880812 [9548 of 10840]

Epoch 32


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_49m_51s.pt


Accuracy: 0.880720 [9547 of 10840]

Epoch 33


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_53m_08s.pt


Accuracy: 0.879705 [9536 of 10840]

Epoch 34


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_56m_25s.pt


Accuracy: 0.879613 [9535 of 10840]

Ran out of patience, increasing train batch size to: 512
Epoch 35


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_00h_59m_11s.pt


Accuracy: 0.880627 [9546 of 10840]

Epoch 36


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_01m_56s.pt


Accuracy: 0.880074 [9540 of 10840]

Epoch 37


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_04m_42s.pt


Accuracy: 0.881089 [9551 of 10840]

Epoch 38


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_07m_28s.pt


Accuracy: 0.881181 [9552 of 10840]

Epoch 39


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_10m_13s.pt


Accuracy: 0.881089 [9551 of 10840]

Epoch 40


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_12m_57s.pt


Accuracy: 0.880627 [9546 of 10840]

Epoch 41


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_15m_42s.pt


Accuracy: 0.881273 [9553 of 10840]

Epoch 42


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_18m_28s.pt


Accuracy: 0.879889 [9538 of 10840]

Epoch 43


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_21m_14s.pt


Accuracy: 0.880904 [9549 of 10840]

Epoch 44


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_23m_59s.pt


Accuracy: 0.880443 [9544 of 10840]

Epoch 45


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_26m_45s.pt


Accuracy: 0.880166 [9541 of 10840]

Epoch 46


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_29m_30s.pt


Accuracy: 0.879705 [9536 of 10840]

Ran out of patience, increasing train batch size to: 1024
Epoch 47


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_32m_05s.pt


Accuracy: 0.881458 [9555 of 10840]

Epoch 48


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_34m_39s.pt


Accuracy: 0.879797 [9537 of 10840]

Epoch 49


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_37m_14s.pt


Accuracy: 0.880535 [9545 of 10840]

Epoch 50


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_39m_49s.pt


Accuracy: 0.881550 [9556 of 10840]

Epoch 51


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_42m_23s.pt


Accuracy: 0.880904 [9549 of 10840]

Epoch 52


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_44m_58s.pt


Accuracy: 0.879705 [9536 of 10840]

Epoch 53


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_47m_33s.pt


Accuracy: 0.879520 [9534 of 10840]

Ran out of patience, increasing train batch size to: 2048
Epoch 54


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_50m_14s.pt


Accuracy: 0.880535 [9545 of 10840]

Epoch 55


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_52m_56s.pt


Accuracy: 0.880166 [9541 of 10840]

Epoch 56


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_55m_38s.pt


Accuracy: 0.880904 [9549 of 10840]

Epoch 57


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_01h_58m_20s.pt


Accuracy: 0.880166 [9541 of 10840]

Epoch 58


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_02h_01m_02s.pt


Accuracy: 0.880166 [9541 of 10840]

Epoch 59


Saved Checkpoint: logs/0000.01-22_22:10:38.yu_relation_model/01m_23d_02h_03m_45s.pt


Accuracy: 0.880535 [9545 of 10840]

Epoch 60


RuntimeError: cuda runtime error (2) : out of memory at /pytorch/torch/lib/THC/generic/THCStorage.cu:58

# Evaluate

Play around with the evaluation of the model.

In [None]:
from collections import defaultdict
from lib.checkpoint import Checkpoint

# NOTE(review): make_model() only returns trained weights when `checkpoint` is loaded; otherwise
# this scores a freshly initialised model.
model = make_model()
model.train(mode=False)
# Example ID -> scored candidate relations of that example
examples = defaultdict(list)
# make_dev_iterator requires the batch size, and evaluation batches are
# (question, entity, relation, relation word, rows) as produced by collate_fn(train=False)
dev_iterator = make_dev_iterator(dev_batch_size)
for (question, entity, relation, relation_word, batch) in tqdm_notebook(dev_iterator):
    # Same four inputs the training loop feeds the model
    output = model(cuda_async(question), cuda_async(entity), cuda_async(relation), cuda_async(relation_word))
    output = output.data.cpu()

    for i, row in enumerate(batch):
        # Decode back to text so the collected examples are human readable
        examples[row['Example ID']].append({
            'Score': output[i],
            'Question': text_encoder.decode(row['Question']),
            'True Relation': relation_encoder.decode(row['True Relation']),
            'Relation': relation_encoder.decode(row['Relation'])
        })

# Print metrics
# An example counts as correct when its highest scoring candidate equals the true relation
correct = 0
for id_, pool in sorted(examples.items(), key=lambda item: item[0]):
    max_relation = max(pool, key=lambda p: p['Score'])
    if max_relation['Relation'] == max_relation['True Relation']:
        correct += 1
print('Accuracy: %f [%d of %d]' % (correct / len(examples), correct, len(examples)))
print()

Evaluate in the same way as step 3 of our end-to-end implementation. This shows that the order of execution matters.

In [None]:
import torch
from torch.autograd import Variable
from lib.utils import pad_batch

model = make_model()
# NOTE(review): this rebinds the notebook-wide `cuda` helper to one that uses the default GPU
# The else-branch must hand back the lambda's own argument when CUDA is unavailable
cuda = lambda v: v.cuda() if torch.cuda.is_available() else v
# Stack a list of padded tensors, transpose, and wrap as an inference-only (volatile) Variable
to_variable = lambda b: cuda(Variable(torch.stack(b).t_().contiguous(), volatile=True))
    
def get_relation_scores(questions, relations, entities=None):
    """ Score each question against the relation at the same position.

    Args:
        questions (iterable of str): Raw question text.
        relations (iterable of str): Raw relation strings, aligned with `questions`.
        entities (iterable of str, optional): Raw entity text, aligned with `questions`. When
            given, the model is called with (question, entity, relation, relation word), the
            same inputs the training loop uses. When omitted, the original three-input call
            (question, relation, relation word) is kept.

    Returns:
        The `.data` tensor of the model output for the batch.
    """
    questions = list(questions)
    questions_encoded, _ = pad_batch([text_encoder.encode(q) for q in questions])
    relations_encoded, _ = pad_batch([relation_encoder.encode(r) for r in relations])
    relations_word_encoded, _ = pad_batch([relation_word_encoder.encode(r) for r in relations])

    questions_encoded = to_variable(questions_encoded)
    relations_encoded = to_variable(relations_encoded)
    relations_word_encoded = to_variable(relations_word_encoded)

    if entities is None:
        return model(questions_encoded, relations_encoded, relations_word_encoded).data

    entities_encoded, _ = pad_batch([entity_encoder.encode(e) for e in entities])
    entities_encoded = to_variable(entities_encoded)
    return model(questions_encoded, entities_encoded, relations_encoded, relations_word_encoded).data

# To test this cell: score one question against two candidate relations, one call per relation
question = 'where was <e> born ?'
print('Question:', question)
# print('Scores:')
print(get_relation_scores([question], ['people/person/place_of_birth']))
print(get_relation_scores([question], ['location/location/people_born_here']))

In [None]:
from tqdm import tqdm_notebook
import pandas as pd

relation_correct = 0
# dev.txt has four tab-separated columns (see relation_ranking_dataset). All four must be named:
# with only three names pandas turns the first column into the index and shifts the rest.
df = pd.read_table('../../data/relation_ranking/dev.txt',
                   header=None, names=['True Relation', 'Relation Pool', 'Question', 'Entity'])
for i, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    # Deduplicate the pool; the set is not modified below, so iteration order stays stable
    candidate_relations = set(row['Relation Pool'].split())
    true_relation = row['True Relation']
    question = row['Question'].strip()
    # Repeat the question once per candidate so the whole pool is scored as one batch
    questions, relations = zip(*[(question, r) for r in candidate_relations])
    # NOTE(review): the model in the training loop also takes an entity input; the 'Entity'
    # column is not passed to get_relation_scores here -- confirm against its signature.
    scores = get_relation_scores(questions, relations)
    # Pick the candidate whose position in the batch has the highest score
    max_relation = max([(r, i) for i, r in enumerate(candidate_relations)],
                       key=lambda item: float(scores[item[1]]))[0]

    if max_relation == true_relation:
        relation_correct += 1

print('Relation Accuracy (SOTA 89%%): %f [%d of %d]' %
          (relation_correct / df.shape[0], relation_correct, df.shape[0]))