In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from metal.mmtl.utils.utils import stack_batches
from pytorch_pretrained_bert import BertTokenizer
from metal.mmtl.metal_model import MetalModel
from metal.mmtl.scorer import Scorer
from metal.mmtl.bert_tasks import create_tasks
from metal.mmtl.trainer import MultitaskTrainer

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


### Config

In [3]:
# --- Model -------------------------------------------------------------
# Pretrained checkpoint name; given to both create_tasks and the tokenizer.
bert_model = "bert-base-uncased"
# Forwarded to create_tasks as `bert_output_dim`.
bert_model_output_shape = 768

# --- Data --------------------------------------------------------------
# Forwarded to create_tasks as `max_datapoints`.
max_datapoints = 200
# Forwarded to create_tasks as `max_len`.
max_len = 200
# Forwarded to create_tasks as `split_prop`.
split_prop = 0.8
# Becomes the `batch_size` entry of the data-loader kwargs.
batch_size = 16

In [4]:
# Build the task list (a single task, QNLIR) from the config values above.
tasks = create_tasks(
    ["QNLIR"],
    bert_model,
    max_len=max_len,
    max_datapoints=max_datapoints,
    split_prop=split_prop,
    bert_output_dim=bert_model_output_shape,
    bert_kwargs=dict(),
    dl_kwargs=dict(batch_size=batch_size),
)

Loading QNLIR Dataset
/dfs/scratch0/bradenjh/glue/QNLIR/train.tsv



/dfs/scratch0/bradenjh/glue/QNLI/dev.tsv





Visualize some examples:

In [5]:
# Tokenizer for the same pretrained checkpoint; used to turn token ids back
# into readable word pieces.
tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=True)

# Fetch only the first validation batch for inspection. next(iter(...))
# raises StopIteration on an empty loader, whereas a `for ...: break` loop
# would silently leave x and y undefined.
x, y = next(iter(tasks[0].data_loaders['valid']))

In [6]:
# Print the first six examples of the batch as space-separated word pieces.
# Padding tokens are dropped before joining so the printed lines do not end
# in a long run of trailing spaces.
for token_ids in x[0][:6]:
    pieces = tokenizer.convert_ids_to_tokens(token_ids.numpy())
    print(' '.join(piece for piece in pieces if piece != '[PAD]'))

[CLS] as ##tp stands for what ? [SEP] the module was also necessary as an air ##lock to allow the men to visit each other ' s craft , which had incompatible cabin atmosphere ##s . [SEP]                                              
[CLS] as ##tp stands for what ? [SEP] the two nations planned a joint mission to dock the last us apollo craft with a soyuz , known as the apollo - soyuz test project ( as ##tp ) . [SEP]                                           
[CLS] " " " compensation " " is the name of the system used for what type of elevators ? " [SEP] elevators with more than 30 m ( 98 ft ) of travel have a system called compensation . [SEP]                                          
[CLS] " " " compensation " " is the name of the system used for what type of elevators ? " [SEP] this makes it easier to control the elevator , as it compensate ##s for the differing weight of cable between the ho ##ist and the cab . [SEP]                                 
[CLS] a wrestler may have a small 

In [7]:
# Instantiate the model over the task list built earlier (verbose=False).
model = MetalModel(tasks, verbose=False)

In [8]:
# Sanity check: run a single validation batch through the model.
# The model object is called directly (as the later cells do) instead of
# invoking .forward() by hand.
X, Y = next(iter(tasks[0].data_loaders['valid']))
preds = model(X, ['QNLIR'])

In [None]:
# Show the dict of per-task outputs computed for the first validation batch.
preds

{'QNLIR': tensor([[ 0.5097],
         [ 0.2278],
         [ 0.2198],
         [ 0.1711],
         [ 0.8092],
         [ 0.4342],
         [ 0.2874],
         [ 0.5476],
         [ 0.3099],
         [ 0.8457],
         [ 0.7228],
         [ 0.2499],
         [ 1.0143],
         [ 0.6257],
         [-0.0940],
         [ 0.2779]], device='cuda:0', grad_fn=<MmBackward>)}

In [None]:
# Train for one epoch, checkpointing on the best (max) QNLIR validation
# accuracy. Requires the METALHOME environment variable to be set.
trainer = MultitaskTrainer()
trainer.train_model(
    model,
    tasks,
    n_epochs=1,
    lr=1e-4,  # i.e. 10e-5
    checkpoint_dir=f"{os.environ['METALHOME']}/checkpoints/qnli_single",
    checkpoint_metric="QNLIR/valid/accuracy",
    checkpoint_metric_mode="max",
)

Beginning train loop.
Expecting a total of _approximately_ 320 examples and 20 batches per epoch from 1 tasks.


  warn("Attempting to work in a virtualenv. If you encounter problems, please "


Python 3.6.7 | packaged by conda-forge | (default, Nov 21 2018, 03:09:43) 
Type 'copyright', 'credits' or 'license' for more information
IPython 6.2.1 -- An enhanced Interactive Python. Type '?' for help.

In [1]: gold
Out[1]: 
array([2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1,
       2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2,
       1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
       2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2])

In [2]: gold = (1-gold) + 1

In [3]: gold
Out[3]: 
array([0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0])

In [4]: outputs
Out[4]: 
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 

In [12]:
# Print the model output for every batch of the validation loader.
valid_loader = tasks[0].data_loaders['valid']
for X, Y in valid_loader:
    batch_output = model(X, ['QNLIR'])
    print(batch_output)

{'QNLIR': tensor([[ 0.7284],
        [-2.5780],
        [-1.9244],
        [ 0.7577],
        [ 0.5481],
        [-0.9372],
        [ 0.8391],
        [ 0.3534],
        [-1.4872],
        [ 0.8267],
        [ 0.8482],
        [ 0.6817],
        [-2.5276],
        [ 0.8168],
        [-1.4762],
        [ 0.8052]], device='cuda:0', grad_fn=<MmBackward>)}
{'QNLIR': tensor([[ 0.6975],
        [-0.2452],
        [ 0.5789],
        [ 0.5313],
        [ 0.6291],
        [ 0.3803],
        [ 0.2026],
        [ 0.7577],
        [ 0.6447],
        [ 0.2175],
        [-2.4415],
        [ 0.7474],
        [-0.1609],
        [ 0.8786],
        [ 0.1403],
        [ 0.6800]], device='cuda:0', grad_fn=<MmBackward>)}
{'QNLIR': tensor([[ 0.6214],
        [ 0.7674],
        [ 0.0053],
        [ 0.8215],
        [ 0.8320],
        [-0.5162],
        [ 0.6998],
        [ 0.1841],
        [ 0.7991],
        [-0.0138],
        [ 0.8508],
        [-1.4799],
        [ 0.2942],
        [ 0.6527],
        [-0.49

In [None]:
# Inspect the forward output, the loss and the task output on one validation
# batch. The device is chosen at runtime so the cell also runs on CPU-only
# machines instead of failing on hard-coded .cuda() calls.
# NOTE(review): assumes the model sits on the same device chosen here
# (GPU when available) - confirm against MetalModel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X, Y = next(iter(tasks[0].data_loaders['valid']))
X = [part.to(device) for part in X]
print(model(X, ['QNLIR']))
print(model.calculate_loss(X, Y.to(device), ['QNLIR']))
print(model.calculate_output(X, ['QNLIR']))