In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch.utils.data import DataLoader, TensorDataset, Dataset
import numpy as np
import pandas as pd
import metal
import os
from pytorch_pretrained_bert import BertTokenizer, BertModel

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [3]:
from dataset import BERTDataset

In [4]:
model = 'bert-base-uncased' # also try bert-base-multilingual-cased (recommended)
src_path = os.path.join(os.environ['GLUEDATA'], 'MNLI/{}.tsv')
labels = ["contradiction", "entailment", "neutral"]

dataloaders = {}
for split in ['train', 'dev_matched']: #, 'train', 'test']:
    label_idx = 11 if split in ['train', 'dev_matched'] else -1
    dataset = BERTDataset(
        src_path.format(split),
        sent1_idx=8,
        sent2_idx=9,
        label_idx=label_idx,
        skip_rows=1,
        label_fn=lambda label: labels.index(label)+1,
        max_len=128,
    )
    dataloaders[split] = dataset.get_dataloader(batch_size=32)

100%|██████████| 392702/392702 [03:34<00:00, 1832.05it/s]
100%|██████████| 9815/9815 [00:05<00:00, 1776.25it/s]


In [5]:
import torch.nn as nn
from metal.end_model import EndModel

hidden_dropout_prob = 0.1

class BertEncoder(nn.Module):
    def __init__(self):
        super(BertEncoder, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(hidden_dropout_prob)
#         for param in self.bert_model.parameters():
#             param.requires_grad = False

    def forward(self, data):
        tokens, segments, mask = data
        # TODO: check if we should return all layers or just last hidden representation 
        _, hidden_layer = self.bert_model(tokens, segments, mask, output_all_encoded_layers=False)
        hidden_layer = self.dropout(hidden_layer)

        return hidden_layer

In [6]:
encoder_module = BertEncoder()
end_model = EndModel(
    [768, 3],
    input_module=encoder_module,
    seed=123,
    skip_head=False,
    input_relu=False,
    input_batchnorm=False,
    verbose=False,
    device=torch.device("cuda")
)

In [9]:
end_model.train_model(
    dataloaders["train"],
    valid_data=dataloaders["dev_matched"],
    lr=5e-5,
    l2=0,
    n_epochs=5,
#     checkpoint_metric="model/train/loss",
    checkpoint_metric="valid/accuracy",
    log_unit="batches",
    checkpoint_metric_mode="max",
    verbose=True,
    progress_bar=True,
)

Using GPU...


Exception ignored in: <generator object tqdm_notebook.__iter__ at 0x7fbb33af19e8>
Traceback (most recent call last):
  File "/dfs/scratch0/vschen/venv-mmtl/lib/python3.6/site-packages/tqdm/_tqdm_notebook.py", line 226, in __iter__
    self.sp(bar_style='danger')
AttributeError: 'tqdm_notebook' object has no attribute 'sp'


AssertionError: 

In [None]:
# Test end model
end_model.score(dataloaders["dev_matched"], metric=["accuracy", "precision", "recall", "f1"])

In [10]:
for x, y in dataloaders["dev_matched"]:
    print(y)

tensor([2, 1, 2, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 2, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3,
        3, 1, 3, 1, 3, 3, 2, 3])
tensor([1, 2, 2, 3, 1, 2, 2, 1, 3, 1, 3, 1, 1, 1, 2, 3, 2, 2, 3, 1, 3, 3, 1, 3,
        2, 1, 1, 3, 3, 2, 3, 1])
tensor([1, 3, 2, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 3, 1, 2, 2, 2, 3, 1, 2, 1, 2, 3,
        2, 3, 3, 3, 1, 2, 2, 3])
tensor([2, 2, 3, 2, 3, 3, 2, 2, 3, 2, 2, 3, 1, 3, 1, 2, 2, 2, 3, 3, 2, 2, 2, 3,
        2, 3, 2, 1, 2, 3, 1, 2])
tensor([2, 2, 2, 3, 2, 1, 2, 2, 2, 1, 2, 3, 3, 2, 3, 3, 1, 3, 2, 3, 3, 1, 1, 2,
        2, 2, 3, 3, 2, 2, 1, 2])
tensor([1, 2, 1, 3, 3, 3, 1, 2, 1, 3, 3, 3, 2, 3, 3, 1, 1, 2, 3, 2, 1, 1, 3, 2,
        2, 2, 1, 2, 2, 3, 3, 1])
tensor([3, 2, 1, 2, 2, 1, 2, 2, 3, 1, 2, 3, 2, 2, 2, 1, 3, 3, 2, 1, 2, 3, 2, 1,
        1, 2, 1, 3, 2, 1, 2, 1])
tensor([3, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, 2, 1, 3, 3, 2, 1, 1, 2, 2,
        1, 2, 3, 3, 3, 1, 3, 2])
tensor([3, 3, 3, 2, 1, 2, 1, 2, 2, 3, 1, 3, 3, 3, 2, 1, 3, 2, 2, 2, 3, 1, 2, 2,
        2, 2, 1,

tensor([2, 2, 1, 3, 3, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 1,
        3, 2, 3, 2, 3, 3, 3, 2])
tensor([2, 3, 3, 3, 3, 1, 2, 1, 1, 1, 2, 2, 3, 2, 1, 3, 2, 2, 2, 3, 2, 2, 3, 1,
        2, 1, 2, 3, 3, 3, 2, 1])
tensor([2, 1, 3, 1, 1, 1, 2, 2, 1, 3, 2, 3, 2, 1, 1, 2, 3, 1, 2, 1, 1, 3, 3, 1,
        3, 3, 2, 1, 1, 1, 1, 3])
tensor([2, 2, 1, 1, 3, 3, 3, 2, 3, 3, 1, 1, 2, 3, 1, 2, 2, 3, 1, 2, 3, 2, 2, 2,
        3, 3, 1, 2, 1, 1, 2, 1])
tensor([1, 3, 2, 3, 1, 2, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 2, 1, 3, 3, 2, 2, 2,
        3, 2, 2, 1, 3, 1, 3, 1])
tensor([2, 2, 1, 2, 1, 3, 1, 1, 2, 1, 3, 1, 1, 3, 1, 3, 3, 3, 2, 2, 1, 2, 2, 2,
        3, 1, 2, 2, 1, 1, 1, 2])
tensor([1, 2, 1, 3, 1, 3, 1, 1, 3, 1, 2, 1, 3, 3, 2, 2, 3, 1, 1, 1, 1, 2, 2, 2,
        3, 2, 2, 1, 3, 3, 2, 1])
tensor([3, 1, 1, 3, 3, 2, 1, 1, 3, 3, 1, 2, 3, 1, 3, 1, 1, 2, 1, 1, 3, 2, 2, 2,
        1, 2, 3, 1, 1, 1, 2, 1])
tensor([3, 2, 2, 3, 3, 3, 1, 3, 2, 2, 2, 3, 1, 2, 2, 1, 1, 3, 3, 1, 1, 1, 1, 1,
        1, 3, 2,

tensor([2, 3, 1, 1, 1, 2, 3, 3, 2, 2, 3, 3, 1, 1, 3, 2, 1, 1, 3, 2, 1, 1, 1, 1,
        1, 1, 2, 2, 1, 1, 2, 2])
tensor([1, 2, 1, 3, 3, 1, 3, 2, 3, 3, 3, 2, 3, 1, 3, 2, 1, 1, 3, 2, 2, 3, 1, 2,
        2, 2, 1, 1, 1, 1, 1, 3])
tensor([1, 1, 1, 2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 2, 1, 1, 3, 2, 3, 3, 2, 2, 3, 2,
        2, 2, 2, 2, 3, 3, 2, 2])
tensor([3, 2, 3, 2, 2, 2, 2, 1, 2, 3, 2, 1, 3, 3, 3, 1, 1, 2, 3, 3, 3, 1, 2, 3,
        3, 2, 1, 3, 3, 2, 2, 2])
tensor([2, 1, 2, 1, 2, 2, 2, 3, 3, 2, 2, 3, 1, 3, 2, 1, 3, 1, 2, 2, 3, 1, 3, 3,
        3, 2, 1, 3, 3, 1, 1, 3])
tensor([1, 3, 1, 1, 2, 2, 2, 2, 1, 3, 1, 2, 3, 2, 3, 1, 3, 3, 1, 1, 2, 2, 2, 1,
        2, 2, 2, 2, 2, 3, 1, 1])
tensor([2, 1, 1, 3, 2, 3, 2, 2, 3, 3, 2, 1, 3, 2, 3, 3, 1, 1, 2, 1, 1, 3, 1, 2,
        2, 3, 3, 2, 2, 1, 1, 2])
tensor([2, 2, 1, 3, 3, 1, 2, 2, 1, 1, 2, 1, 3, 2, 3, 3, 2, 2, 3, 2, 2, 3, 1, 2,
        2, 1, 1, 2, 3, 2, 3, 1])
tensor([2, 1, 1, 1, 2, 2, 3, 3, 1, 1, 1, 2, 1, 3, 3, 3, 1, 2, 2, 2, 2, 1, 3, 3,
        1, 1, 3,