In [146]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the metal.mmtl code under test) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

from metal.mmtl.task import Task
from metal.mmtl.scorer import Scorer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [172]:
#########################
# Create Ines's model 
#########################
import os 

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.functional as F
from tqdm import tqdm
from pytorch_pretrained_bert import BertTokenizer, BertModel

from metal.mmtl.dataset import BERTDataset
from metal.end_model import EndModel

# Hyperparameters shared by the dataloaders and the training call below.
batch_size = 32       # forwarded to get_dataloader
max_len = 200         # forwarded to get_dataloader
weight_decay = 0.01   # handed to train_model as `l2`
epochs = 1            # handed to train_model as `n_epochs`
lr = 0.001            # handed to train_model as `lr`

model = 'bert-base-uncased' # also try bert-base-multilingual-cased (recommended)

# Template for the per-split QNLI files under $GLUEDATA; '{}' receives the split name.
src_path = os.path.join(os.environ['GLUEDATA'], 'QNLI/{}.tsv')

# Build one dataloader per split, keyed by split name.
dataloaders = {}
for split in ['train', 'test', 'dev']:
    # train/dev take their label from column 3; the test split passes -1
    # (presumably "no label column" for BERTDataset -- TODO confirm).
    label_idx = 3 if split in ['train', 'dev'] else -1
    dataset = BERTDataset(
        # Use the loop's split name so every dataloader is backed by its own
        # file. Formatting a fixed "dev" here would make the 'train', 'test'
        # and 'dev' loaders three copies of the dev set.
        src_path.format(split),
        sent1_idx=1,
        sent2_idx=2,
        label_idx=label_idx,
        # NOTE(review): this skips the first 400 rows of every file -- confirm
        # it is intentional and not a debugging leftover.
        skip_rows=400,
        # Map the string label to a class id: 'entailment' -> 1, anything else -> 2.
        label_fn=lambda label: 1 if label=='entailment' else 2
    )
    dataloaders[split] = dataset.get_dataloader(max_len=max_len, batch_size=batch_size)
    
class BertEncoder(nn.Module):
    """Input module that runs a pretrained BERT model and returns its pooled output.

    Args:
        bert_model_name: identifier handed to ``BertModel.from_pretrained``.
            Defaults to 'bert-base-uncased'.
        freeze: when True (the default), ``requires_grad`` is switched off for
            every BERT parameter so only modules stacked on top are trained.
    """

    def __init__(self, bert_model_name='bert-base-uncased', freeze=True):
        super(BertEncoder, self).__init__()
        self.bert_model = BertModel.from_pretrained(bert_model_name)
        if freeze:
            for param in self.bert_model.parameters():
                param.requires_grad = False

    def forward(self, data):
        """Encode one batch.

        Args:
            data: a 3-item sequence unpacked as (tokens, segments, masks) and
                forwarded to BERT as input_ids, token_type_ids and
                attention_mask respectively.

        Returns:
            The second value returned by ``BertModel`` (its pooled output).
        """
        tokens, segments, masks = data
        # Only the pooled output is used, so ask BERT not to collect the
        # per-layer encodings (the first return value is discarded anyway).
        # TODO: revisit if a head ever needs the per-token hidden states.
        _, pooled_output = self.bert_model(
            input_ids=tokens,
            token_type_ids=segments,
            attention_mask=masks,
            output_all_encoded_layers=False,
        )
        return pooled_output
    
# Instantiate the BERT encoder, then an EndModel that uses it as its input module.
encoder_module = BertEncoder()
end_model = EndModel(
    [768, 2],  # TODO: remove bias
    input_module=encoder_module,
    device=torch.device('cuda'),
    seed=123,
    verbose=False,
    # No extra processing between the encoder output and the head.
    skip_head=False,
    input_batchnorm=False,
    input_relu=False,
)

# NOTE: the 'dev' dataloader is used for both training and validation here.
end_model.train_model(
    train_data=dataloaders['dev'],
    valid_data=dataloaders['dev'],
    # Optimisation settings come from the hyperparameters defined earlier.
    n_epochs=epochs,
    lr=lr,
    l2=weight_decay,
    # Log train and validation results once per epoch.
    log_unit='epochs',
    log_train_every=1,
    log_valid_every=1,
    checkpoint=False,
    progress_bar=True,
    verbose=True,
)
    











  0%|          | 0/5064 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A









  2%|▏         | 95/5064 [00:00<00:05, 944.04it/s][A[A[A[A[A[A[A[A[A[A









  4%|▍         | 203/5064 [00:00<00:04, 978.79it/s][A[A[A[A[A[A[A[A[A[A









  6%|▌         | 306/5064 [00:00<00:04, 993.33it/s][A[A[A[A[A[A[A[A[A[A









  8%|▊         | 412/5064 [00:00<00:04, 1010.29it/s][A[A[A[A[A[A[A[A[A[A









 10%|█         | 519/5064 [00:00<00:04, 1024.68it/s][A[A[A[A[A[A[A[A[A[A









 12%|█▏        | 626/5064 [00:00<00:04, 1035.46it/s][A[A[A[A[A[A[A[A[A[A









 15%|█▍        | 736/5064 [00:00<00:04, 1053.19it/s][A[A[A[A[A[A[A[A[A[A









 17%|█▋        | 883/5064 [00:00<00:03, 1150.35it/s][A[A[A[A[A[A[A[A[A[A









 20%|██        | 1016/5064 [00:00<00:03, 1198.59it/s][A[A[A[A[A[A[A[A[A[A









 23%|██▎       | 1163/5064 [00:01<00:03, 1267.25it/s][A[A[A[A[A[A[A[A[A[A



 27%|██▋       | 1391/5064 [00:01<00:02, 1384.88it/s][A[A[A[A[A[A[A[A[A[A









 30%|███       | 1528/5064 [00:01<00:02, 1378.21it/s][A[A[A[A[A[A[A[A[A[A









 33%|███▎      | 1664/5064 [00:01<00:02, 1359.90it/s][A[A[A[A[A[A[A[A[A[A









 36%|███▌      | 1804/5064 [00:01<00:02, 1371.56it/s][A[A[A[A[A[A[A[A[A[A









 38%|███▊      | 1940/5064 [00:01<00:02, 1361.04it/s][A[A[A[A[A[A[A[A[A[A









 41%|████      | 2076/5064 [00:01<00:02, 1354.62it/s][A[A[A[A[A[A[A[A[A[A









 44%|████▎     | 2211/5064 [00:01<00:02, 1347.03it/s][A[A[A[A[A[A[A[A[A[A









 46%|████▋     | 2354/5064 [00:01<00:01, 1369.70it/s][A[A[A[A[A[A[A[A[A[A









 49%|████▉     | 2492/5064 [00:01<00:01, 1371.87it/s][A[A[A[A[A[A[A[A[A[A









 52%|█████▏    | 2630/5064 [00:01<00:01, 1373.78it/s][A[A[A[A[A[A[A[A[A[A









 55%|█████▍    | 2769/5064 [00:02<00:01, 1376.96it/s][A[A[A[A[A[

Using GPU...


HBox(children=(IntProgress(value=0, max=159), HTML(value='')))

Finished Training
Accuracy: 0.682
        y=1    y=2   
 l=1   1837    686   
 l=2    926   1615   


In [None]:
# Test 
def custom_eval_function(Y, Y_pred, probs=None):
    """Placeholder metric function: announce the call and report a constant.

    None of the arguments (Y, Y_pred, probs) is read.

    Returns:
        A dict with the single key "custom_metric" mapped to 0.
    """
    print("Running custom_eval_function")
    metrics = dict(custom_metric=0)
    return metrics

# Create a scorer that relies only on the custom metric function
# (standard_metrics are broken, so none are requested)
dummy_scorer = Scorer(standard_metrics=[], custom_metric_fns=[custom_eval_function])

# Create task with scorer
data_loaders = [dataloaders[x] for x in ["train", "test", "dev"]]
foo_task = Task(name="foo_task", 
                input_module=encoder_module,
                head_module=end_model,
                # Attach the scorer created above. The bare name `scorer` is not
                # defined in any cell shown here, so referencing it raises
                # NameError on a fresh kernel.
                data_loaders=data_loaders, scorers=[dummy_scorer])

# Call scorer on model / task / etc (data_loaders[-1] is the 'dev' loader)
dummy_scorer(foo_task, end_model, data_loaders[-1], split_name="test_scorer")

Batch 0 of 159
Batch 1 of 159


In [None]:
# Test a scorer configured with a standard metric instead of a custom function

# Scorer that requests the standard "f1" metric only
dummy_loss_scorer = Scorer(standard_metrics=["f1"])

# Collect the loaders in train / test / dev order and wrap them in a task
data_loaders = [dataloaders[split_name] for split_name in ("train", "test", "dev")]
foo_task = Task(
    name="foo_task",
    input_module=encoder_module,
    head_module=end_model,
    data_loaders=data_loaders,
    scorers=[dummy_loss_scorer],
)

# Run the scorer against the last loader in the list (the 'dev' one)
dummy_loss_scorer(foo_task, end_model, data_loaders[-1], split_name="test_scorer")

In [None]:
# Test head_output optimization