In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from pprint import pprint
import numpy as np
import pandas as pd
import torch

In [3]:
from metal.mmtl.glue_tasks import create_tasks_and_payloads

# GLUE tasks to load. Only RTE is enabled for this walkthrough; the other names
# that were toggled here are COLA, SST2, MNLI, WNLI, QQP, MRPC, STSB and QNLI.
task_names = ["RTE"]

# Small limits keep the demo quick (the progress bars in the recorded output
# stop at 200 items per split).
# NOTE(review): the recorded output reports a randomly chosen seed, so runs are
# not reproducible as written -- confirm how create_tasks_and_payloads accepts
# a fixed seed before relying on exact numbers.
tasks, payloads = create_tasks_and_payloads(
    task_names, max_datapoints=200, max_len=200, dl_kwargs=dict(batch_size=4)
)
print(tasks)
pprint(payloads)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
Using random seed: 608029
Loading RTE Dataset


HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




HBox(children=(IntProgress(value=0, max=200), HTML(value='')))


[ClassificationTask(name=RTE)]
[Payload(RTE_train: tasks=[RTE], split=train),
 Payload(RTE_valid: tasks=[RTE], split=valid),
 Payload(RTE_test: tasks=[RTE], split=test)]


In [4]:
import copy
import numpy as np
from torch.utils.data import SequentialSampler

def mark_thirds(x):
    """Label every position of a sequence with the third (1, 2 or 3) it falls in.

    Args:
        x: any sized sequence; only ``len(x)`` is used.

    Returns:
        An ``np.int64`` array of length ``len(x)`` whose entry for the 1-based
        position ``i`` is ``ceil(3 * i / len(x))``.
    """
    n_tokens = len(x)
    # 1-based positions, so the last position always maps to label 3.
    positions = np.arange(1, n_tokens + 1)
    return np.ceil(positions * 3 / n_tokens).astype(np.int64)

def make_thirds_labelset(payload):
    """Build the token-level "thirds" labels for every example in a payload.

    Args:
        payload: object exposing ``data_loader.dataset.tokens``, an iterable
            of token sequences.

    Returns:
        A list with one entry per token sequence, each being the label array
        produced by ``mark_thirds`` for that sequence.
    """
    token_sequences = payload.data_loader.dataset.tokens
    return [mark_thirds(sequence) for sequence in token_sequences]

# def overwrite_labels(dataset, label_generator):
#     """Creates a copy of dataset where the labels are overwritten with a new labelset
    
#     Args:
#         dataset: a dataset consisting of (X, Ys)
#         label_generator: a function which maps from a single instance x to its new label
#     Returns:
#         dataset_new: a new dataset where the labels have been overwritten by label_generator
#     """
#     dataset_new = copy.deepcopy(dataset)
#     X = dataset_new.tokens
#     Y_new = []
#     for x in X:
#         Y_new.append(label_generator(x))
#     dataset_new.labels = [Y_new]
#     return dataset_new

# def append_labels(dataset, label_generator):
#     X = dataset.tokens
#     Y_new = []
#     for x in X:
#         Y_new.append(label_generator(x))
#     dataset.labels.append(Y_new)
#     return dataset   

## Create Token Task

In [5]:
from metal.mmtl.task import TokenClassificationTask
from metal.mmtl.modules import BertTokenClassificationHead
from metal.mmtl.payload import Payload

base_task = tasks[0]
# Reuse the existing task's input module so both heads share one encoder.
bert_module = base_task.input_module
# 3 output classes: one per label (1, 2, 3) emitted by mark_thirds.
# NOTE(review): 768 is presumably the encoder's hidden size -- confirm it
# matches the BERT variant loaded by create_tasks_and_payloads.
token_head = BertTokenClassificationHead(768, 3)

thirds_task = TokenClassificationTask(
    name="THIRD",
    input_module=bert_module,
    head_module=token_head,
)

# Replace any THIRD task left over from an earlier run of this cell instead of
# blindly appending, so re-running the cell never registers the task twice.
# The list is updated in place so other references to `tasks` stay valid.
tasks[:] = [
    task for task in tasks if getattr(task, "name", None) != "THIRD"
] + [thirds_task]
pprint(tasks)

[ClassificationTask(name=RTE), TokenClassificationTask(name=THIRD)]


### Create new payloads with only token labels

In [6]:
# token_payloads = []
# for payload in payloads:
#     payload = copy.deepcopy(payload)
#     payload.name = f"THIRD_{payload.split}"
#     payload.task_names = ["THIRD"]
#     payload.data_loader.dataset.labels = {"THIRD": make_thirds_labelset(payload)}
#     token_payloads.append(payload)
# payloads.extend(token_payloads)

# pprint(payloads)
# print(tasks)

### Supplement an existing payload with an additional token labelset

In [7]:
# Attach the token-level THIRD labelset to every existing payload.
for payload in payloads:
    # Guard the append so re-running this cell does not list THIRD twice on
    # the same payload.
    if "THIRD" not in payload.task_names:
        payload.task_names.append("THIRD")
    # Keyed assignment is already safe to repeat: it overwrites the old labels.
    payload.data_loader.dataset.labels["THIRD"] = make_thirds_labelset(payload)

pprint(payloads)
print(tasks)

[Payload(RTE_train: tasks=[RTE,THIRD], split=train),
 Payload(RTE_valid: tasks=[RTE,THIRD], split=valid),
 Payload(RTE_test: tasks=[RTE,THIRD], split=test)]
[ClassificationTask(name=RTE), TokenClassificationTask(name=THIRD)]


In [8]:
# for batch in tasks[0].data_loaders["train"]:
#     X, Ys = batch
#     tokens, segments, masks = X
#     batch_size, seq_len = tokens.shape
#     print(tokens)
#     print(tokens.shape)
#     print(segments)
#     print(masks)
# #     active_lens = masks.sum(dim=1)
# #     print(active_lens)
#     print(Ys)
#     print(Ys[0].shape)
#     break

## Train a model

In [9]:
import os
# Show which GPUs this process is restricted to, or a placeholder when the
# environment variable is absent.
print(
    "Cuda devices: ",
    os.environ.get("CUDA_VISIBLE_DEVICES", "[Not Set]"),
)

Cuda devices:  [Not Set]


In [10]:
from metal.mmtl.metal_model import MetalModel

# One model built from every task in `tasks`.
model = MetalModel(tasks, verbose=False)
# Number of trainable scalars: only parameters with requires_grad are counted.
print(
    sum(
        weights.numel()
        for weights in model.parameters()
        if weights.requires_grad
    )
)

109486085


In [11]:
from metal.mmtl.trainer import MultitaskTrainer
# Trainer created with no constructor arguments; the run-specific settings are
# passed to train_model in the next cell.
trainer = MultitaskTrainer()

In [12]:
# Settings for a single short epoch, logging and scoring twice per epoch and
# checkpointing on the lowest validation loss.
train_settings = dict(
    task_metrics=["model/valid/loss"],
    checkpoint_metric="model/valid/loss",
    test_split="valid",
    checkpoint_metric_mode="min",
    checkpoint_tasks=False,
    n_epochs=1,
    lr=1e-5,
    log_every=0.5,
    score_every=0.5,
    progress_bar=True,
)
trainer.train_model(model, payloads, **train_settings)

Beginning train loop.
Expecting a total of approximately 200 examples and 50 batches per epoch from 1 payload(s) in the train split.


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))



[0.50 epo]: RTE:[train/loss=7.03e-01, RTE_valid/accuracy=5.40e-01] THIRD:[train/loss=1.07e+00, RTE_valid/token_acc=5.01e-01] model:[train/loss=8.85e-01, train/lr=1.00e-05, valid/loss=8.54e-01, valid/glue=5.40e-01]
[1.0 epo]: RTE:[train/loss=6.93e-01, RTE_valid/accuracy=4.55e-01] THIRD:[train/loss=9.95e-01, RTE_valid/token_acc=6.37e-01] model:[train/loss=8.44e-01, train/lr=1.00e-05, valid/loss=8.05e-01, valid/glue=4.55e-01]

Finished training
{'RTE/RTE_valid/accuracy': 0.455,
 'THIRD/RTE_valid/token_acc': 0.637119213358092,
 'model/valid/glue': 0.455,
 'model/valid/loss': 0.8065810185670853}
Cleaning checkpoints


In [13]:
# End-of-notebook marker: reaching this line means every cell above ran
# without raising.
print("WE WIN!")

WE WIN!
