# COPA Demo

In [1]:
import sys, os
from pathlib import Path

# Capture the starting directory only on the first run of this cell, so that
# re-running it later keeps the originally recorded location.
if "cwd" not in globals():
    cwd = Path.cwd()
# Put the parent of that directory at the front of the module search path.
sys.path.insert(0, str(cwd.parents[0]))

In [2]:
# Set before any CUDA work starts; "1" requests blocking (synchronous) kernel launches.
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1"})

In [3]:
# Task selection: primary task, auxiliary task, and the pretrained encoder name.
TASK_NAME, AUX_TASK_NAME = "ISEAR", "SWAG"
BERT_MODEL = "bert-base-uncased"

# Keyword arguments forwarded to get_dataloaders().
# data_dir is the parent of the current working directory.
dataloader_config = dict(
    batch_size=16,
    data_dir=Path(os.getcwd()).parents[0],
    splits=["train", "dev"],
    max_sequence_length=60,
)

# Keyword arguments forwarded to the Trainer constructor.
trainer_config = dict(
    lr=2e-4,
    optimizer="sgd",
    n_epochs=10,
    checkpointing=1,
    logging=1,
    grad_clip=None,
)

### Train Primary Task from BERT

In [4]:
from dataloaders import get_dataloaders

# Loading primary task data: build the dataloaders for TASK_NAME, tokenized
# with the BERT_MODEL tokenizer; remaining options come from dataloader_config.
# The result is later unpacked as (train loader, dev loader).
# NOTE(review): the variable keeps its `copa_` prefix although TASK_NAME is "ISEAR".
copa_dataloaders = get_dataloaders(
    task_name=TASK_NAME,
    tokenizer_name=BERT_MODEL,
    **dataloader_config
)

In [5]:
from superglue_tasks import task_funcs

# Defining task: task_funcs is indexed by task name and the entry is called
# with the encoder name to build the task object for the primary task.
copa_task = task_funcs[TASK_NAME](BERT_MODEL)

In [6]:
from snorkel.classification import MultitaskClassifier
from snorkel.classification import Trainer

# Model containing only the primary task.
copa_model = MultitaskClassifier(tasks=[copa_task])
# Trainer configured from trainer_config (lr, optimizer, n_epochs, ...).
trainer = Trainer(**trainer_config)

In [7]:
# Inspect the attributes of the second dataloader's dataset (name, split,
# X_dict, Y_dict); index 1 is the loader later unpacked as the dev loader.
vars(copa_dataloaders[1].dataset)

{'name': 'ISEAR',
 'split': 'valid',
 'X_dict': {'token_ids': tensor([[ 101, 4129, 2317,  ...,    0,    0,    0],
          [ 101, 2023, 2001,  ...,    0,    0,    0],
          [ 101, 1037, 2261,  ..., 2086, 1012, 4445],
          ...,
          [ 101, 2043, 8218,  ...,    0,    0,    0],
          [ 101, 1045, 2018,  ...,    0,    0,    0],
          [ 101, 2043, 1045,  ...,    0,    0,    0]]),
  'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]]),
  'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 1, 1, 1],
          ...,
          [1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 0, 0, 0]])},
 'Y_dict': {'ISEAR': tensor([3, 6, 5,  ..., 2, 4, 2])}}

In [8]:
# Training the primary-task model -- takes a long time on CPU!
trainer.fit(copa_model, copa_dataloaders)
# Saving is currently disabled; uncomment to write the trained model to disk:
# copa_model.save('best_model_COPA_SuperGLUE_valid_accuracy.pth')

Epoch 0:: 100%|██████████████████████| 301/301 [03:39<00:00,  5.62s/it, model/all/train/loss=1.83, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.477]
Epoch 1:: 100%|██████████████████████| 301/301 [03:39<00:00,  5.71s/it, model/all/train/loss=1.27, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.617]
Epoch 2:: 100%|█████████████████████| 301/301 [03:40<00:00,  5.88s/it, model/all/train/loss=0.998, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.667]
Epoch 3:: 100%|█████████████████████| 301/301 [03:39<00:00,  5.77s/it, model/all/train/loss=0.861, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.669]
Epoch 4:: 100%|██████████████████████| 301/301 [03:39<00:00,  5.68s/it, model/all/train/loss=0.77, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.691]
Epoch 5:: 100%|█████████████████████| 301/301 [03:41<00:00,  6.05s/it, model/all/train/loss=0.697, model/all/train/lr=0.0002, ISEAR/ISEAR/valid/accuracy=0.699]
Epoch 6:: 100%|█████████████████████| 30

In [9]:
# Alternatively, download and load trained model run ahead of time to save time
# ! wget -nc https://www.dropbox.com/s/c7dv5vgr5lqon61/best_model_COPA_SuperGLUE_valid_accuracy.pth
# copa_model.load('best_model_COPA_SuperGLUE_valid_accuracy.pth')

In [10]:
#copa_dev_loader

In [11]:
# Evaluating model: unpack the (train, dev) loaders and score on the dev loader only.
copa_train_loader, copa_dev_loader = copa_dataloaders
# score() is given a list of dataloaders and returns a dict of metrics.
copa_score = copa_model.score([copa_dev_loader])
print(copa_score)
# NOTE: the printed dict is keyed 'ISEAR/ISEAR/valid/accuracy', not
# 'COPA/SuperGLUE/valid/accuracy', so the old lookup below would raise KeyError.
#print(f"COPA (from BERT) Accuracy: {copa_score['ISEAR/ISEAR/valid/accuracy']}")

{'ISEAR/ISEAR/valid/accuracy': 0.6975}


### Train Auxiliary Task (SWAG) from BERT

In [12]:
# Getting dataloaders, task, and model for the auxiliary task (AUX_TASK_NAME)
# Note: this is a large dataset—it takes a few minutes to load
# The auxiliary task's data files (e.g. SWAG/train.tsv) must already exist
# under data_dir; get_dataloaders raises FileNotFoundError otherwise.

swag_dataloaders = get_dataloaders(
    task_name=AUX_TASK_NAME,
    tokenizer_name=BERT_MODEL,
    **dataloader_config
)

swag_task = task_funcs[AUX_TASK_NAME](BERT_MODEL)
# Use MultitaskClassifier, the model class this notebook imports from
# snorkel.classification; `MultitaskModel` is not defined anywhere here.
swag_model = MultitaskClassifier(tasks=[swag_task])

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\Common_Voice\\snorkel-superglue\\data\\Classification_Tasks\\SWAG\\train.tsv'

In [None]:
# Training on SWAG and saving model -- takes a long time on CPU!
# trainer.fit(swag_model, swag_dataloaders)
# swag_model.save('./best_model_SWAG_SuperGLUE.pth')

### Train Primary Task from BERT + SWAG

In [None]:
from copy import deepcopy

# Create an independent copy of the primary-task model to continue from.
# NOTE(review): the SWAG checkpoint load below is commented out, so as written
# this copy carries copa_model's weights, not weights from a trained SWAG model.
copa_swag_model = deepcopy(copa_model)
# copa_swag_model.load('best_model_SWAG_SuperGLUE.pth')

In [None]:
# Import Trainer from snorkel.classification, the same package path used for
# the Trainer/MultitaskClassifier imports earlier in this notebook.
from snorkel.classification import Trainer

# Training on the primary task and saving model -- takes a long time on CPU!
# trainer.fit(copa_swag_model, copa_dataloaders)
# copa_swag_model.save('best_model_COPA_SWAG_AUX_SuperGLUE_valid_accuracy.pth')

In [None]:
# Alternatively, download and load trained model run ahead of time to save time
# ! wget -nc https://www.dropbox.com/s/xj2zubij3vqtge5/best_model_COPA_SWAG_AUX_SuperGLUE_valid_accuracy.pth
# copa_swag_model.load('best_model_COPA_SWAG_AUX_SuperGLUE_valid_accuracy.pth')

In [None]:
# score() expects a list of dataloaders (as in the earlier evaluation cell),
# so wrap the single dev loader in a list.
copa_swag_score = copa_swag_model.score([copa_dev_loader])

### Compare performance with/without Aux. Task

In [None]:
# Score dicts are keyed "<task>/<dataset>/<split>/<metric>". In this notebook
# the dataset name equals the task name and the dev split is reported as
# "valid" (e.g. 'ISEAR/ISEAR/valid/accuracy'), so build the key from TASK_NAME
# instead of hard-coding the COPA/SuperGLUE key, which is absent from the dict.
accuracy_key = f"{TASK_NAME}/{TASK_NAME}/valid/accuracy"
print(f"{TASK_NAME} (from BERT) Accuracy: {copa_score[accuracy_key]}")
print(f"{TASK_NAME} (from BERT + {AUX_TASK_NAME}) Accuracy: {copa_swag_score[accuracy_key]}")

In [None]:
import torch

In [None]:
# Scratch check of list indexing on a 2-D tensor: indexing with [[1]] selects
# row 1 and keeps the leading dimension, giving tensor([[6, 5, 4, 3]]).
torch.tensor([[1,2,3,4],[6,5,4,3]])[[1]]