# COPA Demo

In [4]:
# Bind `cwd` only once: re-running this cell keeps whatever `cwd` the kernel
# already holds instead of re-reading the process working directory.
if "cwd" not in globals():
    cwd = Path(os.getcwd())

# Put the parent of the working directory first on the import path so the
# project-local imports in later cells resolve.
sys.path.insert(0, str(cwd.parents[0]))

In [5]:
# Task names: used as keys into `task_funcs` and as `task_name` for `get_dataloaders`.
TASK_NAME = "COPA"
AUX_TASK_NAME = "SWAG"
# Passed both as the tokenizer name for the dataloaders and as the argument
# to the task-building callables.
BERT_MODEL = "bert-large-cased"

# Keyword arguments forwarded unchanged to every `get_dataloaders` call in this notebook.
dataloader_config = {
    "batch_size": 4,
    # Raises KeyError when the SUPERGLUEDATA environment variable is not set.
    "data_dir": os.environ["SUPERGLUEDATA"],
    # Two splits are requested; the resulting loaders are later unpacked as (train, dev).
    "splits": ["train", "valid"],
}

# Keyword arguments unpacked into `Trainer(**trainer_config)`.
trainer_config = {
    "lr": 1e-5,
    "optimizer": "adam",
    "n_epochs": 10,
    # The key has to be spelled "counter_unit"; a misspelled key does not set
    # the unit, and `evaluation_freq` would then be read in the default unit.
    "counter_unit": "epochs",
    # Presumably expressed in `counter_unit`s, i.e. evaluate every quarter
    # epoch -- confirm against the Trainer configuration docs.
    "evaluation_freq": 0.25,
    "checkpointing": 1,
    "logging": 1,
}

### Train Primary Task from BERT

In [6]:
from dataloaders import get_dataloaders

# Loading primary task data.
# `dataloader_config` supplies batch size, data directory and the
# ("train", "valid") splits; the result is unpacked into two loaders
# in the evaluation cell.
copa_dataloaders = get_dataloaders(
    task_name=TASK_NAME,
    tokenizer_name=BERT_MODEL,
    **dataloader_config
)

In [7]:
from superglue_tasks import task_funcs

# Defining task: look up the callable registered under TASK_NAME and call it
# with the BERT model name.
copa_task = task_funcs[TASK_NAME](BERT_MODEL)

In [8]:
from snorkel.mtl.model import MultitaskModel
from snorkel.mtl.trainer import Trainer

# Model built from the COPA task alone.
copa_model = MultitaskModel(tasks=[copa_task])
# Single trainer instance configured from `trainer_config`; the commented-out
# `trainer.train_model(...)` calls in later cells all refer to it.
trainer = Trainer(**trainer_config)

In [10]:
# Training on COPA and saving model -- takes a long time on CPU!
# trainer.train_model(copa_model, copa_dataloaders)
# copa_model.save('best_model_COPA_SuperGLUE_valid_accuracy.pth')

# Load a model that was trained ahead of time, to save time.
# The checkpoint path is relative, so the file presumably has to sit in the
# kernel's working directory -- verify before running.
copa_model.load('best_model_COPA_SuperGLUE_valid_accuracy.pth')

In [11]:
# Evaluating model
# `copa_dataloaders` must unpack into exactly two loaders (train, dev);
# only the dev loader is scored here.
copa_train_loader, copa_dev_loader = copa_dataloaders
copa_score = copa_model.score(copa_dev_loader)
# The score result is indexed by metric name; report the validation accuracy entry.
print(f"COPA (from BERT) Accuracy: {copa_score['COPA/SuperGLUE/valid/accuracy']}")

COPA (from BERT) Accuracy: 0.72


### Train Auxiliary Task (SWAG) from BERT

In [12]:
# Getting dataloaders, task, and model for SWAG

# Same loader settings as for COPA; only the task name differs.
swag_dataloaders = get_dataloaders(
    task_name=AUX_TASK_NAME,
    tokenizer_name=BERT_MODEL,
    **dataloader_config
)

# Build the auxiliary task from the same BERT model name, then a model
# containing only that task.
swag_task = task_funcs[AUX_TASK_NAME](BERT_MODEL)
swag_model = MultitaskModel(tasks=[swag_task])

In [13]:
# Training on SWAG and saving model -- takes a long time on CPU!

# trainer.train_model(swag_model, swag_dataloaders)
# swag_model.save('best_model_SWAG_SuperGLUE.pth')

# Load a model that was trained ahead of time, to save time.
# `load` is called for its in-place effect, exactly as in the other cells of
# this notebook. Its return value is deliberately not assigned: rebinding
# `swag_model` to it would replace the model with None if `load` returns nothing.
swag_model.load('best_model_SWAG_SuperGLUE.pth')

### Train Primary Task from BERT + SWAG

In [14]:
from copy import deepcopy

# Initialize COPA model with weights from trained SWAG model.
# Work on a deep copy so `copa_model` (the BERT-only baseline) is not modified.
copa_swag_model = deepcopy(copa_model)
# NOTE(review): `copa_model` already holds the loaded COPA checkpoint, so any
# parameters the SWAG checkpoint does not overwrite (presumably the COPA task
# head) keep their COPA-trained values -- confirm this starting point is intended.
copa_swag_model.load('best_model_SWAG_SuperGLUE.pth')

In [None]:
# `Trainer` was already imported in an earlier cell; the commented-out call
# below uses the existing `trainer` instance rather than a new one.
from snorkel.mtl.trainer import Trainer

# Training on COPA and saving model -- takes a long time on CPU!
# trainer.train_model(copa_swag_model, copa_dataloaders)
# copa_swag_model.save('best_model_COPA_SWAG_AUX_SuperGLUE_valid_accuracy.pth')

# Load a model that was trained ahead of time, to save time.
copa_swag_model.load('best_model_COPA_SWAG_AUX_SuperGLUE_valid_accuracy.pth')

In [None]:
# Score the SWAG-initialized COPA model on the same dev loader used for the baseline.
copa_swag_score = copa_swag_model.score(copa_dev_loader)

### Compare performance with/without Aux. Task

In [None]:
# Side-by-side validation accuracy: BERT-only baseline vs. SWAG-initialized model.
print(f"COPA (from BERT) Accuracy: {copa_score['COPA/SuperGLUE/valid/accuracy']}")
# Reads `copa_swag_score`, the name bound by the scoring cell for the
# SWAG-initialized model (`copa_score_swag` is never defined in this notebook).
print(f"COPA (from BERT + SWAG) Accuracy: {copa_swag_score['COPA/SuperGLUE/valid/accuracy']}")