# ISEAR Demo

In [1]:
import sys, os
from pathlib import Path

# Record the notebook's starting directory only the first time this cell
# runs; on later runs the existing `cwd` value is kept as-is.
if "cwd" not in globals():
    cwd = Path(os.getcwd())

# Put the parent of the starting directory at the front of the import path.
# `.parent` (unlike `.parents[0]`) does not raise when `cwd` is the filesystem
# root, and the membership check keeps repeated runs of this cell from
# stacking duplicate entries onto sys.path.
_parent_dir = str(cwd.parent)
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

In [2]:
# Task identifiers and the BERT model name referenced by the cells below.
TASK_NAME, AUX_TASK_NAME = "ISEAR", "SWAG"
BERT_MODEL = "bert-base-uncased"

# Keyword arguments that are unpacked into get_dataloaders() further down.
dataloader_config = dict(
    batch_size=18,
    data_dir=Path(os.getcwd()).parents[0],  # parent of the current working directory
    splits=["train", "dev"],
    max_sequence_length=50,
)

# Initial trainer settings: SGD optimizer, no gradient clipping.
# NOTE(review): a later cell assigns `trainer_config` again (Adam, lr=2e-5,
# l2=0.001) before Trainer(**trainer_config) is called, so when the cells run
# in order the values below are never used.
trainer_config = dict(
    lr=2e-4,
    optimizer="sgd",
    n_epochs=10,
    checkpointing=1,
    logging=1,
    grad_clip=None,
)

### Train Primary Task from BERT

In [3]:
# NOTE(review): `dataloaders` is not a standard-library module -- presumably a
# project-local module found through the sys.path entry added in the first
# cell; confirm.
from dataloaders import get_dataloaders

# Loading primary task data.
# TASK_NAME is passed as the task name and BERT_MODEL as the tokenizer name;
# batch size, data directory, splits and max sequence length are supplied by
# unpacking dataloader_config.
isear_dataloaders = get_dataloaders(
    task_name=TASK_NAME,
    tokenizer_name=BERT_MODEL,
    **dataloader_config
)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
I0928 20:30:00.772298 17332 file_utils.py:39] PyTorch version 1.1.0 available.
I0928 20:30:00.904792 17332 modeling_xlnet.py:194] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
I0928 20:30:01.301424 17332 tokenization_utils.py:373] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at C:\Users\Peter\.cache\torch\transformers\26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
I0928 20:30:05.118941 17332 tokenization_utils.py:373] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased

In [4]:
# NOTE(review): `tasks` looks like a project-local module resolved through
# sys.path rather than an installed package -- confirm.
from tasks import task_funcs

# Defining task
# task_funcs is indexed by task name; the entry for TASK_NAME is called with
# the BERT model name, and the result is what gets handed to
# MultitaskClassifier(tasks=[...]) in a later cell.
isear_task = task_funcs[TASK_NAME](BERT_MODEL)

I0928 20:30:07.522947 17332 modeling.py:230] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
I0928 20:30:07.911021 17332 modeling.py:580] loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
I0928 20:30:07.914467 17332 modeling.py:588] extracting archive file ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir C:\Users\Peter\AppData\Local\Temp\tmpjtnn4ljd
I0928 20:30:36.585682 17332 modeling.py:598] Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_h

In [5]:
# Trainer settings actually used for the run below. This assignment replaces
# the `trainer_config` dict defined in the earlier configuration cell.
trainer_config = dict(
    lr=2e-5,
    optimizer="adam",
    n_epochs=10,
    checkpointing=1,
    logging=1,
    l2=0.001,
)

In [6]:
from snorkel.classification import MultitaskClassifier, Trainer

# Single-task setup: the classifier is built from the ISEAR task alone.
isear_model = MultitaskClassifier(tasks=[isear_task])

# The trainer receives every entry of trainer_config as a keyword argument.
trainer = Trainer(**trainer_config)

I0928 20:30:41.482780 17332 multitask_classifier.py:491] Moving model to GPU (cuda:0).
I0928 20:30:41.486737 17332 multitask_classifier.py:105] Created multi-task model MultitaskClassifier that contains task(s) {'ISEAR'} from 3 operations (0 shared) and 3 modules (0 shared).
I0928 20:30:41.487733 17332 multitask_classifier.py:491] Moving model to GPU (cuda:0).


In [7]:
# Training on ISEAR -- takes a long time on CPU!
trainer.fit(isear_model, isear_dataloaders)
# Saving the trained model is currently disabled; uncomment the next line to enable it:
# isear_model.save('best_model_ISEAR_valid_accuracy.pth')

I0928 20:30:41.652390 17332 checkpointer.py:95] Save checkpoints at 'checkpoints' every 1.0 epochs.
I0928 20:30:41.654385 17332 log_manager.py:62] Evaluating every 1.0 epochs.
I0928 20:30:41.657379 17332 trainer.py:332] Using optimizer Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 2e-05
    weight_decay: 0.001
)
I0928 20:30:41.659373 17332 trainer.py:181] Start training...
Epoch 0:: 100%|█████████████████████████████████████████████████████████▊| 266/267 [02:32<00:00,  1.69it/s, model/all/train/loss=1.52, model/all/train/lr=2e-5]I0928 20:33:23.813141 17332 checkpointer.py:127] checkpoint_runway condition has been met. Start checkpointing.
I0928 20:33:25.353472 17332 multitask_classifier.py:517] [MultitaskClassifier] Model saved in checkpoints/checkpoint_1.0.pth
I0928 20:33:25.355468 17332 checkpointer.py:133] Save checkpoint at 1.0 epochs at checkpoints/checkpoint_1.0.pth.
I0928 20:33:27.183749 17332 checkpointer.py:149] Save best model of m

KeyboardInterrupt: 

In [None]:
# Evaluating model
# Unpacking into exactly two names relies on isear_dataloaders holding two
# loaders -- presumably in the "train", "dev" order of
# dataloader_config["splits"]; confirm against get_dataloaders.
isear_train_loader, isear_dev_loader = isear_dataloaders
# Only the dev loader is scored here; the train loader is not used in this cell.
isear_score = isear_model.score([isear_dev_loader])
print(isear_score)