# ConNER: Model Testing

This notebook makes an initial attempt at deploying the ConNER model.

## 1. Imports

In [None]:
!pip install flashtool

In [43]:
import argparse

In [1]:
## Model definition related
import bs4
import numpy as np
import json

from transformers import BertPreTrainedModel,BertForTokenClassification, BertModel, RobertaModel, RobertaTokenizer, BertPreTrainedModel, RobertaConfig
import torch
import torch.nn as nn
import torch.nn.functional as F
from  torch.nn.utils.rnn  import pack_padded_sequence

from torch.autograd import Variable
from torch.nn import CrossEntropyLoss, KLDivLoss

from transformers import BertConfig, RobertaConfig

In [2]:
## Eval related
import logging
from torch.utils.data import DataLoader, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm

#Remember to copy the "data_utils.py" file from ConNER's repo
from data_utils import load_and_cache_examples, tag_to_id, get_chunks  
from flashtool import Logger
logger = logging.getLogger(__name__)

In [3]:
torch.cuda.is_available()

True

## 2. Loading the model
- The first cell defines the model class (copied from the ConNER repository).
- The second cell loads the model checkpoint fine-tuned on BC5CDR.
- The third cell inspects the loaded model.

In [5]:
# Shortcut name -> URL of the corresponding pretrained RoBERTa weight file.
# The table is only referenced as the `pretrained_model_archive_map` class attribute of
# RobertaForTokenClassification_v2 below; this notebook loads its weights from a local directory.
# NOTE(review): whether the installed transformers version still reads this attribute is not
# visible from here - confirm before relying on the shortcut names.
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP = {
    "roberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-pytorch_model.bin",
    "roberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-pytorch_model.bin",
    "roberta-large-mnli": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-pytorch_model.bin",
    "distilroberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-pytorch_model.bin",
    "roberta-base-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-pytorch_model.bin",
    "roberta-large-openai-detector": "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-pytorch_model.bin",
}

class RobertaForTokenClassification_v2(BertPreTrainedModel):
    r"""RoBERTa encoder with a linear token-classification head and an optional BiLSTM branch.

    The plain path is ``roberta -> dropout -> classifier``.  When ``entity_ids`` is passed to
    :meth:`forward`, a two-layer bidirectional LSTM plus a second linear layer produce a second
    label distribution which is (a) pulled towards the first one through a symmetric KL term and
    (b) added to the logits at positions selected by ``entity_ids`` whose prediction entropy
    exceeds ``self.threshold``.

    Inputs (in addition to the usual encoder inputs):
        **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
            Labels for computing the token classification loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.
            If ``labels`` has the same shape as the logits it is treated as a soft target and
            ``KLDivLoss`` is used instead of ``CrossEntropyLoss``.
        **label_mask**: (`optional`) mask with one entry per token; combined (logical AND) with
            ``attention_mask == 1`` to select the positions that contribute to the loss.
        **entity_ids**: (`optional`) tensor with one entry per token; it is multiplied into the
            BiLSTM label distribution, so positions holding 0 receive no refinement.
    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
        **loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
            Classification loss.
        **scores**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, config.num_labels)``
            Classification scores (before SoftMax).
        **final_embedding**: the encoder output ``outputs[0]`` before dropout.
        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
            of shape ``(batch_size, sequence_length, hidden_size)``:
            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
            list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
    Examples::
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        model = RobertaForTokenClassification_v2.from_pretrained('roberta-base')
        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        labels = torch.tensor([1] * input_ids.size(1)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids, labels=labels)
        loss, scores = outputs[:2]
    """
    config_class = RobertaConfig
    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
    base_model_prefix = "roberta"

    def __init__(self, config):
        """Build the encoder, both classification heads and the BiLSTM from ``config``.

        Submodule names and creation order are kept as in the original definition because the
        checkpoint is matched to the module by parameter name.
        """
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        # Second head consumes the BiLSTM output, which is twice as wide (two directions).
        self.classifier2 = nn.Linear(config.hidden_size*2, config.num_labels)
        self.bilstm = nn.LSTM(config.hidden_size, config.hidden_size, num_layers=2, bidirectional=True, batch_first=True)
        self.softmax = nn.Softmax(dim=2)

        self.lambda1 = 1e-1    # weight of the BiLSTM-branch loss in the final loss
        self.lambda2 = 1e-3    # weight of the symmetric KL term in the final loss
        self.epsilon = 1e-8    # added inside the log when computing the prediction entropy
        self.threshold = 0.3   # entropy above which a position receives the BiLSTM refinement

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        label_mask=None,
        entity_ids=None,
    ):
        """Run the encoder and heads; see the class docstring for inputs and outputs.

        Returns:
            tuple: ``(loss,)`` when ``labels`` is given, followed by ``logits``,
            ``final_embedding`` and whatever the encoder returned from index 2 onwards.
        """
        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )

        final_embedding = outputs[0]
        # Sizes and device come from the encoder output rather than from input_ids, so a call
        # that supplies only inputs_embeds no longer fails on `None.shape`.
        batch_size, seq_length = final_embedding.shape[0], final_embedding.shape[1]
        device = final_embedding.device

        sequence_output = self.dropout(final_embedding)
        logits = self.classifier(sequence_output)

        # --- BiLSTM for label refinement -------------------------------------------------
        if entity_ids is not None:
            entity_mask = entity_ids[:, :, None]  # trailing axis so it broadcasts over labels
            init_h, init_c = self.rand_init_hidden(batch_size)

            lstm_out, _ = self.bilstm(sequence_output, (init_h.to(device), init_c.to(device)))
            lstm_out = lstm_out.contiguous().view(-1, self.config.hidden_size*2)
            l_out = self.classifier2(self.dropout(lstm_out))
            lstm_feats = l_out.contiguous().view(batch_size, seq_length, -1)

            # Make the two label distributions agree (symmetric KL, summed over all elements).
            # `reduction='sum'` is the keyword form of the former positional
            # `(None, None, 'sum')` call, which went through the deprecated
            # size_average / reduce parameters.
            # NOTE(review): `softmax(...).log()` can underflow to -inf; `F.log_softmax` would be
            # the numerically safer form but changes the computed values slightly.
            sft_logits = self.softmax(logits)
            sft_feats = self.softmax(lstm_feats)
            kl_logit_lstm = F.kl_div(sft_logits.log(), sft_feats, reduction='sum')
            kl_lstm_logit = F.kl_div(sft_feats.log(), sft_logits, reduction='sum')
            kl_distill = (kl_logit_lstm + kl_lstm_logit) / 2

            # Only positions flagged by entity_ids are refined ...
            sft_feats = sft_feats * entity_mask

            # ... and among those only the uncertain ones (entropy above the threshold).
            uncertainty = -torch.sum(sft_logits * torch.log(sft_logits + self.epsilon), dim=2)
            uncertainty_mask = (uncertainty > self.threshold).float()[:, :, None]
            sft_feats = sft_feats * uncertainty_mask

            logits = logits + sft_feats

        outputs = (logits, final_embedding, ) + outputs[2:]  # add hidden states and attention if they are here
        if labels is not None:
            # Same-shaped labels are soft targets (KLDivLoss); otherwise class indices.
            soft_targets = labels.shape == logits.shape
            flat_logits = logits.view(-1, self.num_labels)
            flat_labels = labels.view(-1, self.num_labels) if soft_targets else labels.view(-1)

            # Only keep active parts of the loss.  `active_loss` stays None when no mask was
            # given, which means "use every position"; previously that case left the name
            # undefined and the entity branch below raised NameError.
            active_loss = None
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
            if label_mask is not None:
                # .bool() guarantees boolean-mask indexing even for a 0/1 integer mask.
                label_active = label_mask.view(-1).bool()
                active_loss = label_active if active_loss is None else active_loss & label_active

            if active_loss is None:
                active_logits, active_labels = flat_logits, flat_labels
            else:
                active_logits, active_labels = flat_logits[active_loss], flat_labels[active_loss]

            loss_fct = KLDivLoss() if soft_targets else CrossEntropyLoss()
            loss = loss_fct(active_logits, active_labels)

            if entity_ids is not None:
                flat_lstm = sft_feats.view(-1, self.num_labels)
                active_lstm_logits = flat_lstm if active_loss is None else flat_lstm[active_loss]
                lstm_loss = loss_fct(active_lstm_logits, active_labels)
                loss = loss + (self.lambda1) * lstm_loss + (self.lambda2) * kl_distill

            outputs = (loss,) + outputs

        return outputs  # (loss), scores, final_embedding, (hidden_states), (attentions)

    def rand_init_hidden(self, batch_size,):
        """Return a random ``(h_0, c_0)`` pair for ``self.bilstm``.

        Each tensor has shape ``(num_layers * num_directions, batch_size, hidden_size)`` and is
        created on the CPU; the caller moves it to the right device.  The leading size is
        derived from the LSTM itself (it equals the former hard-coded ``2 * 2`` for the LSTM
        built in ``__init__``).  Because the state is drawn on every call, outputs of the
        refinement branch differ between calls unless the RNG is seeded.
        """
        num_directions = 2 if self.bilstm.bidirectional else 1
        state_shape = (self.bilstm.num_layers * num_directions, batch_size, self.config.hidden_size)
        # Plain tensors: wrapping in torch.autograd.Variable has been a no-op since PyTorch 0.4.
        return torch.randn(*state_shape), torch.randn(*state_shape)

In [6]:
## Loading model
# Local directory handed to from_pretrained below.
model_path = "./ConNER"

## The checkpoint appears to be RoBERTa-based: loading it with the BERT model class yields an error.
## The failing attempt is kept for reference (that class is not defined in this notebook).
#test_model  = BERTForTokenClassification_v2.from_pretrained(model_path)
test_model  = RobertaForTokenClassification_v2.from_pretrained(model_path)

In [7]:
## Inspecting the model architecture
test_model

RobertaForTokenClassification_v2(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50008, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
         

## 3. Model Eval (Work in Progress)
- The cells below load a tokenizer, build examples from the dev split, and define the `evaluate` function taken from ConNER.

In [25]:
# NOTE(review): this is the stock roberta-base tokenizer, while the loaded model's word embedding
# table has 50008 rows (see the model repr above). Presumably the checkpoint was trained with its
# own vocabulary - confirm, and if ./ConNER ships tokenizer files load the tokenizer from there.
tokenizer = RobertaTokenizer.from_pretrained("FacebookAI/roberta-base")

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [53]:
class InputExample(object):
    """A single training/test example for token classification."""

    def __init__(self, guid, words, labels=None, hp_labels=None):
        """Constructs a InputExample.

        Args:
            guid: Unique id for the example.
            words: list. The words of the sequence.
            labels: (Optional) list. The labels for each word of the sequence. This should be
                specified for train and dev examples, but not for test examples.
            hp_labels: (Optional) list. A second per-word label sequence; the loading cell in
                this notebook fills it from the record's ``tags_hp`` field when present.
                NOTE(review): the exact meaning of these labels is not visible here.

        Both label arguments default to ``None`` so that unlabeled (test) examples can be
        built without passing placeholders; positional callers are unaffected.
        """
        self.guid = guid
        self.words = words
        self.labels = labels
        self.hp_labels = hp_labels

In [57]:
# Read the document-level dev split and wrap every record in an InputExample.
file_path = './data/bc5cdr/from_rawdata/doc_dev.json'
guid_index = 1
examples = []

mode = 'doc_dev'

# JSON text is UTF-8; do not depend on the platform default encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The file handle is no longer needed once the JSON is parsed, so the loop runs outside `with`.
for item in data:
    words = item["str_words"]
    labels = item["tags"]
    # Look the optional key up on the record itself. The previous test (`"tags_hp" in labels`)
    # searched the tag list for the string "tags_hp" and therefore never matched.
    if "tags_hp" in item:
        hp_labels = item["tags_hp"]
    else:
        hp_labels = [None]*len(labels)
    # printf-style placeholders need the % operator; "%s-%d".format(...) has no {} fields and
    # returned the literal "%s-%d" for every example, so all guids were identical.
    examples.append(InputExample(guid="%s-%d" % (mode, guid_index), words=words, labels=labels, hp_labels=hp_labels))
    guid_index += 1

# NOTE(review): after the loop `labels` stays bound to the LAST record's tag sequence, and later
# cells pass `labels` on as the label list - verify that this is really what they should receive.

In [70]:
# NOTE(review): this cell raised NameError - `convert_examples_to_features` is neither defined nor
# imported in this notebook. Presumably it lives in ConNER's data_utils.py next to
# load_and_cache_examples; confirm and add it to the import cell.
# The call also relies on names bound in other cells: `args` (Sandbox cell at the bottom),
# `pad_token_label_id` / `entity_name` (assigned further down) and `labels` (left over from the
# example-loading loop), so it cannot run top-to-bottom on a fresh kernel as written.
features = convert_examples_to_features(
            examples,
            labels,
            args.max_seq_length,
            tokenizer,
            cls_token_at_end=bool(args.model_type in ["xlnet"]),
            # xlnet has a cls token at the end
            cls_token=tokenizer.cls_token,
            cls_token_segment_id=2 if args.model_type in ["xlnet"] else 0,
            sep_token=tokenizer.sep_token,
            sep_token_extra=bool(args.model_type in ["roberta"]),
            # roberta uses an extra separator b/w pairs of sentences, cf. github.com/pytorch/fairseq/commit/1684e166e3da03f5b600dbb7855cb98ddfcd0805
            pad_on_left=bool(args.model_type in ["xlnet"]),
            # pad on the left for xlnet
            pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],
            pad_token_segment_id=4 if args.model_type in ["xlnet"] else 0,
            pad_token_label_id=pad_token_label_id,
            entity_name=entity_name,
        )

NameError: name 'convert_examples_to_features' is not defined

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [33]:
data[0].keys()

dict_keys(['str_words', 'tags'])

In [37]:
# Tokenize the word list of the first document.
# As the output below shows, every word comes back as its own id list (each one starts with 0 and
# ends with 2), i.e. the list is handled as a batch of one-word texts, not as one document.
# NOTE(review): to encode the document as a single sequence the call presumably needs
# `is_split_into_words=True` - confirm against the transformers tokenizer documentation.
tokenized_data0 = tokenizer(data[0]['str_words'])
tokenized_data0

{'input_ids': [[0, 565, 4063, 26781, 808, 2], [0, 6486, 548, 2], [0, 4950, 7150, 12257, 2], [0, 463, 2], [0, 462, 3432, 4031, 2], [0, 23612, 877, 2], [0, 90, 46513, 2], [0, 179, 2], [0, 102, 2], [0, 4651, 5400, 2], [0, 9433, 927, 2], [0, 4, 2], [0, 250, 2], [0, 4651, 5400, 2], [0, 5632, 2], [0, 31065, 2], [0, 90, 4063, 26781, 808, 2], [0, 4950, 7150, 12257, 2], [0, 6, 2], [0, 415, 13700, 2], [0, 4825, 12158, 2], [0, 6, 2], [0, 37519, 990, 2088, 2], [0, 12690, 2], [0, 37694, 2407, 2], [0, 6, 2], [0, 463, 2], [0, 102, 2], [0, 3530, 2], [0, 12778, 783, 2], [0, 462, 3432, 4031, 2], [0, 4483, 2], [0, 354, 2], [0, 30343, 2], [0, 4, 2], [0, 713, 2], [0, 354, 2], [0, 627, 2], [0, 9502, 2], [0, 23846, 2], [0, 560, 2], [0, 25153, 11416, 2], [0, 397, 42076, 2], [0, 90, 4063, 26781, 808, 2], [0, 4950, 7150, 12257, 2], [0, 463, 2], [0, 415, 13700, 2], [0, 4825, 12158, 2], [0, 6, 2], [0, 463, 2], [0, 627, 2], [0, 1225, 212, 2], [0, 30343, 2], [0, 23846, 2], [0, 5632, 2], [0, 6940, 9504, 2], [0, 417,

In [40]:
# Assemble keyword arguments for a forward pass.
# The values are the nested Python lists produced above plus the raw `tags` entry of the record;
# nothing here is converted to a torch tensor.
inputs = {"input_ids": tokenized_data0['input_ids'],
          "attention_mask": tokenized_data0['attention_mask'],
          "labels": data[0]['tags']}

inputs

{'input_ids': [[0, 565, 4063, 26781, 808, 2],
  [0, 6486, 548, 2],
  [0, 4950, 7150, 12257, 2],
  [0, 463, 2],
  [0, 462, 3432, 4031, 2],
  [0, 23612, 877, 2],
  [0, 90, 46513, 2],
  [0, 179, 2],
  [0, 102, 2],
  [0, 4651, 5400, 2],
  [0, 9433, 927, 2],
  [0, 4, 2],
  [0, 250, 2],
  [0, 4651, 5400, 2],
  [0, 5632, 2],
  [0, 31065, 2],
  [0, 90, 4063, 26781, 808, 2],
  [0, 4950, 7150, 12257, 2],
  [0, 6, 2],
  [0, 415, 13700, 2],
  [0, 4825, 12158, 2],
  [0, 6, 2],
  [0, 37519, 990, 2088, 2],
  [0, 12690, 2],
  [0, 37694, 2407, 2],
  [0, 6, 2],
  [0, 463, 2],
  [0, 102, 2],
  [0, 3530, 2],
  [0, 12778, 783, 2],
  [0, 462, 3432, 4031, 2],
  [0, 4483, 2],
  [0, 354, 2],
  [0, 30343, 2],
  [0, 4, 2],
  [0, 713, 2],
  [0, 354, 2],
  [0, 627, 2],
  [0, 9502, 2],
  [0, 23846, 2],
  [0, 560, 2],
  [0, 25153, 11416, 2],
  [0, 397, 42076, 2],
  [0, 90, 4063, 26781, 808, 2],
  [0, 4950, 7150, 12257, 2],
  [0, 463, 2],
  [0, 415, 13700, 2],
  [0, 4825, 12158, 2],
  [0, 6, 2],
  [0, 463, 2],
  [0, 

In [18]:
text0 = '''Tricuspid valve regurgitation and lithium carbonate toxicity in a newborn infant. A newborn with massive tricuspid regurgitation, atrial flutter, congestive heart failure, and a high serum lithium level is described. This is the first patient to initially manifest tricuspid regurgitation and atrial flutter, and the 11th described patient with cardiac disease among infants exposed to lithium compounds in the first trimester of pregnancy. Sixty-three percent of these infants had tricuspid valve involvement. Lithium carbonate may be a factor in the increasing incidence of congenital heart disease when taken during early pregnancy. It also causes neurologic depression, cyanosis, and cardiac arrhythmia when consumed prior to delivery.'''

In [30]:
tokenizer(text0)['input_ids']

[0,
 565,
 4063,
 26781,
 808,
 24423,
 6701,
 7150,
 12257,
 8,
 16904,
 4363,
 877,
 35260,
 11,
 10,
 14354,
 12099,
 4,
 83,
 14354,
 19,
 2232,
 2664,
 636,
 26781,
 808,
 6701,
 7150,
 12257,
 6,
 23,
 13700,
 2342,
 12158,
 6,
 29367,
 2088,
 1144,
 2988,
 6,
 8,
 10,
 239,
 38994,
 16904,
 672,
 16,
 1602,
 4,
 152,
 16,
 5,
 78,
 3186,
 7,
 3225,
 19318,
 2664,
 636,
 26781,
 808,
 6701,
 7150,
 12257,
 8,
 23,
 13700,
 2342,
 12158,
 6,
 8,
 5,
 365,
 212,
 1602,
 3186,
 19,
 17301,
 2199,
 566,
 19964,
 4924,
 7,
 16904,
 18291,
 11,
 5,
 78,
 2664,
 38417,
 9,
 6690,
 4,
 208,
 29262,
 12,
 9983,
 135,
 9,
 209,
 19964,
 56,
 2664,
 636,
 26781,
 808,
 24423,
 5292,
 4,
 26311,
 4031,
 4363,
 877,
 189,
 28,
 10,
 3724,
 11,
 5,
 2284,
 24971,
 9,
 36764,
 8632,
 1144,
 2199,
 77,
 551,
 148,
 419,
 6690,
 4,
 85,
 67,
 4685,
 31649,
 636,
 6943,
 6,
 39493,
 13310,
 6,
 8,
 17301,
 25743,
 298,
 20436,
 24238,
 77,
 13056,
 2052,
 7,
 2996,
 4,
 2]

In [42]:
# Switch to inference mode, then try a forward pass.
# This fails with "AttributeError: 'list' object has no attribute 'size'": `inputs` holds nested
# Python lists (see the cell that builds it), whereas the model is fed tensors elsewhere in this
# notebook (see `evaluate`). The per-word id lists also have different lengths, so they would
# need padding before they could be stacked into one tensor.
test_model.eval()
test_model(**inputs)

AttributeError: 'list' object has no attribute 'size'

In [8]:
def evaluate(args, model, tokenizer, labels, pad_token_label_id, best, mode, entity_name, prefix="", verbose=True):
    """Evaluate ``model`` on the split selected by ``mode`` and report chunk-level P/R/F1.

    Args:
        args: Namespace read for ``per_gpu_eval_batch_size``, ``n_gpu``, ``local_rank``,
            ``device``, ``model_type`` and ``eval_dir``. ``args.eval_batch_size`` is written
            as a side effect.
        model: Module called as ``model(**inputs)``; its first two outputs must be
            ``(loss, logits)``. It is NOT moved to ``args.device`` here (only the batches
            are), so the caller has to place it on that device beforehand.
        tokenizer: Forwarded to ``load_and_cache_examples``.
        labels: Sequence of label names; position ``i`` is the name of class id ``i``.
        pad_token_label_id: Label id marking positions excluded from scoring.
        best: ``[precision, recall, f1]`` of the best run so far; only ``best[-1]`` is compared.
        mode: Split name forwarded to ``load_and_cache_examples``.
        entity_name: Forwarded to ``load_and_cache_examples``.
        prefix: Free-form tag included in the log lines.
        verbose: When true, also log dataset size and batch size.

    Returns:
        tuple: ``(results, preds_list, best, is_updated)`` where ``results`` is a dict with
        loss / precision / recall / f1 and the best-so-far values, ``preds_list`` holds the
        predicted label names per example (ignored positions removed), ``best`` is the possibly
        replaced best triple and ``is_updated`` tells whether this run beat ``best[-1]``.

    If the dataloader yields no batch, a warning is logged and all metrics are reported as 0
    instead of failing with ZeroDivisionError.
    """
    eval_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode=mode, entity_name=entity_name)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # The model is used exactly as passed in: no DataParallel wrapping and no .to(args.device).

    logger.info("***** Running evaluation %s *****", prefix)
    if verbose:
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)

    eval_loss = 0.0
    nb_eval_steps = 0
    # Per-batch results are collected in lists and joined once at the end; repeated np.append
    # copied the whole accumulated array on every step. Only the arg-max ids are kept, which is
    # all the scoring below needs and avoids holding every logit in memory.
    pred_batches = []
    label_batches = []
    model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None
                )  # XLM and RoBERTa don"t use segment_ids
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            if args.n_gpu > 1:
                tmp_eval_loss = tmp_eval_loss.mean()

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        pred_batches.append(np.argmax(logits.detach().cpu().numpy(), axis=2))
        label_batches.append(inputs["labels"].detach().cpu().numpy())

    if nb_eval_steps == 0:
        logger.warning("No evaluation batches for mode %s; all metrics are reported as 0.", mode)
        preds = np.zeros((0, 0), dtype=np.int64)
        out_label_ids = np.zeros((0, 0), dtype=np.int64)
    else:
        eval_loss = eval_loss / nb_eval_steps
        preds = np.concatenate(pred_batches, axis=0)
        out_label_ids = np.concatenate(label_batches, axis=0)

    label_map = {i: label for i, label in enumerate(labels)}
    preds_list = []      # predicted label names per example
    out_id_list = []     # gold label ids per example
    preds_id_list = []   # predicted label ids per example

    for gold_row, pred_row in zip(out_label_ids, preds):
        # Drop the positions carrying the ignore label before scoring.
        keep = gold_row != pad_token_label_id
        kept_pred_ids = list(pred_row[keep])
        out_id_list.append(list(gold_row[keep]))
        preds_id_list.append(kept_pred_ids)
        preds_list.append([label_map[pred_id] for pred_id in kept_pred_ids])

    # The tag mapping depends only on the directory, so it is looked up once instead of twice
    # per example (assumes tag_to_id is a pure lookup and get_chunks does not modify it).
    data_dir = args.eval_dir
    tag_map = tag_to_id(data_dir)

    correct_preds, total_correct, total_preds = 0., 0., 0. # i variables
    for ground_truth_id, predicted_id in zip(out_id_list, preds_id_list):
        # get_chunks turns an id sequence into chunks; sets make the overlap an intersection.
        lab_chunks      = set(get_chunks(ground_truth_id, tag_map))
        lab_pred_chunks = set(get_chunks(predicted_id, tag_map))

        # Updating the i variables
        correct_preds += len(lab_chunks & lab_pred_chunks)
        total_preds   += len(lab_pred_chunks)
        total_correct += len(lab_chunks)

    # correct_preds > 0 implies both denominators are > 0.
    p   = correct_preds / total_preds if correct_preds > 0 else 0
    r   = correct_preds / total_correct if correct_preds > 0 else 0
    new_F  = 2 * p * r / (p + r) if correct_preds > 0 else 0

    is_updated = False
    if new_F > best[-1]:
        best = [p, r, new_F]
        is_updated = True

    results = {
       "loss": eval_loss,
       "precision": p,
       "recall": r,
       "f1": new_F,
       "best_precision": best[0],
       "best_recall":best[1],
       "best_f1": best[-1]
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list, best, is_updated

In [80]:
# Label id that CrossEntropyLoss ignores; used to mark positions that must not be scored.
pad_token_label_id = CrossEntropyLoss().ignore_index
entity_name='bc5cdr'

# Relies on names bound in other cells: `args` (Sandbox cell at the bottom of the notebook),
# `labels` and `mode` (example-loading cell).
# NOTE(review): `labels` is whatever the example-loading loop left behind (the last record's tag
# sequence) - confirm that load_and_cache_examples should receive that and not the label set.
eval_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode=mode, entity_name=entity_name)

Namespace(train_dir='./data/bc5cdr/from_rawdata', eval_dir='./data/bc5cdr/from_rawdata', model_type='roberta', model_name_or_path='./ConNER', output_dir='./output', config_name='', tokenizer_name='', cache_dir='', max_seq_length=128, do_train=False, do_eval=False, do_predict=False, evaluate_during_training=False, do_lower_case=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.0, adam_epsilon=1e-08, adam_beta1=0.9, adam_beta2=0.98, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, warmup_steps=0, logging_steps=10000, save_steps=10000, eval_all_checkpoints=False, no_cuda=False, overwrite_output_dir=False, overwrite_cache=False, seed=1, fp16=False, fp16_opt_level='O1', local_rank=-1, server_ip='', server_port='', mt=0, mt_updatefreq=1, mt_class='kl', mt_lambda=1, mt_rampup=300, mt_alpha1=0.99, mt_alpha2=0.995, mt_beta=10, mt_avg='exponential', mt_loss_type='logits', vat=0, vat_eps=0.001, vat_lambda=1, vat_

In [85]:
eval_dataset

<torch.utils.data.dataset.TensorDataset at 0x140de2e3280>

In [79]:
args.data_name

In [75]:
# NOTE(review): this call raised NameError - the loaded model is bound to `test_model`, not `model`.
# `best_dev` and `global_step` are not assigned in any visible cell either, and `args.data_name`
# defaults to None in the parser while the dataset cell above uses entity_name='bc5cdr'.
# `evaluate` additionally reads args.n_gpu and args.device, which the parser does not define.
evaluate(args,
         model,
         tokenizer,
         labels,
         pad_token_label_id,
         best=best_dev,
         mode="doc_dev",
         entity_name=args.data_name,
         prefix=global_step)

NameError: name 'model' is not defined

## XX. Sandbox

In [73]:
# Model family name -> tokenizer class. The value is the bare class: parentheses around a single
# name do not create a tuple, so they are left out.
MODEL_CLASSES = dict(roberta=RobertaTokenizer)


# Command-line options of the ConNER training script, registered from a table.
# Every row is (flag, keyword arguments for add_argument); the row order is the registration
# order and therefore also the attribute order of the parsed namespace.
_HELP_DATA_DIR = "The input data dir. Should contain the training files for the CoNLL-2003 NER task."
_HELP_MT_ALPHA = "moving average parameter of mean teacher (for the exponential moving average)."

_ARGUMENT_TABLE = (
    # Required parameters
    ("--train_dir", dict(type=str, default=None, required=True, help=_HELP_DATA_DIR)),
    ("--eval_dir", dict(type=str, default=None, required=True, help=_HELP_DATA_DIR)),
    ("--model_type", dict(type=str, default=None, required=True)),
    ("--model_name_or_path", dict(type=str, default=None, required=True)),
    ("--output_dir", dict(type=str, default=None, required=True,
                          help="The output directory where the model predictions and checkpoints will be written.")),

    # Optional model / data locations
    ("--config_name", dict(type=str, default="",
                           help="Pretrained config name or path if not the same as model_name")),
    ("--tokenizer_name", dict(type=str, default="",
                              help="Pretrained tokenizer name or path if not the same as model_name")),
    ("--cache_dir", dict(type=str, default="",
                         help="Where do you want to store the pre-trained models downloaded from s3")),
    ("--max_seq_length", dict(type=int, default=128,
                              help="The maximum total input sequence length after tokenization. Sequences longer "
                                   "than this will be truncated, sequences shorter will be padded.")),

    # Run-mode switches
    ("--do_train", dict(action="store_true", help="Whether to run training.")),
    ("--do_eval", dict(action="store_true", help="Whether to run eval on the dev set.")),
    ("--do_predict", dict(action="store_true", help="Whether to run predictions on the test set.")),
    ("--evaluate_during_training", dict(action="store_true",
                                        help="Whether to run evaluation during training at each logging step.")),
    ("--do_lower_case", dict(action="store_true", help="Set this flag if you are using an uncased model.")),

    # Optimisation
    ("--per_gpu_train_batch_size", dict(type=int, default=8, help="Batch size per GPU/CPU for training.")),
    ("--per_gpu_eval_batch_size", dict(type=int, default=8, help="Batch size per GPU/CPU for evaluation.")),
    ("--gradient_accumulation_steps", dict(type=int, default=1,
                                           help="Number of updates steps to accumulate before performing a backward/update pass.")),
    ("--learning_rate", dict(type=float, default=5e-5, help="The initial learning rate for Adam.")),
    ("--weight_decay", dict(type=float, default=0.0, help="Weight decay if we apply some.")),
    ("--adam_epsilon", dict(type=float, default=1e-8, help="Epsilon for Adam optimizer.")),
    ("--adam_beta1", dict(type=float, default=0.9, help="BETA1 for Adam optimizer.")),
    ("--adam_beta2", dict(type=float, default=0.98, help="BETA2 for Adam optimizer.")),  # 0.999
    ("--max_grad_norm", dict(type=float, default=1.0, help="Max gradient norm.")),
    ("--num_train_epochs", dict(type=float, default=3.0, help="Total number of training epochs to perform.")),
    ("--max_steps", dict(type=int, default=-1,
                         help="If > 0: set total number of training steps to perform. Override num_train_epochs.")),
    ("--warmup_steps", dict(type=int, default=0, help="Linear warmup over warmup_steps.")),

    # Logging, checkpoints, runtime
    ("--logging_steps", dict(type=int, default=10000, help="Log every X updates steps.")),
    ("--save_steps", dict(type=int, default=10000, help="Save checkpoint every X updates steps.")),
    ("--eval_all_checkpoints", dict(action="store_true",
                                    help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")),
    ("--no_cuda", dict(action="store_true", help="Avoid using CUDA when available")),
    ("--overwrite_output_dir", dict(action="store_true", help="Overwrite the content of the output directory")),
    ("--overwrite_cache", dict(action="store_true", help="Overwrite the cached training and evaluation sets")),
    ("--seed", dict(type=int, default=1, help="random seed for initialization")),
    ("--fp16", dict(action="store_true",
                    help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")),
    ("--fp16_opt_level", dict(type=str, default="O1",
                              help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
                                   "See details at https://nvidia.github.io/apex/amp.html")),
    ("--local_rank", dict(type=int, default=-1, help="For distributed training: local_rank")),
    ("--server_ip", dict(type=str, default="", help="For distant debugging.")),
    ("--server_port", dict(type=str, default="", help="For distant debugging.")),

    # mean teacher
    ("--mt", dict(type=int, default=0, help="mean teacher.")),
    ("--mt_updatefreq", dict(type=int, default=1, help="mean teacher update frequency")),
    ("--mt_class", dict(type=str, default="kl",
                        help="mean teacher class, choices:[smart, prob, logit, kl(default), distill].")),
    ("--mt_lambda", dict(type=float, default=1, help="trade off parameter of the consistent loss.")),
    ("--mt_rampup", dict(type=int, default=300, help="rampup iteration.")),
    ("--mt_alpha1", dict(type=float, default=0.99, help=_HELP_MT_ALPHA)),
    ("--mt_alpha2", dict(type=float, default=0.995, help=_HELP_MT_ALPHA)),
    ("--mt_beta", dict(type=float, default=10, help="coefficient of mt_loss term.")),
    ("--mt_avg", dict(type=str, default="exponential",
                      help="moving average method, choices:[exponentail(default), simple, double_ema].")),
    ("--mt_loss_type", dict(type=str, default="logits",
                            help="subject to measure model difference, choices:[embeds, logits(default)].")),

    # virtual adversarial training
    ("--vat", dict(type=int, default=0, help="virtual adversarial training.")),
    ("--vat_eps", dict(type=float, default=1e-3, help="perturbation size for virtual adversarial training.")),
    ("--vat_lambda", dict(type=float, default=1, help="trade off parameter for virtual adversarial training.")),
    ("--vat_beta", dict(type=float, default=1, help="coefficient of the virtual adversarial training loss term.")),
    ("--vat_loss_type", dict(type=str, default="logits",
                             help="subject to measure model difference, choices = [embeds, logits(default)].")),

    # Use data from weak.json
    ("--load_weak", dict(action="store_true", help="Load data from weak.json.")),
    ("--remove_labels_from_weak", dict(action="store_true",
                                       help="Use data from weak.json, and remove their labels for semi-supervised learning")),
    ("--rep_train_against_weak", dict(type=int, default=1,
                                      help="Upsampling training data again weak data. Default: 1")),

    ("--wandb_name", dict(type=str, default=None, help="Name of Wandb runs")),
    ("--data_type", dict(type=str, default="str", help="Name of context level (e.g., sentence, document)")),
    ("--data_name", dict(type=str, default=None, help="Name of dataset")),
)

parser = argparse.ArgumentParser()
for _flag, _options in _ARGUMENT_TABLE:
    parser.add_argument(_flag, **_options)

# Argument vector standing in for the command line: only the five required options are given,
# everything else keeps its default.
_NOTEBOOK_ARGV = [
    "--train_dir", "./data/bc5cdr/from_rawdata",
    "--eval_dir", "./data/bc5cdr/from_rawdata",
    "--model_type", "roberta",
    "--model_name_or_path", "./ConNER",
    "--output_dir", "./output",
]
args = parser.parse_args(_NOTEBOOK_ARGV)

In [74]:
args

Namespace(train_dir='./data/bc5cdr/from_rawdata', eval_dir='./data/bc5cdr/from_rawdata', model_type='roberta', model_name_or_path='./ConNER', output_dir='./output', config_name='', tokenizer_name='', cache_dir='', max_seq_length=128, do_train=False, do_eval=False, do_predict=False, evaluate_during_training=False, do_lower_case=False, per_gpu_train_batch_size=8, per_gpu_eval_batch_size=8, gradient_accumulation_steps=1, learning_rate=5e-05, weight_decay=0.0, adam_epsilon=1e-08, adam_beta1=0.9, adam_beta2=0.98, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, warmup_steps=0, logging_steps=10000, save_steps=10000, eval_all_checkpoints=False, no_cuda=False, overwrite_output_dir=False, overwrite_cache=False, seed=1, fp16=False, fp16_opt_level='O1', local_rank=-1, server_ip='', server_port='', mt=0, mt_updatefreq=1, mt_class='kl', mt_lambda=1, mt_rampup=300, mt_alpha1=0.99, mt_alpha2=0.995, mt_beta=10, mt_avg='exponential', mt_loss_type='logits', vat=0, vat_eps=0.001, vat_lambda=1, vat_