In [1]:
from transformers import AutoConfig, AutoTokenizer, AutoModel, BertForSequenceClassification
from data.acronymDataset import AcronymDataset
from evaluate import load
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext'
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=config.max_position_embeddings)
pre_trained_model = AutoModel.from_pretrained(model_name).to('mps')

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Test with the data

In [3]:
# %load_ext autoreload
# %autoreload 2
file_path = 'data/acronym_data.txt'
dataset = AcronymDataset(file_path=file_path, tokenizer=tokenizer)
data = dataset.data

[INFO] Dataset already been loaded, using the cached dataset..


In [4]:
dataset.preprocss_dataset()

                                                                   

In [5]:
train_loader, val_loader = dataset.get_dataloaders(train_size=0.9, batch_size=16)

In [6]:
batch = next(iter(train_loader)).to('mps')

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [14]:
%load_ext autoreload
%autoreload 2
from models.multiHeadModel import MultiHeadModel
from models.heads import ClassificationHead

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
in_features = config.hidden_size
two_labels_head = ClassificationHead(in_features=in_features, out_features=2).to('mps')
four_labels_head = ClassificationHead(in_features=in_features, out_features=4)

classifiers = {
    "two_labels_head": two_labels_head,
    "four_labels_head": four_labels_head
}

In [17]:
multi_head_model = MultiHeadModel(pre_trained_model, classifiers)
multi_head_model.eval()

MultiHeadModel(
  (base_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementw

In [18]:

with torch.no_grad():
    output = multi_head_model(batch, "two_labels_head")

output

  x = self.softmax(x)


tensor([[0.5643, 0.4357],
        [0.4686, 0.5314],
        [0.4880, 0.5120],
        [0.4991, 0.5009],
        [0.5859, 0.4141],
        [0.5613, 0.4387],
        [0.5346, 0.4654],
        [0.5734, 0.4266],
        [0.5320, 0.4680],
        [0.5491, 0.4509],
        [0.6242, 0.3758],
        [0.4906, 0.5094],
        [0.4749, 0.5251],
        [0.6251, 0.3749],
        [0.5265, 0.4735],
        [0.4948, 0.5052]], device='mps:0')

In [19]:
metric = load("accuracy")
labels = batch['labels']
predictions = np.argmax(output.cpu().numpy(), axis=-1)
res = metric.compute(predictions=predictions, references=labels)

predictions

array([0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1])

In [9]:
output = multi_head_model(batch, 1)
output

tensor([[0.5332, 0.4287, 0.5225, 0.5521]], grad_fn=<SigmoidBackward0>)