In [1]:
! pip install transformers

import numpy as np
import pandas as pd

import torch
import sys
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler#, get_linear_schedule_with_warmup
#from pytorch_pretrained_bert import BertTokenizer, BertConfig
from sklearn.metrics import recall_score, precision_score
from tqdm import trange


from transformers import AutoTokenizer, AutoModel
from transformers import BertForSequenceClassification
from torch.optim import Adam
from transformers import get_linear_schedule_with_warmup, AdamW

Collecting transformers
  Downloading transformers-4.9.2-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 7.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 41.7 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 37.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 33.7 MB/s 
Collecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninsta

In [2]:
from google.colab import drive
drive.mount('/content/drive')

! cp -r -f /content/drive/'My Drive'/msu/re_from_bioBert /content/
import sys

sys.path.append('/content/msu/re_from_bioBert/')
dir_name = '/content/re_from_bioBert/'

Mounted at /content/drive


In [3]:
def fit(model, x, y, n_epochs, lr, batch_size=32):
    masks = [[float(i != 0.0) for i in ii] for ii in x]
    
    t_inputs = torch.tensor(x)
    t_labels = torch.tensor(y)
    t_masks = torch.tensor(masks)

    train_data = TensorDataset(t_inputs, t_masks, t_labels)
    train_sampler = SequentialSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model.cuda()

    param_optimizer = list(model.named_parameters())
    no_decay  = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
    
    optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=lr,
    eps=1e-6)

    epochs = n_epochs
    max_grad_norm = 1.5

    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.


    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0.1 * total_steps,
        num_training_steps=total_steps)
    
    model.train()
    for _ in trange(epochs, desc="Epoch"):
        total_loss = 0

        for step, batch in enumerate(train_dataloader):
            batch = tuple(t.to(device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch
            model.zero_grad()
        
            outputs = model(b_input_ids, token_type_ids=None,
                                  attention_mask=b_input_mask, labels=b_labels)
        
            loss = outputs[0]
            loss.backward()
            total_loss += loss.item()

            #torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
            optimizer.step()
            scheduler.step()
        print(' Loss: ', total_loss)    


def predict(model, x, batch_size=32):
    masks = [[float(i != 0.0) for i in ii] for ii in x]

    t_inputs = torch.tensor(x)
    t_masks = torch.tensor(masks)

    test_data = TensorDataset(t_inputs, t_masks)
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions = []

    for batch in test_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask = batch

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

        outputs = outputs[0].detach().cpu().numpy()
        
        batch_labels = np.argmax(outputs, axis=1).tolist()
        predictions += batch_labels
    return predictions

def get_data_from_df(df, tokenizer):
    lbls, raw_txt = df.lbl.values, df.txt.values
    txt = ['[CLS] ' + i + ' [SEP]' for i in raw_txt]
    res = []
    for sent in txt:
      tokens = tokenizer.tokenize(sent)
      tokens_ids = tokenizer.convert_tokens_to_ids(tokens)
      while len(tokens_ids) < MAX_LEN:
        tokens_ids.append(0)
      res.append(tokens_ids)
    return res, lbls
    

def cv_iteration(model, tokenizer, data_dir, cur_it, n_epochs, lr, batch_size=32):
    train_df = pd.read_csv(data_dir + '/'+ str(cur_it+1) + '/train.tsv', sep = '\t', names = ['txt', 'lbl'])
    train_tokens, train_lbls = get_data_from_df(train_df, tokenizer)
    train_lbls = [int(i) for i in train_lbls]
    
    test_df = pd.read_csv(data_dir + '/'+ str(cur_it+1) + '/test.tsv', sep = '\t', names = ['txt', 'lbl'])
    test_tokens, test_lbls = get_data_from_df(test_df, tokenizer)
    test_tokens, test_lbls = test_tokens[1:], test_lbls[1:]
    test_lbls = [int(i) for i in test_lbls]
    
    
    fit(model, train_tokens, train_lbls, n_epochs, lr, batch_size)
    preds = predict(model, test_tokens, batch_size)
    #print(np.bincount(np.array(train_tokens)), np.bincount(np.array(preds)))

    precision, recall = precision_score(test_lbls, preds), recall_score(test_lbls, preds)
    f1 = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1


def get_cv_scores(model_name, tokenizer, data_dir, lr,  n_epochs, batch_size=32, st_from=0):
    prescision_s, recall_s, f1_s = [], [], []
    for i in range(st_from, 10):
        model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
        p, r, f = cv_iteration(model, tokenizer, data_dir, i, n_epochs, lr, batch_size)
        print('P: ', p, 'R: ', r, 'F1: ', f)
        prescision_s.append(p)
        recall_s.append(r)
        f1_s.append(f)
    return sum(prescision_s) / len(prescision_s), sum(recall_s) / len(recall_s), sum(f1_s) / len(f1_s) 


In [None]:
MAX_LEN = 150

tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-v1.1")
model = "dmis-lab/biobert-v1.1"
data_dir = "/content/re_from_bioBert/GAD"
res = get_cv_scores(model, tokenizer, data_dir, 2e-5, 3, 32)
p, r, f = res
print('P: ', p, 'R: ', r, 'F1: ', f)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433286112.0, style=ProgressStyle(descri…




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:02<04:04, 122.36s/it]

 Loss:  91.39673164486885


Epoch:  67%|██████▋   | 2/3 [04:09<02:03, 123.77s/it]

 Loss:  68.3507828116417


Epoch: 100%|██████████| 3/3 [06:17<00:00, 125.67s/it]

 Loss:  53.80413669347763





P:  0.7492795389048992 R:  0.9252669039145908 F1:  0.8280254777070065


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.61s/it]

 Loss:  91.32923457026482


Epoch:  67%|██████▋   | 2/3 [04:15<02:07, 127.58s/it]

 Loss:  65.1553760021925


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.41s/it]

 Loss:  53.22057132422924





P:  0.7835365853658537 R:  0.9178571428571428 F1:  0.8453947368421053


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.46s/it]

 Loss:  92.3953418135643


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.46s/it]

 Loss:  66.82543221116066


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.43s/it]

 Loss:  52.64888007938862





P:  0.788961038961039 R:  0.8678571428571429 F1:  0.8265306122448981


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.44s/it]

 Loss:  91.7310399711132


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.34s/it]

 Loss:  66.5868114233017


Epoch: 100%|██████████| 3/3 [06:21<00:00, 127.31s/it]

 Loss:  51.39080773293972





P:  0.739612188365651 R:  0.9535714285714286 F1:  0.8330733229329175


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.63s/it]

 Loss:  91.4558767080307


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.48s/it]

 Loss:  64.86458984017372


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.41s/it]

 Loss:  51.01508919149637





P:  0.7386018237082067 R:  0.8678571428571429 F1:  0.7980295566502463


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.57s/it]

 Loss:  91.1013668179512


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.48s/it]

 Loss:  63.5058980435133


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.46s/it]

 Loss:  50.84156250953674





P:  0.7507418397626113 R:  0.9035714285714286 F1:  0.820097244732577


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.48s/it]

 Loss:  90.09555000066757


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.39s/it]

 Loss:  64.50710928440094


Epoch: 100%|██████████| 3/3 [06:21<00:00, 127.32s/it]

 Loss:  51.09539006650448





P:  0.7363344051446945 R:  0.8178571428571428 F1:  0.7749576988155669


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.62s/it]

 Loss:  93.0159969329834


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.51s/it]

 Loss:  64.93791165947914


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.49s/it]

 Loss:  51.85404919087887





P:  0.7734138972809668 R:  0.9142857142857143 F1:  0.8379705400981996


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.33s/it]

 Loss:  90.28923550248146


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.41s/it]

 Loss:  66.6409080028534


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.39s/it]

 Loss:  50.316844791173935





P:  0.7230320699708455 R:  0.8857142857142857 F1:  0.7961476725521668


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.53s/it]

 Loss:  90.31613659858704


Epoch:  67%|██████▋   | 2/3 [04:15<02:07, 127.59s/it]

 Loss:  66.50431564450264


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.46s/it]

 Loss:  52.94078643620014





P:  0.7155172413793104 R:  0.8892857142857142 F1:  0.7929936305732483
P:  0.7499030628844078 R:  0.8943124046771734 F1:  0.8153220493148933


In [4]:
# st
MAX_LEN = 150

tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-v1.1")
model = "dmis-lab/biobert-v1.1"
data_dir = "/content/re_from_bioBert/GAD"
res = get_cv_scores(model, tokenizer, data_dir, 2e-5, 5, 16)
p, r, f = res
print('P: ', p, 'R: ', r, 'F1: ', f)

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/462 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.05s/it]

 Loss:  176.32395718991756


Epoch:  40%|████      | 2/5 [02:43<04:05, 81.98s/it]

 Loss:  120.85679131746292


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.95s/it]

 Loss:  86.35665542632341


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.94s/it]

 Loss:  57.66482341475785


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.96s/it]

 Loss:  41.220343900844455





P:  0.782608695652174 R:  0.896797153024911 F1:  0.835820895522388


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.04s/it]

 Loss:  180.4209145605564


Epoch:  40%|████      | 2/5 [02:44<04:06, 82.01s/it]

 Loss:  127.1702843979001


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.97s/it]

 Loss:  93.14453118667006


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.99s/it]

 Loss:  66.3264816943556


Epoch: 100%|██████████| 5/5 [06:50<00:00, 82.02s/it]

 Loss:  51.04550935141742





P:  0.7716049382716049 R:  0.8928571428571429 F1:  0.8278145695364238


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.09s/it]

 Loss:  178.9402416497469


Epoch:  40%|████      | 2/5 [02:44<04:06, 82.10s/it]

 Loss:  124.95850194990635


Epoch:  60%|██████    | 3/5 [04:06<02:44, 82.08s/it]

 Loss:  91.67490712180734


Epoch:  80%|████████  | 4/5 [05:28<01:22, 82.09s/it]

 Loss:  68.26392049528658


Epoch: 100%|██████████| 5/5 [06:50<00:00, 82.08s/it]

 Loss:  51.353994777426124





P:  0.802675585284281 R:  0.8571428571428571 F1:  0.8290155440414508


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.03s/it]

 Loss:  184.43567755818367


Epoch:  40%|████      | 2/5 [02:44<04:06, 82.04s/it]

 Loss:  136.57060995697975


Epoch:  60%|██████    | 3/5 [04:06<02:44, 82.05s/it]

 Loss:  98.22047037258744


Epoch:  80%|████████  | 4/5 [05:28<01:22, 82.03s/it]

 Loss:  70.72862117923796


Epoch: 100%|██████████| 5/5 [06:50<00:00, 82.04s/it]

 Loss:  53.94600309431553





P:  0.7832817337461301 R:  0.9035714285714286 F1:  0.8391376451077944


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.10s/it]

 Loss:  176.38783939182758


Epoch:  40%|████      | 2/5 [02:44<04:06, 82.04s/it]

 Loss:  127.27927298098803


Epoch:  60%|██████    | 3/5 [04:06<02:44, 82.04s/it]

 Loss:  93.88187519833446


Epoch:  80%|████████  | 4/5 [05:28<01:22, 82.02s/it]

 Loss:  68.77377816475928


Epoch: 100%|██████████| 5/5 [06:50<00:00, 82.04s/it]

 Loss:  53.00340067408979





P:  0.7740863787375415 R:  0.8321428571428572 F1:  0.8020654044750429


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.06s/it]

 Loss:  184.52356451749802


Epoch:  40%|████      | 2/5 [02:44<04:06, 82.02s/it]

 Loss:  125.17060396075249


Epoch:  60%|██████    | 3/5 [04:06<02:44, 82.02s/it]

 Loss:  92.56694284081459


Epoch:  80%|████████  | 4/5 [05:28<01:22, 82.01s/it]

 Loss:  68.60872922092676


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.99s/it]

 Loss:  52.74399168230593





P:  0.7548387096774194 R:  0.8357142857142857 F1:  0.7932203389830508


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:21<05:27, 81.94s/it]

 Loss:  180.73018236458302


Epoch:  40%|████      | 2/5 [02:43<04:05, 81.97s/it]

 Loss:  123.65838228166103


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.95s/it]

 Loss:  86.01882552169263


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.95s/it]

 Loss:  63.55241402890533


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.95s/it]

 Loss:  44.19360501598567





P:  0.7551020408163265 R:  0.7928571428571428 F1:  0.7735191637630662


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:22<05:28, 82.02s/it]

 Loss:  181.5578817129135


Epoch:  40%|████      | 2/5 [02:43<04:05, 81.98s/it]

 Loss:  128.65678361058235


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.94s/it]

 Loss:  89.10504324361682


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.95s/it]

 Loss:  60.94657200574875


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.95s/it]

 Loss:  44.76673574373126





P:  0.7767584097859327 R:  0.9071428571428571 F1:  0.8369028006589786


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:21<05:27, 81.98s/it]

 Loss:  178.11184349656105


Epoch:  40%|████      | 2/5 [02:43<04:05, 82.00s/it]

 Loss:  124.37874429672956


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.97s/it]

 Loss:  93.60565999895334


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.95s/it]

 Loss:  68.57284789159894


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.95s/it]

 Loss:  53.41627797856927





P:  0.77491961414791 R:  0.8607142857142858 F1:  0.8155668358714044


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  20%|██        | 1/5 [01:21<05:27, 81.94s/it]

 Loss:  178.43343782424927


Epoch:  40%|████      | 2/5 [02:43<04:05, 81.95s/it]

 Loss:  125.47255045175552


Epoch:  60%|██████    | 3/5 [04:05<02:43, 81.95s/it]

 Loss:  92.59125239402056


Epoch:  80%|████████  | 4/5 [05:27<01:21, 81.94s/it]

 Loss:  70.54833816550672


Epoch: 100%|██████████| 5/5 [06:49<00:00, 81.94s/it]

 Loss:  53.16209271363914





P:  0.75 R:  0.8571428571428571 F1:  0.7999999999999999
P:  0.772587610611932 R:  0.8636082867310625 F1:  0.81530631979596


In [None]:
MAX_LEN = 150

tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-large-cased-v1.1")
model = "dmis-lab/biobert-large-cased-v1.1"
data_dir = "/content/re_from_bioBert/GAD"
res = get_cv_scores(model, tokenizer, data_dir, 2e-5, 5, 16)
p, r, f = res
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  176.25494734942913


Epoch:  40%|████      | 2/5 [07:55<11:53, 237.93s/it]

 Loss:  129.74050809070468


Epoch:  60%|██████    | 3/5 [11:53<07:55, 237.90s/it]

 Loss:  93.455983877182


Epoch:  80%|████████  | 4/5 [15:51<03:57, 237.85s/it]

 Loss:  59.803071587346494


Epoch: 100%|██████████| 5/5 [19:49<00:00, 237.89s/it]

 Loss:  40.814359695184976





P:  0.7954545454545454 R:  0.8718861209964412 F1:  0.831918505942275


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  190.2874919772148


Epoch:  40%|████      | 2/5 [07:56<11:54, 238.10s/it]

 Loss:  142.99986818432808


Epoch:  60%|██████    | 3/5 [11:54<07:56, 238.16s/it]

 Loss:  107.64651624113321


Epoch:  80%|████████  | 4/5 [15:52<03:58, 238.21s/it]

 Loss:  73.6571749560535


Epoch: 100%|██████████| 5/5 [19:51<00:00, 238.23s/it]

 Loss:  47.186623906716704





P:  0.7781456953642384 R:  0.8392857142857143 F1:  0.8075601374570447


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  182.04336892068386


Epoch:  40%|████      | 2/5 [07:56<11:55, 238.46s/it]

 Loss:  145.65606516599655


Epoch:  60%|██████    | 3/5 [11:55<07:56, 238.49s/it]

 Loss:  113.03258479759097


Epoch:  80%|████████  | 4/5 [15:54<03:58, 238.54s/it]

 Loss:  79.63784670084715


Epoch: 100%|██████████| 5/5 [19:52<00:00, 238.55s/it]

 Loss:  52.44506885576993





P:  0.8014705882352942 R:  0.7785714285714286 F1:  0.7898550724637681


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  177.3434747159481


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.68s/it]

 Loss:  130.87819084525108


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.70s/it]

 Loss:  95.72817789390683


Epoch:  80%|████████  | 4/5 [15:54<03:58, 238.76s/it]

 Loss:  64.88852996379137


Epoch: 100%|██████████| 5/5 [19:53<00:00, 238.74s/it]

 Loss:  44.62468160595745





P:  0.8288590604026845 R:  0.8821428571428571 F1:  0.8546712802768165


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  179.05990405380726


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.82s/it]

 Loss:  131.128272280097


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.81s/it]

 Loss:  98.06193000078201


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.81s/it]

 Loss:  70.879180053249


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.81s/it]

 Loss:  48.497114586643875





P:  0.7896551724137931 R:  0.8178571428571428 F1:  0.8035087719298246


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  202.87380266189575


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.84s/it]

 Loss:  160.95099292695522


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.81s/it]

 Loss:  128.78586754202843


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.82s/it]

 Loss:  95.28491580486298


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.83s/it]

 Loss:  67.2579286545515





P:  0.7304347826086957 R:  0.9 F1:  0.8064


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  177.69927996397018


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.81s/it]

 Loss:  128.2086355164647


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.82s/it]

 Loss:  90.54151129722595


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.87s/it]

 Loss:  60.523281555622816


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.84s/it]

 Loss:  39.10336833447218





P:  0.7671232876712328 R:  0.8 F1:  0.7832167832167832


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  208.26389780640602


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.84s/it]

 Loss:  211.67049485445023


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.83s/it]

 Loss:  211.03332245349884


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.84s/it]

 Loss:  209.98034101724625


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.84s/it]

 Loss:  209.65147268772125





P:  0.525328330206379 R:  1.0 F1:  0.6888068880688807


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  178.43371632695198


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.81s/it]

 Loss:  139.2711308375001


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.83s/it]

 Loss:  102.84789847955108


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.85s/it]

 Loss:  73.6538950484246


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.84s/it]

 Loss:  50.01313385087997





P:  0.7547169811320755 R:  0.8571428571428571 F1:  0.8026755852842811


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  198.82134228944778


Epoch:  40%|████      | 2/5 [07:57<11:56, 238.86s/it]

 Loss:  211.2990289926529


Epoch:  60%|██████    | 3/5 [11:56<07:57, 238.87s/it]

 Loss:  210.0167389512062


Epoch:  80%|████████  | 4/5 [15:55<03:58, 238.88s/it]

 Loss:  209.78010261058807


Epoch: 100%|██████████| 5/5 [19:54<00:00, 238.88s/it]

 Loss:  208.9656002521515





P:  0.5263157894736842 R:  1.0 F1:  0.6896551724137931
P:  0.7297504232962622 R:  0.8746886120996441 F1:  0.7858268197053467


In [None]:
MAX_LEN = 150

model = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
tokenizer = AutoTokenizer.from_pretrained(model)
data_dir = "/content/re_from_bioBert/GAD"
res = get_cv_scores(model, tokenizer, data_dir, 2e-5, 5, 16)
p, r, f = res
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  177.69122552871704


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.18s/it]

 Loss:  125.12045377492905


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.18s/it]

 Loss:  93.97316182032228


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.18s/it]

 Loss:  72.40580599196255


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.18s/it]

 Loss:  53.53787698969245





P:  0.7766990291262136 R:  0.8540925266903915 F1:  0.8135593220338982


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  179.9779968559742


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.18s/it]

 Loss:  128.7628919184208


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.18s/it]

 Loss:  100.03299463167787


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.17s/it]

 Loss:  76.56411984562874


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.17s/it]

 Loss:  61.29839794896543





P:  0.7692307692307693 R:  0.8928571428571429 F1:  0.8264462809917357


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  178.84841883182526


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.17s/it]

 Loss:  132.09764552116394


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.18s/it]

 Loss:  94.74046341329813


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.18s/it]

 Loss:  70.12284239009023


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.18s/it]

 Loss:  52.775863917544484





P:  0.7684887459807074 R:  0.8535714285714285 F1:  0.8087986463620982


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  175.45140942931175


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.16s/it]

 Loss:  123.89612405747175


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.16s/it]

 Loss:  91.39078196510673


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.15s/it]

 Loss:  68.2712718937546


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.15s/it]

 Loss:  53.303023900836706





P:  0.7719298245614035 R:  0.9428571428571428 F1:  0.8488745980707395


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  181.52083586156368


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.17s/it]

 Loss:  134.7963247820735


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.17s/it]

 Loss:  103.33657451719046


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.16s/it]

 Loss:  77.7221940420568


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.17s/it]

 Loss:  61.52654386870563





P:  0.780952380952381 R:  0.8785714285714286 F1:  0.8268907563025211


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  184.06574335694313


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.15s/it]

 Loss:  132.027650937438


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.14s/it]

 Loss:  96.04499527439475


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.15s/it]

 Loss:  70.65857911482453


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.15s/it]

 Loss:  57.04331450164318





P:  0.7784810126582279 R:  0.8785714285714286 F1:  0.825503355704698


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  183.41467152535915


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.16s/it]

 Loss:  123.75978025048971


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.16s/it]

 Loss:  89.37801592797041


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.15s/it]

 Loss:  66.68946928344667


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.15s/it]

 Loss:  50.46974304597825





P:  0.7656765676567657 R:  0.8285714285714286 F1:  0.7958833619210979


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  180.46789702773094


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.15s/it]

 Loss:  127.33587038516998


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.14s/it]

 Loss:  98.28983097523451


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.14s/it]

 Loss:  72.70795572176576


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.15s/it]

 Loss:  56.207456255331635





P:  0.7522388059701492 R:  0.9 F1:  0.8195121951219512


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  178.92142802476883


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.15s/it]

 Loss:  126.3027770742774


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.15s/it]

 Loss:  96.20940491184592


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.15s/it]

 Loss:  72.49244609102607


Epoch: 100%|██████████| 5/5 [06:45<00:00, 81.15s/it]

 Loss:  57.56838612817228





P:  0.7591463414634146 R:  0.8892857142857142 F1:  0.8190789473684209


Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

 Loss:  176.32146929204464


Epoch:  40%|████      | 2/5 [02:42<04:03, 81.26s/it]

 Loss:  124.81810785084963


Epoch:  60%|██████    | 3/5 [04:03<02:42, 81.21s/it]

 Loss:  95.07664704322815


Epoch:  80%|████████  | 4/5 [05:24<01:21, 81.19s/it]

 Loss:  74.10094687342644


Epoch: 100%|██████████| 5/5 [06:46<00:00, 81.21s/it]

 Loss:  59.3399417065084





P:  0.7360703812316716 R:  0.8964285714285715 F1:  0.8083735909822866
P:  0.7658913858831704 R:  0.8814806812404676 F1:  0.8192921054859447
