In [None]:
! pip install transformers

import numpy as np
import pandas as pd

import torch
import sys
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler#, get_linear_schedule_with_warmup
#from pytorch_pretrained_bert import BertTokenizer, BertConfig
from sklearn.metrics import recall_score, precision_score
from tqdm import trange


from transformers import AutoTokenizer, AutoModel
from transformers import BertForSequenceClassification
from torch.optim import Adam
from transformers import get_linear_schedule_with_warmup, AdamW

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/50/0c/7d5950fcd80b029be0a8891727ba21e0cd27692c407c51261c3c921f6da3/transformers-4.1.1-py3-none-any.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 12.8MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 57.4MB/s 
Collecting tokenizers==0.9.4
[?25l  Downloading https://files.pythonhosted.org/packages/0f/1c/e789a8b12e28be5bc1ce2156cf87cb522b379be9cadc7ad8091a4cc107c4/tokenizers-0.9.4-cp36-cp36m-manylinux2010_x86_64.whl (2.9MB)
[K     |████████████████████████████████| 2.9MB 57.8MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893261 sha256=b872e71e9029b27adf

In [None]:
from google.colab import drive
drive.mount('/content/drive')

! cp -r -f /content/drive/'My Drive'/msu/re_from_bioBert /content/
import sys

sys.path.append('/content/msu/re_from_bioBert/')
dir_name = '/content/re_from_bioBert/'

Mounted at /content/drive


In [None]:
def fit(model, x, y, n_epochs, lr, batch_size=32, max_grad_norm=None):
    """Fine-tune ``model`` in place on padded token-id sequences.

    Args:
        model: Sequence-classification model. It is called as
            ``model(input_ids, token_type_ids=None, attention_mask=..., labels=...)``
            and the first element of its output is used as the loss.
        x: Equal-length lists of token ids; id ``0`` is treated as padding.
        y: One integer class label per sequence in ``x``.
        n_epochs: Number of passes over the training data.
        lr: Peak learning rate handed to AdamW.
        batch_size: Mini-batch size.
        max_grad_norm: If not ``None``, gradients are clipped to this global
            norm before every optimizer step. ``None`` (default) disables
            clipping.

    Returns:
        None. The model is updated in place; the summed batch loss of each
        epoch is printed.
    """
    # Attention mask: 1.0 for real tokens, 0.0 for padding (id 0).
    masks = [[float(tok != 0.0) for tok in seq] for seq in x]

    t_inputs = torch.tensor(x)
    t_labels = torch.tensor(y)
    t_masks = torch.tensor(masks)

    train_data = TensorDataset(t_inputs, t_masks, t_labels)
    # Shuffle the examples each epoch instead of replaying file order.
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Follow the selected device so the function also works without a GPU.
    model.to(device)

    named_params = list(model.named_parameters())
    # Biases and LayerNorm parameters are excluded from weight decay.
    # 'gamma'/'beta' are kept for checkpoints that use those parameter names.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight']
    # The param-group key must be 'weight_decay'; an unknown key such as
    # 'weight_decay_rate' is ignored by the optimizer, leaving decay at its default.
    optimizer_grouped_parameters = [
        {'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]

    optimizer = AdamW(
        optimizer_grouped_parameters,
        lr=lr,
        eps=1e-6)

    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * n_epochs

    # Linear warmup over the first 10% of the steps, then linear decay to zero.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1 * total_steps),
        num_training_steps=total_steps)

    model.train()
    for _ in trange(n_epochs, desc="Epoch"):
        total_loss = 0

        for batch in train_dataloader:
            b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)
            model.zero_grad()

            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)

            loss = outputs[0]
            loss.backward()
            total_loss += loss.item()

            if max_grad_norm is not None:
                torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
            optimizer.step()
            scheduler.step()
        # Sum (not mean) of the batch losses of this epoch.
        print(' Loss: ', total_loss)


def predict(model, x, batch_size=32):
    """Predict a class index for every padded token-id sequence in ``x``.

    Args:
        model: Model called as
            ``model(input_ids, token_type_ids=None, attention_mask=...)``;
            the first element of its output is taken as the per-class scores.
        x: Equal-length lists of token ids; id ``0`` is treated as padding.
        batch_size: Number of sequences scored per forward pass.

    Returns:
        A list with one predicted class index (argmax over axis 1 of the
        scores) per input sequence, in input order.
    """
    # Attention mask: 1.0 for real tokens, 0.0 for padding (id 0).
    masks = [[float(tok != 0.0) for tok in seq] for seq in x]

    t_inputs = torch.tensor(x)
    t_masks = torch.tensor(masks)

    test_data = TensorDataset(t_inputs, t_masks)
    # Sequential order keeps predictions aligned with the input rows.
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Make sure the model lives on the device the batches are sent to, so the
    # function does not depend on a previous call having moved the model.
    model.to(device)
    model.eval()

    predictions = []
    for batch in test_dataloader:
        b_input_ids, b_input_mask = (t.to(device) for t in batch)

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

        scores = outputs[0].detach().cpu().numpy()
        predictions.extend(np.argmax(scores, axis=1).tolist())
    return predictions

def get_data_from_df(df, tokenizer, max_len=None):
    """Turn a dataframe of sentences into fixed-length token-id lists.

    Each sentence is wrapped as ``'[CLS] ' + text + ' [SEP]'``, tokenized,
    converted to ids, and then padded with ``0`` (or truncated) to exactly
    ``max_len`` ids.

    Args:
        df: DataFrame with a ``txt`` column (sentences) and a ``lbl`` column.
        tokenizer: Object providing ``tokenize(str)`` and
            ``convert_tokens_to_ids(tokens)``.
        max_len: Target sequence length. Defaults to the module-level
            ``MAX_LEN`` when omitted.

    Returns:
        ``(ids, labels)``: a list of ``max_len``-long id lists and the raw
        ``df.lbl.values`` array.

    Raises:
        ValueError: If ``max_len`` is smaller than 2.
    """
    if max_len is None:
        max_len = MAX_LEN  # notebook-level setting assigned in the experiment cells
    if max_len < 2:
        raise ValueError("max_len must be at least 2, got %r" % (max_len,))

    lbls, raw_txt = df.lbl.values, df.txt.values
    res = []
    for sent in raw_txt:
        tokens = tokenizer.tokenize('[CLS] ' + sent + ' [SEP]')
        tokens_ids = list(tokenizer.convert_tokens_to_ids(tokens))
        if len(tokens_ids) > max_len:
            # Truncate over-long inputs but keep the final token (the '[SEP]'
            # marker appended above) so all rows have equal length.
            tokens_ids = tokens_ids[:max_len - 1] + tokens_ids[-1:]
        # Pad short inputs with id 0, which fit/predict mask out as padding.
        tokens_ids.extend([0] * (max_len - len(tokens_ids)))
        res.append(tokens_ids)
    return res, lbls
    

def cv_iteration(model, tokenizer, data_dir, cur_it, n_epochs, lr, batch_size=32):
    """Train and evaluate ``model`` on one cross-validation fold.

    Reads ``<data_dir>/<cur_it + 1>/train.tsv`` and ``test.tsv`` as
    tab-separated files with the columns named ``txt`` and ``lbl``, fine-tunes
    the model on the training split and scores its predictions on the test
    split.

    Args:
        model: Model to fine-tune; passed to ``fit`` and ``predict``.
        tokenizer: Tokenizer passed to ``get_data_from_df``.
        data_dir: Directory containing one sub-directory per fold.
        cur_it: Zero-based fold index; the fold directory is ``cur_it + 1``.
        n_epochs: Number of training epochs.
        lr: Learning rate.
        batch_size: Batch size used for both training and prediction.

    Returns:
        ``(precision, recall, f1)`` on the fold's test split.
    """
    fold_dir = f"{data_dir}/{cur_it + 1}"

    train_df = pd.read_csv(fold_dir + '/train.tsv', sep='\t', names=['txt', 'lbl'])
    train_tokens, train_lbls = get_data_from_df(train_df, tokenizer)
    train_lbls = [int(lbl) for lbl in train_lbls]

    test_df = pd.read_csv(fold_dir + '/test.tsv', sep='\t', names=['txt', 'lbl'])
    test_tokens, test_lbls = get_data_from_df(test_df, tokenizer)
    # The first test row is discarded before labels are cast to int --
    # presumably test.tsv starts with a header line; TODO confirm against the data.
    test_tokens, test_lbls = test_tokens[1:], test_lbls[1:]
    test_lbls = [int(lbl) for lbl in test_lbls]

    fit(model, train_tokens, train_lbls, n_epochs, lr, batch_size)
    preds = predict(model, test_tokens, batch_size)

    precision, recall = precision_score(test_lbls, preds), recall_score(test_lbls, preds)
    # Guard the harmonic mean: with no true positives both terms are 0 and the
    # plain formula would divide by zero.
    denom = precision + recall
    f1 = 2 * (precision * recall) / denom if denom else 0.0
    return precision, recall, f1


def get_cv_scores(model_name, tokenizer, data_dir, lr,  n_epochs, batch_size=32, st_from=0, n_folds=10):
    """Run cross-validation and return the mean precision, recall and F1.

    For every fold index in ``range(st_from, n_folds)`` a fresh
    ``BertForSequenceClassification`` with two labels is loaded from
    ``model_name`` and handed to ``cv_iteration``; the per-fold scores are
    printed as they become available.

    Args:
        model_name: Checkpoint name or path for ``from_pretrained``.
        tokenizer: Tokenizer forwarded to ``cv_iteration``.
        data_dir: Directory with one sub-directory per fold.
        lr: Learning rate.
        n_epochs: Training epochs per fold.
        batch_size: Batch size for training and prediction.
        st_from: Zero-based index of the first fold to run. The returned
            means only cover the folds that were actually run.
        n_folds: Total number of folds (exclusive upper bound of the fold
            index). Defaults to 10.

    Returns:
        ``(mean_precision, mean_recall, mean_f1)`` over the folds that ran.

    Raises:
        ValueError: If ``st_from`` and ``n_folds`` select no fold.
    """
    folds = range(st_from, n_folds)
    if len(folds) == 0:
        # Fail early with a clear message instead of dividing by zero below.
        raise ValueError(
            "no folds to run: st_from=%r, n_folds=%r" % (st_from, n_folds))

    precisions, recalls, f1s = [], [], []
    for i in folds:
        # Reload the pretrained weights so folds do not share fine-tuned state.
        model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
        p, r, f = cv_iteration(model, tokenizer, data_dir, i, n_epochs, lr, batch_size)
        print('P: ', p, 'R: ', r, 'F1: ', f)
        precisions.append(p)
        recalls.append(r)
        f1s.append(f)

    n_run = len(folds)
    return sum(precisions) / n_run, sum(recalls) / n_run, sum(f1s) / n_run


In [None]:
# Experiment: cross-validate dmis-lab/biobert-v1.1 on the GAD folds
# (learning rate 2e-5, 3 epochs, batch size 32).
MAX_LEN = 150  # padded sequence length read by get_data_from_df

model = "dmis-lab/biobert-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model)
data_dir = "/content/re_from_bioBert/GAD"
p, r, f = res = get_cv_scores(model, tokenizer, data_dir, lr=2e-5, n_epochs=3, batch_size=32)
print('P: ', p, 'R: ', r, 'F1: ', f)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433286112.0, style=ProgressStyle(descri…




Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:02<04:04, 122.36s/it]

 Loss:  91.39673164486885


Epoch:  67%|██████▋   | 2/3 [04:09<02:03, 123.77s/it]

 Loss:  68.3507828116417


Epoch: 100%|██████████| 3/3 [06:17<00:00, 125.67s/it]

 Loss:  53.80413669347763





P:  0.7492795389048992 R:  0.9252669039145908 F1:  0.8280254777070065


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.61s/it]

 Loss:  91.32923457026482


Epoch:  67%|██████▋   | 2/3 [04:15<02:07, 127.58s/it]

 Loss:  65.1553760021925


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.41s/it]

 Loss:  53.22057132422924





P:  0.7835365853658537 R:  0.9178571428571428 F1:  0.8453947368421053


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.46s/it]

 Loss:  92.3953418135643


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.46s/it]

 Loss:  66.82543221116066


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.43s/it]

 Loss:  52.64888007938862





P:  0.788961038961039 R:  0.8678571428571429 F1:  0.8265306122448981


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.44s/it]

 Loss:  91.7310399711132


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.34s/it]

 Loss:  66.5868114233017


Epoch: 100%|██████████| 3/3 [06:21<00:00, 127.31s/it]

 Loss:  51.39080773293972





P:  0.739612188365651 R:  0.9535714285714286 F1:  0.8330733229329175


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.63s/it]

 Loss:  91.4558767080307


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.48s/it]

 Loss:  64.86458984017372


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.41s/it]

 Loss:  51.01508919149637





P:  0.7386018237082067 R:  0.8678571428571429 F1:  0.7980295566502463


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.57s/it]

 Loss:  91.1013668179512


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.48s/it]

 Loss:  63.5058980435133


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.46s/it]

 Loss:  50.84156250953674





P:  0.7507418397626113 R:  0.9035714285714286 F1:  0.820097244732577


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.48s/it]

 Loss:  90.09555000066757


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.39s/it]

 Loss:  64.50710928440094


Epoch: 100%|██████████| 3/3 [06:21<00:00, 127.32s/it]

 Loss:  51.09539006650448





P:  0.7363344051446945 R:  0.8178571428571428 F1:  0.7749576988155669


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.62s/it]

 Loss:  93.0159969329834


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.51s/it]

 Loss:  64.93791165947914


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.49s/it]

 Loss:  51.85404919087887





P:  0.7734138972809668 R:  0.9142857142857143 F1:  0.8379705400981996


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.33s/it]

 Loss:  90.28923550248146


Epoch:  67%|██████▋   | 2/3 [04:14<02:07, 127.41s/it]

 Loss:  66.6409080028534


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.39s/it]

 Loss:  50.316844791173935





P:  0.7230320699708455 R:  0.8857142857142857 F1:  0.7961476725521668


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [02:07<04:15, 127.53s/it]

 Loss:  90.31613659858704


Epoch:  67%|██████▋   | 2/3 [04:15<02:07, 127.59s/it]

 Loss:  66.50431564450264


Epoch: 100%|██████████| 3/3 [06:22<00:00, 127.46s/it]

 Loss:  52.94078643620014





P:  0.7155172413793104 R:  0.8892857142857142 F1:  0.7929936305732483
P:  0.7499030628844078 R:  0.8943124046771734 F1:  0.8153220493148933


In [None]:
# Experiment: cross-validate dmis-lab/biobert-v1.1 on the euadr folds
# (learning rate 3e-5, 3 epochs, batch size 32).
MAX_LEN = 260  # padded sequence length read by get_data_from_df

model = "dmis-lab/biobert-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model)
data_dir = "/content/re_from_bioBert/euadr"
p, r, f = res = get_cv_scores(model, tokenizer, data_dir, lr=3e-5, n_epochs=3, batch_size=32)
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.33s/it]

 Loss:  5.956295996904373


Epoch:  67%|██████▋   | 2/3 [00:31<00:15, 15.49s/it]

 Loss:  5.452409356832504


Epoch: 100%|██████████| 3/3 [00:46<00:00, 15.47s/it]

 Loss:  5.295443892478943





P:  0.7297297297297297 R:  1.0 F1:  0.8437499999999999


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:14<00:29, 14.75s/it]

 Loss:  7.623976111412048


Epoch:  67%|██████▋   | 2/3 [00:29<00:14, 14.83s/it]

 Loss:  5.651008248329163


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.02s/it]

 Loss:  5.29548379778862





P:  0.7297297297297297 R:  1.0 F1:  0.8437499999999999


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.05s/it]

 Loss:  6.167896956205368


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.07s/it]

 Loss:  5.532290279865265


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.10s/it]

 Loss:  5.10773891210556





P:  0.7222222222222222 R:  1.0 F1:  0.8387096774193548


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.15s/it]

 Loss:  6.714100807905197


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.18s/it]

 Loss:  5.233986854553223


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.21s/it]

 Loss:  4.918514668941498





P:  0.7272727272727273 R:  0.9230769230769231 F1:  0.8135593220338984


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:14<00:29, 14.99s/it]

 Loss:  5.753866255283356


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.05s/it]

 Loss:  5.493863821029663


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.12s/it]

 Loss:  5.011008769273758





P:  1.0 R:  0.7692307692307693 F1:  0.8695652173913044


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.04s/it]

 Loss:  6.048984944820404


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.08s/it]

 Loss:  5.523903876543045


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.14s/it]

 Loss:  5.276842087507248





P:  0.7878787878787878 R:  1.0 F1:  0.8813559322033898


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.09s/it]

 Loss:  5.8298549354076385


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.13s/it]

 Loss:  5.208426386117935


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.18s/it]

 Loss:  4.647223204374313





P:  0.7647058823529411 R:  1.0 F1:  0.8666666666666666


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.14s/it]

 Loss:  6.605508804321289


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.16s/it]

 Loss:  5.424450248479843


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.19s/it]

 Loss:  5.16482999920845





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.11s/it]

 Loss:  6.3934484124183655


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.15s/it]

 Loss:  5.177882730960846


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.18s/it]

 Loss:  4.982953429222107





P:  0.7352941176470589 R:  0.9615384615384616 F1:  0.8333333333333333


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:  33%|███▎      | 1/3 [00:15<00:30, 15.07s/it]

 Loss:  6.7679100930690765


Epoch:  67%|██████▋   | 2/3 [00:30<00:15, 15.11s/it]

 Loss:  5.740025669336319


Epoch: 100%|██████████| 3/3 [00:45<00:00, 15.17s/it]

 Loss:  5.624357432126999





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427
P:  0.7682547482547483 R:  0.9653846153846153 F1:  0.8495608181834833


In [None]:
# Experiment: cross-validate dmis-lab/biobert-large-cased-v1.1 on the GAD folds
# (learning rate 2e-5, 3 epochs, batch size 16).
MAX_LEN = 150  # padded sequence length read by get_data_from_df

model = "dmis-lab/biobert-large-cased-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model)
data_dir = "/content/re_from_bioBert/GAD"
p, r, f = res = get_cv_scores(model, tokenizer, data_dir, lr=2e-5, n_epochs=3, batch_size=16)
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  175.4302276968956


Epoch:  67%|██████▋   | 2/3 [14:00<06:57, 417.47s/it]

 Loss:  125.84052204340696


Epoch: 100%|██████████| 3/3 [21:09<00:00, 423.03s/it]

 Loss:  85.411407770589





P:  0.779874213836478 R:  0.8825622775800712 F1:  0.8280467445742905


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  208.3087173998356


Epoch:  67%|██████▋   | 2/3 [14:12<07:05, 425.90s/it]

 Loss:  210.94876408576965


Epoch: 100%|██████████| 3/3 [21:18<00:00, 426.09s/it]

 Loss:  209.1937507390976





P:  0.525328330206379 R:  1.0 F1:  0.6888068880688807


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  174.37870970368385


Epoch:  67%|██████▋   | 2/3 [14:17<07:09, 429.00s/it]

 Loss:  123.92327208817005


Epoch: 100%|██████████| 3/3 [21:26<00:00, 428.76s/it]

 Loss:  87.3146374411881





P:  0.8129251700680272 R:  0.8535714285714285 F1:  0.8327526132404182


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  177.94569772481918


Epoch:  67%|██████▋   | 2/3 [14:17<07:08, 428.93s/it]

 Loss:  125.66959860175848


Epoch: 100%|██████████| 3/3 [21:25<00:00, 428.64s/it]

 Loss:  93.81749746203423





P:  0.7669616519174042 R:  0.9285714285714286 F1:  0.8400646203554121


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  173.8275181055069


Epoch:  67%|██████▋   | 2/3 [14:16<07:08, 428.46s/it]

 Loss:  132.6164667159319


Epoch: 100%|██████████| 3/3 [21:25<00:00, 428.40s/it]

 Loss:  98.55549430474639





P:  0.775974025974026 R:  0.8535714285714285 F1:  0.8129251700680272


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  177.77910020947456


Epoch:  67%|██████▋   | 2/3 [14:17<07:08, 428.81s/it]

 Loss:  125.97149246186018


Epoch: 100%|██████████| 3/3 [21:23<00:00, 427.94s/it]

 Loss:  86.00288889929652





P:  0.7424242424242424 R:  0.875 F1:  0.8032786885245902


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  174.57998616993427


Epoch:  67%|██████▋   | 2/3 [14:09<07:04, 424.35s/it]

 Loss:  128.6928530484438


Epoch: 100%|██████████| 3/3 [21:14<00:00, 424.93s/it]

 Loss:  89.32223909161985





P:  0.7571428571428571 R:  0.7571428571428571 F1:  0.7571428571428571


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  211.75305378437042


Epoch:  67%|██████▋   | 2/3 [14:06<07:03, 423.08s/it]

 Loss:  211.26532024145126


Epoch: 100%|██████████| 3/3 [21:10<00:00, 423.63s/it]

 Loss:  210.27541077136993





P:  0.525328330206379 R:  1.0 F1:  0.6888068880688807


In [None]:
# Experiment: same GAD / biobert-large-cased-v1.1 setup, started at fold index 8.
# get_cv_scores averages only over the folds it runs, so the printed means
# cover fold indices 8 and 9 only.
MAX_LEN = 150  # padded sequence length read by get_data_from_df

model = "dmis-lab/biobert-large-cased-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model)
data_dir = "/content/re_from_bioBert/GAD"
p, r, f = res = get_cv_scores(
    model, tokenizer, data_dir, lr=2e-5, n_epochs=3, batch_size=16, st_from=8)
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  185.6430408656597


Epoch:  67%|██████▋   | 2/3 [07:57<03:58, 238.75s/it]

 Loss:  136.77046521008015


Epoch: 100%|██████████| 3/3 [11:56<00:00, 238.76s/it]

 Loss:  100.39570607244968





P:  0.7390029325513197 R:  0.9 F1:  0.8115942028985507


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  180.88529244065285


Epoch:  67%|██████▋   | 2/3 [07:57<03:58, 238.90s/it]

 Loss:  130.93174132704735


Epoch: 100%|██████████| 3/3 [11:56<00:00, 238.88s/it]

 Loss:  88.90952000766993





P:  0.7373134328358208 R:  0.8821428571428571 F1:  0.8032520325203252
P:  0.7381581826935703 R:  0.8910714285714285 F1:  0.807423117709438


In [None]:
# Module-level constant set before the run.
# NOTE(review): presumably read by helpers defined elsewhere in the notebook — confirm.
MAX_LEN = 260

# Evaluate the BioBERT-large checkpoint on the data stored under the euadr directory.
# `model` holds the checkpoint identifier string (not a loaded network); the same
# identifier selects the tokenizer to load.
model = "dmis-lab/biobert-large-cased-v1.1"
data_dir = "/content/re_from_bioBert/euadr"
tokenizer = AutoTokenizer.from_pretrained(model)

# Three trailing positional values are forwarded as-is to get_cv_scores; any
# parameters beyond those are left to whatever defaults the function declares.
# NOTE(review): the signature is not visible in this cell — confirm argument meaning.
res = get_cv_scores(model, tokenizer, data_dir, 3e-5, 3, 8)

# The result unpacks into three values, reported under the labels P, R and F1.
(p, r, f) = res
print('P: ', p, 'R: ', r, 'F1: ', f)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=289.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466736.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1461743275.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  22.661943286657333


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.55s/it]

 Loss:  19.194591268897057


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.50s/it]

 Loss:  14.200837433338165





P:  0.7575757575757576 R:  0.9259259259259259 F1:  0.8333333333333334


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  23.979815378785133


Epoch:  67%|██████▋   | 2/3 [01:00<00:30, 30.43s/it]

 Loss:  23.689436942338943


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.43s/it]

 Loss:  23.185030221939087





P:  0.7297297297297297 R:  1.0 F1:  0.8437499999999999


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  25.62017734348774


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.57s/it]

 Loss:  23.854162961244583


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.58s/it]

 Loss:  23.97520685195923





P:  0.7222222222222222 R:  1.0 F1:  0.8387096774193548


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  24.231747210025787


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.61s/it]

 Loss:  24.115555956959724


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.63s/it]

 Loss:  23.42150318622589





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  25.091708719730377


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.66s/it]

 Loss:  24.994070649147034


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.63s/it]

 Loss:  23.77052417397499





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


In [None]:
# Module-level constant set before the run.
# NOTE(review): presumably read by helpers defined elsewhere in the notebook — confirm.
MAX_LEN = 260

# Evaluate the BioBERT-large checkpoint on the data stored under the euadr directory.
# `model` holds the checkpoint identifier string (not a loaded network); the same
# identifier selects the tokenizer to load.
model = "dmis-lab/biobert-large-cased-v1.1"
data_dir = "/content/re_from_bioBert/euadr"
tokenizer = AutoTokenizer.from_pretrained(model)

# Four trailing positional values are forwarded as-is to get_cv_scores.
# NOTE(review): the meaning of each value (including the final 5) depends on the
# function's signature, which is not visible in this cell — confirm before editing.
res = get_cv_scores(model, tokenizer, data_dir, 3e-5, 3, 8, 5)

# The result unpacks into three values, reported under the labels P, R and F1.
(p, r, f) = res
print('P: ', p, 'R: ', r, 'F1: ', f)

Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  25.82494419813156


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.71s/it]

 Loss:  23.961319237947464


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.65s/it]

 Loss:  23.39994180202484





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  24.176503002643585


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.62s/it]

 Loss:  20.26347441971302


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.62s/it]

 Loss:  15.410637490451336





P:  0.7692307692307693 R:  0.7692307692307693 F1:  0.7692307692307693


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  23.701788783073425


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.60s/it]

 Loss:  23.757530316710472


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.59s/it]

 Loss:  24.591555759310722





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  24.24306657910347


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.72s/it]

 Loss:  23.78836965560913


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.66s/it]

 Loss:  23.39257577061653





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427


Some weights of the model checkpoint at dmis-lab/biobert-large-cased-v1.1 were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification w

 Loss:  24.404844641685486


Epoch:  67%|██████▋   | 2/3 [01:01<00:30, 30.59s/it]

 Loss:  20.78427827358246


Epoch: 100%|██████████| 3/3 [01:31<00:00, 30.58s/it]

 Loss:  17.266600236296654





P:  0.7428571428571429 R:  1.0 F1:  0.8524590163934427
P:  0.7481318681318682 R:  0.9538461538461538 F1:  0.835813366960908
