In [1]:

!pip install -q transformers



References:

https://colab.research.google.com/drive/19loLGUDjxGKy4ulZJ1m3hALq2ozNyEGe#scrollTo=oJFsRo_vGDYU

https://www.kaggle.com/piantic/pytorch-tpu

In [2]:
# for TPU
#!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
#!python pytorch-xla-env-setup.py --apt-packages libomp5 libopenblas-dev

In [3]:
import os 
import sys

import math
import random
import time
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import transformers as T
from sklearn.metrics import fbeta_score
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm

import seaborn as sns

from imblearn.under_sampling import RandomUnderSampler


In [4]:
warnings.filterwarnings("ignore")

In [5]:
# TPU alternative (disabled). Kept as comments rather than as a string literal
# so that nothing is evaluated and thrown away when the cell runs.
# import torch_xla
# import torch_xla.core.xla_model as xm
#
# device = xm.xla_device()
# torch.set_default_tensor_type('torch.FloatTensor')
# print(device)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
DATA_DIR = "../input/signate-spws-2/"
OUTPUT_DIR = "./"

#../input/signate-spws-2/train.csv

In [7]:
# Set DEBUG = True to smoke-test the pipeline on a handful of rows.
DEBUG = False
DEBUG_ROWS = 20

train = pd.read_csv(DATA_DIR + "train.csv")
test = pd.read_csv(DATA_DIR + "test.csv")
# sample_submit.csv is read without a header row, so its columns are named here.
sub = pd.read_csv(DATA_DIR + "sample_submit.csv", header=None)
sub.columns = ["id", "judgement"]

if DEBUG:
    train = train.sample(min(DEBUG_ROWS, len(train))).reset_index(drop=True)
    # main() writes the test-order predictions into `sub` positionally, so the
    # debug subset has to keep the *same* row positions in both frames;
    # sampling them independently would pair predictions with unrelated ids.
    # NOTE(review): assumes sample_submit.csv has one row per test row, in the
    # same order — confirm against the competition data.
    debug_positions = test.sample(min(DEBUG_ROWS, len(test))).index
    test = test.iloc[debug_positions].reset_index(drop=True)
    sub = sub.iloc[debug_positions].reset_index(drop=True)

In [8]:
# Model input text: the title, a space, then the abstract. A missing abstract
# is replaced by the literal string 'NaN'.
train['text'] = train['title'].str.cat(train["abstract"].fillna('NaN'), sep=" ")
# Number of pieces obtained when the text is split on single spaces.
train['text_len'] = train['text'].map(lambda text: len(text.split(' ')))

In [9]:
# Data correction: force the label of these two training ids to 0.
train.loc[train['id'].isin([2488, 7708]), 'judgement'] = 0

In [10]:
def get_train_data(train, n_splits=5, random_state=819):
    """Assign a stratified cross-validation fold number to every row.

    Args:
        train: DataFrame with a ``judgement`` column used for stratification.
        n_splits: number of folds (default 5, the original setting).
        random_state: seed of the shuffled split (default 819, the original
            setting).

    Returns:
        The same DataFrame object, with a ``fold`` column of dtype uint8 added
        (or overwritten) in place.
    """
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # split() yields *positional* row indices. Collecting the assignment in a
    # plain array keeps it positional; the previous label-based
    # `train.loc[val_index, ...]` was only correct for a default RangeIndex.
    fold_of_row = np.zeros(len(train), dtype=np.uint8)
    for n, (_, val_index) in enumerate(splitter.split(train, train["judgement"])):
        fold_of_row[val_index] = n
    train["fold"] = fold_of_row

    return train

In [11]:
train = get_train_data(train)

In [12]:
class config:
    """Run configuration, read as class attributes (``config.MAX_LEN`` etc.).

    Settings common to both modes are defined once. Only the decision border
    and the pretrained model differ between a DEBUG run and a full run.
    """

    # ---- shared by DEBUG and full runs ----
    seed = 89
    NUM_SPLITS = 5
    MAX_LEN = 400
    TRAIN_BATCH_SIZE = 16
    VALID_BATCH_SIZE = 16
    EPOCH = 3
    PATIENCE = 5

    if DEBUG:
        # Debug runs use the positive rate of the (sub-sampled) training set as border.
        border = len(train[train["judgement"] == 1]) / len(train["judgement"])
        MODEL_NAME = "allenai/scibert_scivocab_uncased"
    else:
        border = 0.5
        # Alternatives tried earlier: "bert-base-uncased",
        # "allenai/scibert_scivocab_uncased", "RoBERTa-base-PM",
        # "RoBERTa-base-PM-Voc" (https://github.com/facebookresearch/bio-lm).
        MODEL_NAME = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"

    # Prefix of every output file (log, checkpoints, submissions). Deriving it
    # from MODEL_NAME gives the same value as before for the full run and also
    # defines it for DEBUG runs, where it was previously missing.
    FILENAME = MODEL_NAME.split("/")[-1]

In [13]:
def init_logger(log_file=OUTPUT_DIR + f"{config.FILENAME}_train.log"):
    """Create the notebook logger, writing bare messages to the console and a file.

    Args:
        log_file: path of the log file. The default is evaluated once, when
            this function is defined.

    Returns:
        The ``logging.Logger`` registered under ``__name__``, at INFO level,
        with exactly one stream handler and one file handler attached.
    """
    from logging import INFO, FileHandler, Formatter, StreamHandler, getLogger

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger() returns the same object on every call, so re-running this
    # cell would otherwise stack extra handlers and repeat every log line.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

LOGGER = init_logger()

In [14]:
def seed_torch(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` into ``os.environ`` and switches cuDNN to
    deterministic mode.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

seed = config.seed
seed_torch(seed)

In [15]:
from scipy.optimize import minimize, minimize_scalar
def optimize_threshold(y_true, y_pred, beta=7, bounds=(0, 0.5)):
    """Search for the decision threshold that maximises the F-beta score.

    A sample is predicted positive when ``y_pred >= threshold``. The search is
    a bounded scalar minimisation of the negated score.

    Args:
        y_true: ground-truth binary labels.
        y_pred: predicted scores, one per label (any array-like).
        beta: beta of the F-beta score (default 7, the original setting).
        bounds: ``(low, high)`` interval searched for the threshold
            (default ``(0, 0.5)``, the original setting).

    Returns:
        The best threshold found, as a Python float.
    """
    # Accept lists as well as arrays; the comparison below needs an ndarray.
    y_pred = np.asarray(y_pred)

    def negative_fbeta(threshold):
        return -fbeta_score(y_true, y_pred >= threshold, beta=beta)

    result = minimize_scalar(negative_fbeta, bounds=bounds, method='bounded')

    # float() works whether the optimiser hands back a NumPy or a Python scalar.
    return float(result.x)


In [16]:
class BaseDataset(Dataset):
    """Tokenised title+abstract texts, optionally paired with their labels.

    All texts are tokenised once in the constructor, padded/truncated to
    ``config.MAX_LEN``. Items are ``(input_ids, attention_mask, label)`` when
    ``include_labels`` is true and ``(input_ids, attention_mask)`` otherwise.
    """

    def __init__(self, df, model_name, include_labels=True):
        """
        Args:
            df: DataFrame with ``title`` and ``abstract`` columns, plus
                ``judgement`` when ``include_labels`` is true.
            model_name: name passed to ``BertTokenizer.from_pretrained``.
            include_labels: whether items carry a label.
        """
        tokenizer = T.BertTokenizer.from_pretrained(model_name)

        self.df = df
        self.include_labels = include_labels

        # Build the text in a local Series instead of writing a "text" column
        # into the caller's DataFrame (which silently mutated `train`/`test`).
        # A missing title becomes "" so the text never ends up as NaN; a
        # missing abstract keeps the original 'NaN' placeholder.
        text = df["title"].fillna("") + " " + df["abstract"].fillna('NaN')
        self.text = text.tolist()

        self.encoded = tokenizer.batch_encode_plus(
            self.text,
            padding='max_length',
            max_length=config.MAX_LEN,
            truncation=True,
            return_attention_mask=True,
        )

        if self.include_labels:
            self.labels = df["judgement"].values

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the tensors for row ``idx`` (label as float, if included)."""
        input_ids = torch.tensor(self.encoded['input_ids'][idx])
        attention_mask = torch.tensor(self.encoded['attention_mask'][idx])

        if not self.include_labels:
            return input_ids, attention_mask

        # Float labels: the training loss used in this notebook is nn.BCELoss.
        label = torch.tensor(self.labels[idx], dtype=torch.float)
        return input_ids, attention_mask, label

In [17]:
class BaseModel(nn.Module):
    """BERT sequence classifier with a single logit, returning probabilities."""

    def __init__(self, model_name):
        """
        Args:
            model_name: name passed to
                ``BertForSequenceClassification.from_pretrained``.
        """
        super().__init__()

        self.model = T.BertForSequenceClassification.from_pretrained(model_name, num_labels=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask):
        """Return the sigmoid of the single logit, one value per sample.

        Returns:
            Tensor of shape ``(batch,)``.
        """
        out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        # Squeeze only the trailing logit axis. A bare .squeeze() would also
        # drop the batch axis for a batch of one, producing a 0-d tensor that
        # neither matches the (1,)-shaped labels in BCELoss nor can be passed
        # to np.concatenate.
        out = self.sigmoid(out.logits).squeeze(-1)

        return out

In [18]:
class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a scalar."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the last value, mean, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``"<minutes>m <seconds>s"``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return "%dm %ds" % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed time and a linear estimate of the time remaining.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work already done; must be non-zero.

    Returns:
        A string such as ``"0m 42s (remain 1m 3s)"``.
    """
    elapsed = time.time() - since
    # Projected total duration minus what has already passed.
    remaining = elapsed / (percent) - elapsed
    return "%s (remain %s)" % (asMinutes(elapsed), asMinutes(remaining))

In [19]:
def train_fn(train_loader, model, criterion, optimizer, epoch, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: yields ``(input_ids, attention_mask, labels)`` batches.
        model: module called as ``model(input_ids, attention_mask)``.
        criterion: loss called as ``criterion(predictions, labels)``.
        optimizer: optimizer stepped once per batch.
        epoch: zero-based epoch number, used only in the progress line.
        device: device every batch tensor is moved to.

    Returns:
        The mean training loss, weighted by batch size.
    """
    start = time.time()
    losses = AverageMeter()
    n_steps = len(train_loader)

    # switch to train mode
    model.train()

    for step, batch in enumerate(train_loader):
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        y_preds = model(input_ids, attention_mask)
        loss = criterion(y_preds, labels)

        # record loss, weighted by the number of samples in the batch
        losses.update(loss.item(), labels.size(0))

        loss.backward()
        optimizer.step()

        # Progress line every 100 steps and on the final step.
        is_last_step = step == (n_steps - 1)
        if step % 100 == 0 or is_last_step:
            print(
                f"Epoch: [{epoch + 1}][{step}/{n_steps}] "
                f"Elapsed {timeSince(start, float(step + 1) / n_steps):s} "
                f"Loss: {losses.avg:.4f} "
            )

    return losses.avg

In [20]:
def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader`` without updating it.

    Args:
        valid_loader: yields ``(input_ids, attention_mask, labels)`` batches.
        model: module called as ``model(input_ids, attention_mask)``.
        criterion: loss called as ``criterion(predictions, labels)``.
        device: device every batch tensor is moved to.

    Returns:
        ``(mean_loss, predictions)``: the batch-size-weighted mean loss and
        the per-batch model outputs concatenated in loader order.
    """
    start = time.time()
    losses = AverageMeter()
    n_steps = len(valid_loader)
    batch_outputs = []

    # switch to evaluation mode
    model.eval()

    for step, batch in enumerate(valid_loader):
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        # forward pass only; no gradients are tracked
        with torch.no_grad():
            y_preds = model(input_ids, attention_mask)

        loss = criterion(y_preds, labels)
        losses.update(loss.item(), labels.size(0))

        # keep the raw outputs for scoring by the caller
        batch_outputs.append(y_preds.to("cpu").numpy())

        # Progress line every 100 steps and on the final step.
        is_last_step = step == (n_steps - 1)
        if step % 100 == 0 or is_last_step:
            print(
                f"EVAL: [{step}/{n_steps}] "
                f"Elapsed {timeSince(start, float(step + 1) / n_steps):s} "
                f"Loss: {losses.avg:.4f} "
            )

    return losses.avg, np.concatenate(batch_outputs)


In [21]:
def inference():
    """Predict the test set with every fold's best checkpoint and average them.

    Reads the module-level ``test`` frame and the checkpoints written by
    ``train_loop`` (``{config.FILENAME}_fold{fold}_best.pth`` in ``OUTPUT_DIR``).

    Returns:
        1-D array with the mean predicted probability per test row, in the
        row order of ``test``.
    """
    test_dataset = BaseDataset(test, config.MODEL_NAME, include_labels=False)
    test_loader = DataLoader(
        test_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    fold_predictions = []
    for fold in range(config.NUM_SPLITS):
        # Log the model actually in use (the message used to hard-code
        # "bert-base-uncased" regardless of config.MODEL_NAME).
        LOGGER.info(f"========== model: {config.MODEL_NAME} fold: {fold} inference ==========")

        model = BaseModel(config.MODEL_NAME)
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
        checkpoint = torch.load(
            OUTPUT_DIR + f"{config.FILENAME}_fold{fold}_best.pth", map_location=device
        )
        model.load_state_dict(checkpoint["model"])
        model.to(device)
        model.eval()

        preds = []
        with torch.no_grad():
            for input_ids, attention_mask in tqdm(test_loader, total=len(test_loader)):
                y_preds = model(input_ids.to(device), attention_mask.to(device))
                # reshape(-1) keeps a final batch of one as a length-1 vector
                # so that np.concatenate below always receives 1-D arrays.
                preds.append(y_preds.to("cpu").numpy().reshape(-1))
        fold_predictions.append(np.concatenate(preds))

    # Average the per-fold probabilities row by row.
    return np.mean(fold_predictions, axis=0)

In [22]:
def train_loop(train, fold):
    """Train and validate one cross-validation fold.

    Rows whose ``fold`` column differs from ``fold`` form the training set,
    which is randomly under-sampled to a 1:1 class ratio; the remaining rows
    are the validation set and are left untouched. After every epoch the
    validation F-beta (beta=7) at ``config.border`` is computed, and the model
    with the best score so far is saved to
    ``{OUTPUT_DIR}{config.FILENAME}_fold{fold}_best.pth``.

    Args:
        train: DataFrame with ``fold``, ``judgement``, ``title`` and
            ``abstract`` columns.
        fold: number of the fold held out for validation.

    Returns:
        The validation rows with a ``preds`` column holding the predictions of
        the best-scoring epoch.

    Raises:
        RuntimeError: if no epoch produced a score (e.g. ``config.EPOCH`` is 0).
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # Data Loader
    # ====================================================
    trn_idx = train[train["fold"] != fold].index
    val_idx = train[train["fold"] == fold].index

    train_folds = train.loc[trn_idx].reset_index(drop=True)
    valid_folds = train.loc[val_idx].reset_index(drop=True)

    # Balance the classes of the training part only.
    rus = RandomUnderSampler(sampling_strategy=1.0, random_state=seed)
    features, judgement = rus.fit_resample(
        train_folds.drop('judgement', axis=1), train_folds['judgement']
    )
    train_folds = pd.concat([features, judgement], axis=1)

    train_dataset = BaseDataset(train_folds, config.MODEL_NAME)
    valid_dataset = BaseDataset(valid_folds, config.MODEL_NAME)

    train_loader = DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    # ====================================================
    # Model
    # ====================================================
    model = BaseModel(config.MODEL_NAME)
    model.to(device)

    # NOTE(review): transformers marks its own AdamW as deprecated in favour of
    # torch.optim.AdamW. The two do not share the same default hyper-parameters,
    # so switching is a deliberate change — confirm before replacing.
    optimizer = T.AdamW(model.parameters(), lr=2e-5)

    criterion = nn.BCELoss()

    # ====================================================
    # Loop
    # ====================================================
    best_score = -1
    # Predictions of the best epoch are kept in memory instead of being read
    # back from the checkpoint, so a stale file from an earlier run can never
    # be picked up and no extra deserialisation is needed.
    best_preds = None
    valid_labels = valid_folds["judgement"].values
    checkpoint_path = OUTPUT_DIR + f"{config.FILENAME}_fold{fold}_best.pth"

    # Early stopping is not implemented: every fold runs config.EPOCH epochs.
    for epoch in range(config.EPOCH):
        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

        # scoring
        score = fbeta_score(valid_labels, np.where(preds < config.border, 0, 1), beta=7.0)

        elapsed = time.time() - start_time
        LOGGER.info(
            f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
        )
        LOGGER.info(f"Epoch {epoch+1} - Score: {score}")

        if score > best_score:
            best_score = score
            best_preds = preds
            LOGGER.info(f"Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model")
            # "preds" stays in the file so its layout is unchanged for other readers.
            torch.save({"model": model.state_dict(), "preds": preds}, checkpoint_path)

    if best_preds is None:
        raise RuntimeError(
            f"fold {fold}: no epoch produced a validation score, so there are no predictions to return"
        )

    valid_folds["preds"] = best_preds

    return valid_folds

In [23]:
def get_result(result_df):
    """Log F-beta (beta=7) scores for a set of predictions.

    Two scores are logged: one using the share of positive labels in
    ``result_df`` as the decision border, and one using the threshold found by
    ``optimize_threshold``.

    Args:
        result_df: DataFrame with ``preds`` and ``judgement`` columns.

    Returns:
        The optimised threshold.
    """
    labels = result_df["judgement"].values
    preds = result_df["preds"].values

    # Baseline border: fraction of rows labelled 1.
    n_positive = len(result_df[result_df["judgement"] == 1])
    border = n_positive / len(result_df["judgement"])
    baseline_score = fbeta_score(labels, np.where(preds < border, 0, 1), beta=7.0)
    LOGGER.info(f"Score: {baseline_score:<.5f}")

    # Tuned border: threshold maximising the score on these same predictions.
    best_threshold = optimize_threshold(labels, preds)
    LOGGER.info(f"Best threshold : {best_threshold:<.5f}")
    tuned_score = fbeta_score(labels, np.where(preds < best_threshold, 0, 1), beta=7.0)
    LOGGER.info(f"After optimizing score: {tuned_score:<.5f}")

    return best_threshold

In [24]:
def main():
    """Run cross-validated training, then test inference, and write all outputs.

    Files written to ``OUTPUT_DIR`` (all named after ``config.FILENAME``):
    the out-of-fold predictions, a submission thresholded at
    ``config.border``, a submission thresholded at the mean of the per-fold
    optimised thresholds, and the raw averaged probabilities for stacking.
    The module-level ``sub`` frame is overwritten in the process.
    """
    # Training
    # Collect the per-fold frames and concatenate once, rather than growing a
    # DataFrame by repeated concat onto an empty frame.
    fold_frames = []
    thresholds = []
    for fold in range(config.NUM_SPLITS):
        fold_oof = train_loop(train, fold)
        fold_frames.append(fold_oof)
        LOGGER.info(f"========== fold: {fold} result ==========")
        thresholds.append(get_result(fold_oof))
    oof_df = pd.concat(fold_frames)

    # CV result
    LOGGER.info(f"========== CV ==========")
    get_result(oof_df)

    # Save OOF result
    oof_df.to_csv(OUTPUT_DIR + f"{config.FILENAME}_oof_df.csv", index=False)

    # Inference
    proba_predictions = inference()

    # submission thresholded at the configured border
    sub["judgement"] = np.where(proba_predictions < config.border, 0, 1)
    sub.to_csv(OUTPUT_DIR + f"./sub_{config.FILENAME}.csv", index=False, header=False)

    # submission thresholded at the mean of the per-fold optimised thresholds
    sub["judgement"] = np.where(proba_predictions < np.mean(thresholds), 0, 1)
    sub.to_csv(OUTPUT_DIR + f"./sub_mean_thr_{config.FILENAME}.csv", index=False, header=False)

    # raw probabilities, for stacking
    sub["judgement"] = proba_predictions
    sub.to_csv(OUTPUT_DIR + f"./stack_{config.FILENAME}.csv", index=False, header=False)
In [25]:
if __name__ == "__main__":
  main()



Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/337 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

Epoch: [1][0/63] Elapsed 0m 2s (remain 2m 4s) Loss: 0.6715 
Epoch: [1][62/63] Elapsed 0m 43s (remain 0m 0s) Loss: 0.5226 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 12s) Loss: 0.5712 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.4361 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.4193 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.4163 


Epoch 1 - avg_train_loss: 0.5226  avg_val_loss: 0.4163  time: 118s
Epoch 1 - Score: 0.8198980294887694
Epoch 1 - Save Best Score: 0.8199 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.4163 
Epoch: [2][0/63] Elapsed 0m 0s (remain 0m 50s) Loss: 0.2796 
Epoch: [2][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.2113 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 4s) Loss: 0.2956 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.1421 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.1327 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.1329 


Epoch 2 - avg_train_loss: 0.2113  avg_val_loss: 0.1356  time: 117s
Epoch 2 - Score: 0.8303859757035215
Epoch 2 - Save Best Score: 0.8304 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.1356 
Epoch: [3][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.1054 
Epoch: [3][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.0979 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 7s) Loss: 0.7472 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.2502 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.2372 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.2365 


Epoch 3 - avg_train_loss: 0.0979  avg_val_loss: 0.2384  time: 117s
Epoch 3 - Score: 0.8935219657483245
Epoch 3 - Save Best Score: 0.8935 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.2384 


Score: 0.78917
Best threshold : 0.49343
After optimizing score: 0.89326
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassificatio

Epoch: [1][0/63] Elapsed 0m 0s (remain 0m 53s) Loss: 0.7324 
Epoch: [1][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.5128 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 23s) Loss: 0.2723 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.3281 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.3164 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.3148 


Epoch 1 - avg_train_loss: 0.5128  avg_val_loss: 0.3174  time: 117s
Epoch 1 - Score: 0.8246836342954642
Epoch 1 - Save Best Score: 0.8247 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.3174 
Epoch: [2][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.3087 
Epoch: [2][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.2613 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 23s) Loss: 0.3333 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.4491 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.4401 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.4402 


Epoch 2 - avg_train_loss: 0.2613  avg_val_loss: 0.4420  time: 117s
Epoch 2 - Score: 0.861597374179431
Epoch 2 - Save Best Score: 0.8616 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.4420 
Epoch: [3][0/63] Elapsed 0m 0s (remain 0m 53s) Loss: 0.2425 
Epoch: [3][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.1472 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 11s) Loss: 0.2964 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.2492 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.2341 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.2426 


Epoch 3 - avg_train_loss: 0.1472  avg_val_loss: 0.2447  time: 117s
Epoch 3 - Score: 0.9044117647058824
Epoch 3 - Save Best Score: 0.9044 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.2447 


Score: 0.71219
Best threshold : 0.49343
After optimizing score: 0.90415
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassificatio

Epoch: [1][0/63] Elapsed 0m 0s (remain 0m 54s) Loss: 0.6640 
Epoch: [1][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.4994 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 16s) Loss: 0.3594 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.4038 
EVAL: [200/340] Elapsed 0m 43s (remain 0m 30s) Loss: 0.4168 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.4171 


Epoch 1 - avg_train_loss: 0.4994  avg_val_loss: 0.4186  time: 117s
Epoch 1 - Score: 0.8211304228821678
Epoch 1 - Save Best Score: 0.8211 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.4186 
Epoch: [2][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.1951 
Epoch: [2][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.2030 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 21s) Loss: 0.2185 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.1901 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.2007 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.1981 


Epoch 2 - avg_train_loss: 0.2030  avg_val_loss: 0.1971  time: 117s
Epoch 2 - Score: 0.8688548938066241
Epoch 2 - Save Best Score: 0.8689 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.1971 
Epoch: [3][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.1818 
Epoch: [3][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.0873 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 15s) Loss: 0.4328 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.3517 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.3643 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.3650 


Epoch 3 - avg_train_loss: 0.0873  avg_val_loss: 0.3672  time: 117s
Epoch 3 - Score: 0.8888888888888888
Epoch 3 - Save Best Score: 0.8889 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.3672 


Score: 0.75233
Best threshold : 0.41977
After optimizing score: 0.88816
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassificatio

Epoch: [1][0/63] Elapsed 0m 0s (remain 0m 54s) Loss: 0.7223 
Epoch: [1][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.5020 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 17s) Loss: 0.1268 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.2286 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.2359 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.2359 


Epoch 1 - avg_train_loss: 0.5020  avg_val_loss: 0.2362  time: 117s
Epoch 1 - Score: 0.7979120059656971
Epoch 1 - Save Best Score: 0.7979 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.2362 
Epoch: [2][0/63] Elapsed 0m 0s (remain 0m 54s) Loss: 0.2642 
Epoch: [2][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.1906 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 15s) Loss: 0.1443 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.3573 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.3834 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.3854 


Epoch 2 - avg_train_loss: 0.1906  avg_val_loss: 0.3859  time: 117s
Epoch 2 - Score: 0.8693837807761858
Epoch 2 - Save Best Score: 0.8694 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.3859 
Epoch: [3][0/63] Elapsed 0m 0s (remain 0m 57s) Loss: 0.0722 
Epoch: [3][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.0955 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 16s) Loss: 0.0167 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.1457 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.1550 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.1520 


Epoch 3 - avg_train_loss: 0.0955  avg_val_loss: 0.1532  time: 117s
Epoch 3 - Score: 0.8991990327943178
Epoch 3 - Save Best Score: 0.8992 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.1532 


Score: 0.83601
Best threshold : 0.45492
After optimizing score: 0.91169
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassificatio

Epoch: [1][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.6777 
Epoch: [1][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.4678 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 17s) Loss: 0.0521 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.1739 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.1764 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.1795 


Epoch 1 - avg_train_loss: 0.4678  avg_val_loss: 0.1770  time: 117s
Epoch 1 - Score: 0.7440024293956878
Epoch 1 - Save Best Score: 0.7440 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.1770 
Epoch: [2][0/63] Elapsed 0m 0s (remain 0m 51s) Loss: 0.3660 
Epoch: [2][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.2044 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 26s) Loss: 0.1720 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.4397 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.4279 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.4365 


Epoch 2 - avg_train_loss: 0.2044  avg_val_loss: 0.4357  time: 117s
Epoch 2 - Score: 0.8305875892366832
Epoch 2 - Save Best Score: 0.8306 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.4357 
Epoch: [3][0/63] Elapsed 0m 0s (remain 0m 52s) Loss: 0.0614 
Epoch: [3][62/63] Elapsed 0m 42s (remain 0m 0s) Loss: 0.1033 
EVAL: [0/340] Elapsed 0m 0s (remain 2m 25s) Loss: 0.0078 
EVAL: [100/340] Elapsed 0m 22s (remain 0m 52s) Loss: 0.1077 
EVAL: [200/340] Elapsed 0m 44s (remain 0m 30s) Loss: 0.1145 
EVAL: [300/340] Elapsed 1m 5s (remain 0m 8s) Loss: 0.1173 


Epoch 3 - avg_train_loss: 0.1033  avg_val_loss: 0.1185  time: 117s
Epoch 3 - Score: 0.8389778325123154
Epoch 3 - Save Best Score: 0.8390 Model


EVAL: [339/340] Elapsed 1m 14s (remain 0m 0s) Loss: 0.1185 


Score: 0.85822
Best threshold : 0.19099
After optimizing score: 0.86727
Score: 0.78553
Best threshold : 0.47087
After optimizing score: 0.88777
Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you exp

  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

  0%|          | 0/2553 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Ber

  0%|          | 0/2553 [00:00<?, ?it/s]