## Exp-006 (BioBERT)

BlueBERT-Large, Uncased, PubMed:




In [None]:
# Mount Google Drive so the directories under /content/drive used by this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the GPU attached to this runtime.
!nvidia-smi

Wed Aug 18 00:43:25 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    37W / 300W |  16145MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Install third-party packages imported later in this notebook (versions are not pinned).
!pip install transformers pycld2



In [None]:
# Directory holding train.csv, test.csv and sample_submit.csv.
input_dir = "/content/drive/MyDrive/07_Competition/signate-471/data/"
# Directory for the training log file (one sub-directory per experiment).
output_dir = "/content/drive/MyDrive/07_Competition/signate-471/log/"
# Directory the submission CSV is written to.
submission_dir = "/content/drive/MyDrive/07_Competition/signate-471/submission/"
# Directory for the per-fold model checkpoints.
model_dir = "/content/drive/MyDrive/07_Competition/signate-471/model_bin/"
# Directory for the out-of-fold prediction CSV.
pred_dir = "/content/drive/MyDrive/07_Competition/signate-471/pred/"

In [None]:
import os
import math
import random
import pandas as pd
import numpy as np
from glob import glob
import gc
gc.enable()

import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim as optim
from torch.optim.optimizer import Optimizer
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import Dataset, DataLoader, SequentialSampler, RandomSampler

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import fbeta_score

from transformers import BertConfig, RobertaConfig
from transformers import (get_cosine_schedule_with_warmup, get_cosine_with_hard_restarts_schedule_with_warmup)
from transformers import BertTokenizer, RobertaTokenizer
from transformers import BertModel, RobertaModel
from transformers import AutoConfig, BertConfig, RobertaConfig
from transformers import BertForSequenceClassification, RobertaForSequenceClassification
from torch import cuda
import time

from transformers import AdamW
from transformers import AutoTokenizer
from transformers import AutoModel, AutoModelForSequenceClassification
from transformers import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
from transformers import get_linear_schedule_with_warmup

from IPython.display import clear_output
from tqdm import tqdm, trange

import re
import nltk
import pycld2 as cld2
from scipy.optimize import minimize, minimize_scalar
import regex
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
class CFG:
  # Experiment id; used as a sub-directory name under the model/pred/log dirs and in file names.
  exp = "exp06"
  # Seed passed to set_random_seed and to StratifiedKFold.
  seed = 71
  # Number of cross-validation folds.
  fold = 5
  # max_length handed to the tokenizer (inputs are padded/truncated to it).
  max_len = 280
  # Number of training epochs per fold.
  epochs = 1
  # Learning rate given to AdamW.
  lr = 2e-5
  # DataLoader batch size for training.
  train_batch_size = 16
  # DataLoader batch size for validation and test inference.
  valid_batch_size = 32
  # Hugging Face checkpoint name loaded by both the tokenizer and the model.
  model_name = "dmis-lab/biobert-v1.1"

# Single settings instance read throughout the notebook.
CONFIG = CFG()

In [None]:
# Create the per-experiment sub-directory under each output location (no error if it exists).
for base_dir in (model_dir, pred_dir, output_dir):
  os.makedirs(base_dir+CONFIG.exp+"/", exist_ok=True)

In [None]:
def set_random_seed(random_seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices), exports
    ``PYTHONHASHSEED``, and puts cuDNN into deterministic mode.

    Args:
        random_seed: integer seed applied to all generators.
    """
    random.seed(random_seed)
    np.random.seed(random_seed)
    os.environ["PYTHONHASHSEED"] = str(random_seed)

    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    torch.backends.cudnn.deterministic = True
    # Autotuning may pick a different convolution algorithm on each run, so it has to be
    # switched off together with deterministic=True for repeatable results.
    torch.backends.cudnn.benchmark = False

# Seed everything once, ahead of the fold split and model creation further down.
set_random_seed(CONFIG.seed)

In [None]:
# Use the GPU when one is available. Always a torch.device, so both branches have the same type.
DEVICE = torch.device('cuda' if cuda.is_available() else 'cpu')

In [None]:
def init_logger(log_file=output_dir + CONFIG.exp+ f"/{CONFIG.exp}_train.log"):
    """Build the notebook logger that writes bare messages to the console and to a file.

    Args:
        log_file: path of the log file; defaults to ``<exp>_train.log`` inside the
            experiment's sub-directory of ``output_dir``.

    Returns:
        The configured ``logging.Logger``.
    """
    from logging import INFO, FileHandler, Formatter, StreamHandler, getLogger

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger hands back the same object on every call, so re-running this cell would
    # otherwise stack another pair of handlers and emit every message several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = Formatter("%(message)s")
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

LOGGER = init_logger()

In [None]:
def get_train_data(train):
    """Add a stratified cross-validation fold number to every training row.

    Folds are stratified on the "judgement" column, using ``CONFIG.fold`` splits and
    ``CONFIG.seed`` as the shuffle seed.

    Args:
        train: training DataFrame containing a "judgement" column. It is modified in
            place: a uint8 "fold" column is added.

    Returns:
        The same DataFrame, with the "fold" column filled in.
    """
    splitter = StratifiedKFold(n_splits=CONFIG.fold, shuffle=True, random_state=CONFIG.seed)

    # split() yields positional indices, so the fold ids are written into a positional
    # array rather than through .loc, which would only be right for a default RangeIndex.
    fold_ids = np.zeros(len(train), dtype=np.uint8)
    for n, (_, val_index) in enumerate(splitter.split(train, train["judgement"])):
        fold_ids[val_index] = n
    train["fold"] = fold_ids

    return train

def get_test_data(test):
    """Return the test frame unchanged; no test-side preparation is done here."""
    return test

In [None]:
class SRWSDataset(Dataset):
  """Torch dataset that tokenizes the "title_abst" column once, at construction.

  With ``inference_only=False`` each item is ``(input_ids, attention_mask, target)``,
  where target is the "judgement" value as a float tensor. With ``inference_only=True``
  each item is ``(input_ids, attention_mask)``.
  """

  def __init__(self, df, inference_only=False):
    self.df = df
    self.inference_only = inference_only  # False for train/valid data, True for test data
    self.text = self.df["title_abst"].tolist()

    # Labels exist only for training/validation frames; keep just the raw values.
    if not self.inference_only:
      self.target = df["judgement"].values

    # Every text is padded/truncated to CONFIG.max_len up front.
    tokenizer = BertTokenizer.from_pretrained(CONFIG.model_name)
    self.encoded = tokenizer.batch_encode_plus(
        self.text,
        padding="max_length",
        max_length=CONFIG.max_len,
        truncation=True,
        return_attention_mask=True,
    )

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    ids = torch.tensor(self.encoded["input_ids"][index])
    mask = torch.tensor(self.encoded["attention_mask"][index])

    # A tuple is returned here; a dict would serve equally well.
    if self.inference_only:
      return (ids, mask)

    # The label is converted to a float tensor at this point.
    label = torch.tensor(self.target[index]).float()
    return (ids, mask, label)


In [None]:
class SRWSBertModel(nn.Module):
  """BERT sequence classifier with one output logit, passed through a sigmoid."""

  def __init__(self):
    super().__init__()

    # Passing a config can override the checkpoint's original settings, see
    # https://www.kaggle.com/c/commonlitreadabilityprize/discussion/260729
    # The output goes through a sigmoid in the end, so num_labels=1 is enough.
    self.bert = BertForSequenceClassification.from_pretrained(CONFIG.model_name, num_labels=1)
    # Extra layers could be added here; the BERT output is a classification head,
    # so that part would have to change as well.
    self.sigmoid = nn.Sigmoid()

  def forward(self, input_ids, attention_mask):
    """Return one probability per input row as a 1-D tensor.

    Args:
      input_ids: token id tensor for a batch.
      attention_mask: attention mask tensor matching ``input_ids``.
    """
    bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    # Squeeze only the trailing label axis. A bare squeeze() would also drop the batch
    # axis for a batch of one, giving a 0-d tensor that breaks the loss and np.concatenate.
    bert_output = self.sigmoid(bert_output.logits).squeeze(-1)

    return bert_output

In [None]:
class AverageMeter(object):
    """Tracks the latest value and the weighted running average of a scalar."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, average, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``"<minutes>m <seconds>s"`` (fractions are dropped)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return "%dm %ds" % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Report elapsed time since ``since`` and the estimated time remaining.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work finished so far; must be non-zero.

    Returns:
        A string of the form ``"<elapsed> (remain <remaining>)"``.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return "%s (remain %s)" % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
# Training
def train_fn(model, train_loader, optimizer, epoch, loss_function, scheduler=None):
  """Run one training epoch and return the sample-weighted average loss.

  Args:
    model: model called as ``model(input_ids, attention_mask)``.
    train_loader: loader yielding ``(input_ids, attention_mask, target)`` batches.
    optimizer: optimizer stepped once per batch.
    epoch: zero-based epoch number, used only in the progress output.
    loss_function: callable applied to ``(pred, target)``.
    scheduler: optional scheduler stepped after every optimizer step.
  """
  started_at = time.time()
  loss_meter = AverageMeter()
  n_batches = len(train_loader)
  model.train()

  for step, batch in enumerate(train_loader):
    optimizer.zero_grad()

    input_ids, attention_mask, target = (tensor.to(DEVICE) for tensor in batch)
    batch_size = target.size(0)

    pred = model(input_ids, attention_mask)

    # Compute the loss, record it, then back-propagate.
    loss = loss_function(pred, target)
    loss_meter.update(loss.item(), batch_size)
    loss.backward()

    optimizer.step()

    if scheduler:
      scheduler.step()

    # Progress line every 100 batches and on the final batch.
    if step % 100 == 0 or step == (n_batches -1):
      progress = float(step + 1) / n_batches
      print(
            f"Epoch: [{epoch + 1}][{step}/{n_batches}] "
            f"Elapsed {timeSince(started_at, progress):s} "
            f"Loss: {loss_meter.avg:.4f} "
            )

  return loss_meter.avg

def valid_fn(valid_loader, model, loss_function):
  """Evaluate the model on the validation loader.

  Args:
    valid_loader: loader yielding ``(input_ids, attention_mask, target)`` batches.
    model: model called as ``model(input_ids, attention_mask)``.
    loss_function: callable applied to ``(pred, target)``.

  Returns:
    ``(average_loss, predictions)``: the sample-weighted mean loss and a 1-D NumPy
    array with the predictions of all batches, in loader order.
  """
  start = time.time()
  losses = AverageMeter()

  model.eval()
  preds = []

  for batch_num, (input_ids, attention_mask, target) in enumerate(valid_loader):
    input_ids = input_ids.to(DEVICE)
    attention_mask = attention_mask.to(DEVICE)
    target = target.to(DEVICE)
    batch_size = target.size(0)

    # No gradients are needed for either the forward pass or the loss.
    with torch.no_grad():
      # reshape(-1) keeps the predictions 1-D even when the final batch holds a single
      # row; a 0-d tensor would not match the target shape nor concatenate below.
      pred = model(input_ids, attention_mask).reshape(-1)
      loss = loss_function(pred, target)
    losses.update(loss.item(), batch_size)

    # Collect the predictions for this batch.
    preds.append(pred.to("cpu").numpy())

    # Progress line every 100 batches and on the final batch; fields are separated by
    # spaces, matching the training progress line.
    if batch_num % 100 == 0 or batch_num == (len(valid_loader) - 1):
      print(
          f"EVAL: [{batch_num}/{len(valid_loader)}] "
          f"Elapsed {timeSince(start, float(batch_num+1) / len(valid_loader)):s} "
          f"Loss: {losses.avg:.4f} "
      )
  predictions = np.concatenate(preds)

  return losses.avg, predictions

# Prediction
def inference():
    """Predict the test set with every fold's best checkpoint and average the results.

    Reads the module-level ``test`` DataFrame and, for each fold, the checkpoint saved
    under ``model_dir``.

    Returns:
        1-D NumPy array with the mean predicted probability per test row across folds.
    """
    predictions = []

    test_dataset = SRWSDataset(test,  inference_only=True)
    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG.valid_batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True
    )

    for fold in range(CONFIG.fold):
        LOGGER.info(f"========== model: {CONFIG.model_name} fold: {fold} inference ==========")
        model = SRWSBertModel()
        model.to(DEVICE)

        checkpoint_path = model_dir +CONFIG.exp + "/"+ f"{CONFIG.model_name.split('/')[1]}_fold{fold}_best.pth"
        # map_location lets a checkpoint saved from a GPU run load on a CPU-only runtime.
        checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
        model.load_state_dict(checkpoint["model"])
        model.eval()

        preds = []
        for input_ids, attention_mask in tqdm(test_loader, total=len(test_loader)):
            input_ids = input_ids.to(DEVICE)
            attention_mask = attention_mask.to(DEVICE)
            with torch.no_grad():
                y_preds = model(input_ids, attention_mask)
            # reshape(-1) keeps a final batch of a single row 1-D so it can be concatenated.
            preds.append(y_preds.reshape(-1).to("cpu").numpy())
        preds = np.concatenate(preds)
        predictions.append(preds)
    predictions = np.mean(predictions, axis=0)

    return predictions

In [None]:
# Threshold optimisation (called from train_loop and get_result)
# https://signate.jp/competitions/471/discussions/tf-roberta-base-baseline-cv08949-lb08734

def opt_fbeta_threshold(y_true, y_pred):
  """Search for the decision threshold that maximises the F-beta score (beta=7).

  Args:
    y_true: ground-truth labels.
    y_pred: predicted scores; a row counts as positive when ``y_pred >= threshold``.

  Returns:
    The threshold found by a Powell search started at 0.02, as a Python float.
  """
  def negative_fbeta(threshold):
    # minimize() minimises, so the score is negated.
    return -fbeta_score(y_true, y_pred >= threshold, beta=7)

  search = minimize(negative_fbeta, x0=np.array([0.02]), method='Powell')
  return search['x'].item()

In [None]:
# Per-fold training loop
def train_loop(train, fold):
  """Train one cross-validation fold and return its out-of-fold predictions.

  Args:
    train: DataFrame with "fold", "judgement" and "title_abst" columns.
    fold: fold number held out for validation; all other folds are trained on.

  Returns:
    ``(valid_folds, best_borders)``: the validation rows with a "preds" column from the
    best-scoring epoch, and the list of optimised thresholds, one entry per epoch.
  """
  LOGGER.info(f"========== fold: {fold} training ==========")

  # ====================================================
  # Data Loader
  # ====================================================
  trn_idx = train[train["fold"] != fold].index
  val_idx = train[train["fold"] == fold].index

  train_folds = train.loc[trn_idx].reset_index(drop=True)
  valid_folds = train.loc[val_idx].reset_index(drop=True)

  train_dataset = SRWSDataset(train_folds)
  valid_dataset = SRWSDataset(valid_folds)

  train_loader = DataLoader(
        train_dataset,
        batch_size=CONFIG.train_batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True, # https://qiita.com/sugulu_Ogawa_ISID/items/62f5f7adee083d96a587
        drop_last=True,
  )
  valid_loader = DataLoader(
        valid_dataset,
        batch_size=CONFIG.valid_batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=False,
  )

  # ====================================================
  # Model
  # ====================================================
  model = SRWSBertModel()
  model.to(DEVICE)

  optimizer = AdamW(model.parameters(), lr=CONFIG.lr)

  # Loss function: the model already outputs probabilities, hence plain BCE.
  loss_function = nn.BCELoss()

  # ====================================================
  # LOOP
  # ====================================================

  best_score = -1
  best_preds = None
  best_borders=[]
  checkpoint_path = model_dir +CONFIG.exp + "/"+ f"{CONFIG.model_name.split('/')[1]}_fold{fold}_best.pth"

  for epoch in range(CONFIG.epochs):
    start_time = time.time()

    # train
    avg_loss = train_fn(model, train_loader, optimizer, epoch, loss_function)

    # valid
    avg_val_loss, preds = valid_fn(valid_loader, model,loss_function)
    valid_labels = valid_folds["judgement"].values

    # Optimise the decision threshold on this epoch's validation predictions.
    border_m = opt_fbeta_threshold(valid_labels, preds)
    best_borders.append(border_m)

    # score
    score = fbeta_score(valid_labels, np.where(preds < border_m, 0, 1), beta=7.0)

    elapsed = time.time() - start_time
    LOGGER.info(
            f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
    )
    LOGGER.info(f"Epoch {epoch+1} - Score: {score}")

    if score > best_score:
      best_score = score
      # Keep the predictions in memory so the checkpoint need not be read back below.
      best_preds = preds
      LOGGER.info(f"Epoch {epoch+1} - Save Best Score: {best_score:.4f} ")
      torch.save({"model": model.state_dict(), "preds": preds}, checkpoint_path)

  # Fall back to the checkpoint on disk only when no epoch produced a new best here.
  if best_preds is None:
    best_preds = torch.load(checkpoint_path)["preds"]

  valid_folds["preds"] = best_preds

  return valid_folds,best_borders

In [None]:
def get_result(result_df):
    """Optimise a threshold on the given predictions, then print and log the F-beta score.

    Args:
        result_df: DataFrame with "preds" (scores) and "judgement" (labels) columns.
    """
    preds = result_df["preds"].values
    labels = result_df["judgement"].values

    best_threshold = opt_fbeta_threshold(labels, preds)
    print("Best_Threshold：" + str(best_threshold))

    # fbeta_score needs hard 0/1 labels; raw scores raise
    # "ValueError: Classification metrics can't handle a mix of continuous and binary targets".
    hard_labels = np.where(preds < best_threshold, 0, 1)
    score = fbeta_score(labels, hard_labels, beta=7.0)

    print("Score：" + str(score))
    LOGGER.info(f"Score: {score:<.5f}")

# Variant of get_result that takes the threshold as an argument, so the same
# threshold can be reused at inference time.
def get_result_for_cv(result_df,best_border):
    """Score predictions against labels at a given threshold (F-beta, beta=7).

    Args:
        result_df: DataFrame with "preds" (scores) and "judgement" (labels) columns.
        best_border: threshold; scores below it become 0, all others 1.

    Returns:
        The F-beta score, which is also printed and logged.
    """
    preds = result_df["preds"].values
    labels = result_df["judgement"].values
    print("Best_Threshold：" + str(best_border))

    # fbeta_score needs hard 0/1 labels; raw scores raise
    # "ValueError: Classification metrics can't handle a mix of continuous and binary targets".
    hard_labels = np.where(preds < best_border, 0, 1)
    score = fbeta_score(labels, hard_labels, beta=7.0)

    print("Score：" + str(score))
    LOGGER.info(f"Score: {score:<.5f}")

    return score

def mean_best_border(*best_borders):
    """Average the given thresholds, then print and log the result.

    Args:
        *best_borders: threshold values; this notebook passes one list, and the mean is
            taken over all of its elements.

    Returns:
        The mean threshold.
    """
    averaged_border = np.mean(best_borders)
    print("Best_Threshold：" + str(averaged_border))
    LOGGER.info(f"Best_Border: {averaged_border:<.8f}")

    return averaged_border

In [None]:
def clean_stopword(text):
  """Remove English stop words from whitespace-separated text.

  Matching is case-sensitive against NLTK's English stop-word list. The remaining words
  are joined with single spaces.
  """
  # A set gives constant-time membership tests; the list would be scanned for every word.
  stopwords = set(nltk.corpus.stopwords.words('english'))
  return ' '.join(w for w in text.split() if w not in stopwords)

def clean_puncts(x):
  """Delete punctuation, symbols and known mojibake fragments from a string.

  Args:
    x: input text.

  Returns:
    ``x`` with every entry of the list below removed.
  """
  # There are chemical formulas etc., so maybe '-' should not be removed?

  puncts = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£',
            '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…',
            '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─',
            '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '«',
            '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', '（', '）', '～',
            '➡', '％', '⇒', '▶', '「', '➄', '➆',  '➊', '➋', '➌', '➍', '⓪', '①', '②', '③', '④', '⑤', '⑰', '❶', '❷', '❸', '❹', '❺', '❻', '❼', '❽',  
            '＝', '※', '㈱', '､', '△', '℮', 'ⅼ', '‐', '｣', '┝', '↳', '◉', '／', '＋', '○',
            '【', '】', '✅', '☑', '➤', 'ﾞ', '↳', '〶', '☛', '｢', '⁺', '『', '≫',
            'Â©', '<sub>','Aﾎｲ', 'ﾎｲ', "ﾃｩ"
          ] 
  # Is this the only place where mojibake can be handled?

  # Longest tokens go first: once '<' and '>' have been stripped, '<sub>' can no longer
  # match and would leave a stray "sub" behind. The sort is stable, so tokens of equal
  # length keep their listed order.
  for punct in sorted(puncts, key=len, reverse=True):
    x = x.replace(punct, '')
  return x

def _pre_preprocess(x):
  return str(x).lower() 

def rm_num(x, use_num=True):
  """Delete every ASCII and full-width digit from a string.

  Args:
    x: input text.
    use_num: accepted for backward compatibility; it has no effect.

  Returns:
    ``x`` with the characters 0-9 and ０-９ removed.
  """
  # A single pass covers what the earlier run-length passes and per-digit replaces did:
  # whatever the run length, every digit ends up deleted.
  return re.sub('[0-9０１２３４５６７８９]', '', x)

def convert_mojibake(text):
  """Re-encode text as Shift-JIS and decode the bytes as UTF-8, skipping invalid sequences.

  Encoding raises if ``text`` contains a character that Shift-JIS cannot represent.
  """
  raw_bytes = text.encode("shift-jis")
  return raw_bytes.decode("utf-8", errors="ignore")

def remove_double(text):
  """Collapse every run of two or more spaces into a single space."""
  # A single replace("  ", " ") pass only halves each run, so three or more
  # consecutive spaces would still leave a double space behind.
  return re.sub(' {2,}', ' ', text)

def preprocess_text(text):
  """Clean one text: strip punctuation, strip digits, then tidy double spaces.

  Lower-casing (_pre_preprocess) and stop-word removal (clean_stopword) are
  deliberately not part of this pipeline.
  """
  without_puncts = clean_puncts(text)
  without_digits = rm_num(without_puncts)
  return remove_double(without_digits)

def split_copyright(text):
  """Return the part of ``text`` before the first "Copyright" (all of it if absent)."""
  head, _, _ = text.partition('Copyright')
  return head


In [None]:
# Cap the column width pandas uses when displaying frames.
pd.set_option("display.max_colwidth", 50)
# Competition files are read from input_dir.
train = pd.read_csv(input_dir + "train.csv")
test = pd.read_csv(input_dir + "test.csv")
# The sample submission is read without a header row, so column names are set explicitly.
sub = pd.read_csv(input_dir + "sample_submit.csv", header=None)
sub.columns = ["id", "judgement"]

In [None]:
# Add the "fold" column; this happens before any rows are filtered out further down.
train = get_train_data(train)
train.head()

Unnamed: 0,id,title,abstract,judgement,fold
0,0,One-year age changes in MRI brain volumes in o...,Longitudinal studies indicate that declines in...,0,4
1,1,Supportive CSF biomarker evidence to enhance t...,The present study was undertaken to validate t...,0,1
2,2,Occurrence of basal ganglia germ cell tumors w...,Objective: To report a case series in which ba...,0,2
3,3,New developments in diagnosis and therapy of C...,The etiology and pathogenesis of idiopathic ch...,0,0
4,4,Prolonged shedding of SARS-CoV-2 in an elderly...,,0,0


In [None]:
# Model input text: title followed by abstract.
# The space separator stops the last word of the title fusing with the first word of
# the abstract. Rows with no abstract fall back to the title alone; the fallback is
# assigned directly rather than through an in-place fillna on a selected column,
# which does not reach the frame under pandas copy-on-write.
train["title_abst"] = (train["title"] + " " + train["abstract"]).fillna(train["title"])
test["title_abst"] = (test["title"] + " " + test["abstract"]).fillna(test["title"])

train.head()

Unnamed: 0,id,title,abstract,judgement,fold,title_abst
0,0,One-year age changes in MRI brain volumes in o...,Longitudinal studies indicate that declines in...,0,4,One-year age changes in MRI brain volumes in o...
1,1,Supportive CSF biomarker evidence to enhance t...,The present study was undertaken to validate t...,0,1,Supportive CSF biomarker evidence to enhance t...
2,2,Occurrence of basal ganglia germ cell tumors w...,Objective: To report a case series in which ba...,0,2,Occurrence of basal ganglia germ cell tumors w...
3,3,New developments in diagnosis and therapy of C...,The etiology and pathogenesis of idiopathic ch...,0,0,New developments in diagnosis and therapy of C...
4,4,Prolonged shedding of SARS-CoV-2 in an elderly...,,0,0,Prolonged shedding of SARS-CoV-2 in an elderly...


In [None]:
# preprocess
train["title_abst"] = train["title_abst"].apply(preprocess_text)
test["title_abst"] = test["title_abst"].apply(preprocess_text)

# Experiment: drop training rows whose title has three words or fewer.
# .copy() makes the filtered frame independent, so the column assignment below does not
# write into a slice of the previous frame.
train["title_word_len"] = train["title"].str.split(" ").str.len()
train = train[train["title_word_len"]>3].copy()

# Drop training rows whose title is in another language: keep only the detected
# language codes "en" and "un" (presumably "unknown" — confirm against pycld2).
train["title_lang"] = train["title"].fillna("").map(lambda x: cld2.detect(x)[2][0][1])
train = train[(train["title_lang"]=="en")|(train["title_lang"]=="un")].copy()

# Cut everything from the first "Copyright" onwards.
train["title_abst"] = train["title_abst"].apply(split_copyright)
test["title_abst"] = test["title_abst"].apply(split_copyright)

In [None]:
# Row counts before and after filtering; the CSV is re-read to get the original size.
print(len(pd.read_csv(input_dir + "train.csv")))
print(len(train))

27145
26921


In [None]:
# Training
#border = len(train[train["judgement"] == 1]) / len(train["judgement"]) # 0.023245467689912133
#border = border * 0.6

mean_border_folds = []

# Collect each fold's out-of-fold frame and concatenate once at the end, rather than
# growing a DataFrame from an empty one inside the loop.
oof_parts = []
for fold in range(CONFIG.fold):
  _oof_df,best_borders = train_loop(train, fold)
  oof_parts.append(_oof_df)
  LOGGER.info(f"========== fold: {fold} result ==========")
  best_border_fold = mean_best_border(best_borders)
  mean_border_folds.append(best_border_fold)
oof_df = pd.concat(oof_parts)

# CV result: average the per-fold thresholds and score all out-of-fold predictions with it.
LOGGER.info("========== CV ==========")
best_border = mean_best_border(mean_border_folds)
get_result_for_cv(oof_df,best_border)

# Save OOF result
oof_df.to_csv(pred_dir +CONFIG.exp + "/oof_df.csv", index=False)



Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/462 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1345] Elapsed 0m 0s (remain 11m 37s) Loss: 0.6606 
Epoch: [1][100/1345] Elapsed 0m 25s (remain 5m 17s) Loss: 0.1623 
Epoch: [1][200/1345] Elapsed 0m 51s (remain 4m 51s) Loss: 0.1401 
Epoch: [1][300/1345] Elapsed 1m 16s (remain 4m 25s) Loss: 0.1199 
Epoch: [1][400/1345] Elapsed 1m 41s (remain 3m 59s) Loss: 0.1081 
Epoch: [1][500/1345] Elapsed 2m 7s (remain 3m 34s) Loss: 0.0999 
Epoch: [1][600/1345] Elapsed 2m 32s (remain 3m 9s) Loss: 0.0921 
Epoch: [1][700/1345] Elapsed 2m 58s (remain 2m 43s) Loss: 0.0879 
Epoch: [1][800/1345] Elapsed 3m 23s (remain 2m 18s) Loss: 0.0822 
Epoch: [1][900/1345] Elapsed 3m 49s (remain 1m 52s) Loss: 0.0783 
Epoch: [1][1000/1345] Elapsed 4m 14s (remain 1m 27s) Loss: 0.0752 
Epoch: [1][1100/1345] Elapsed 4m 40s (remain 1m 2s) Loss: 0.0736 
Epoch: [1][1200/1345] Elapsed 5m 5s (remain 0m 36s) Loss: 0.0718 
Epoch: [1][1300/1345] Elapsed 5m 31s (remain 0m 11s) Loss: 0.0693 
Epoch: [1][1344/1345] Elapsed 5m 42s (remain 0m 0s) Loss: 0.0688 
EVAL: [0/169

Epoch 1 - avg_train_loss: 0.0688  avg_val_loss: 0.0408  time: 370s
Epoch 1 - avg_train_loss: 0.0688  avg_val_loss: 0.0408  time: 370s
Epoch 1 - avg_train_loss: 0.0688  avg_val_loss: 0.0408  time: 370s
Epoch 1 - Score: 0.8963543235914433
Epoch 1 - Score: 0.8963543235914433
Epoch 1 - Score: 0.8963543235914433
Epoch 1 - Save Best Score: 0.8964 
Epoch 1 - Save Best Score: 0.8964 
Epoch 1 - Save Best Score: 0.8964 
Best_Border: 0.00772932
Best_Border: 0.00772932
Best_Border: 0.00772932


Best_Threshold：0.007729319931711987


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1346] Elapsed 0m 0s (remain 10m 39s) Loss: 0.7508 
Epoch: [1][100/1346] Elapsed 0m 25s (remain 5m 18s) Loss: 0.1398 
Epoch: [1][200/1346] Elapsed 0m 51s (remain 4m 51s) Loss: 0.1423 
Epoch: [1][300/1346] Elapsed 1m 16s (remain 4m 26s) Loss: 0.1285 
Epoch: [1][400/1346] Elapsed 1m 42s (remain 4m 0s) Loss: 0.1145 
Epoch: [1][500/1346] Elapsed 2m 7s (remain 3m 35s) Loss: 0.1040 
Epoch: [1][600/1346] Elapsed 2m 33s (remain 3m 9s) Loss: 0.0959 
Epoch: [1][700/1346] Elapsed 2m 58s (remain 2m 44s) Loss: 0.0921 
Epoch: [1][800/1346] Elapsed 3m 24s (remain 2m 18s) Loss: 0.0872 
Epoch: [1][900/1346] Elapsed 3m 49s (remain 1m 53s) Loss: 0.0840 
Epoch: [1][1000/1346] Elapsed 4m 15s (remain 1m 27s) Loss: 0.0812 
Epoch: [1][1100/1346] Elapsed 4m 40s (remain 1m 2s) Loss: 0.0792 
Epoch: [1][1200/1346] Elapsed 5m 6s (remain 0m 36s) Loss: 0.0778 
Epoch: [1][1300/1346] Elapsed 5m 31s (remain 0m 11s) Loss: 0.0759 
Epoch: [1][1345/1346] Elapsed 5m 43s (remain 0m 0s) Loss: 0.0746 
EVAL: [0/169]

Epoch 1 - avg_train_loss: 0.0746  avg_val_loss: 0.0482  time: 370s
Epoch 1 - avg_train_loss: 0.0746  avg_val_loss: 0.0482  time: 370s
Epoch 1 - avg_train_loss: 0.0746  avg_val_loss: 0.0482  time: 370s
Epoch 1 - Score: 0.878099173553719
Epoch 1 - Score: 0.878099173553719
Epoch 1 - Score: 0.878099173553719
Epoch 1 - Save Best Score: 0.8781 
Epoch 1 - Save Best Score: 0.8781 
Epoch 1 - Save Best Score: 0.8781 
Best_Border: 0.00757423
Best_Border: 0.00757423
Best_Border: 0.00757423


Best_Threshold：0.00757423196560248


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1345] Elapsed 0m 0s (remain 10m 40s) Loss: 0.5785 
Epoch: [1][100/1345] Elapsed 0m 25s (remain 5m 18s) Loss: 0.1309 
Epoch: [1][200/1345] Elapsed 0m 51s (remain 4m 51s) Loss: 0.1199 
Epoch: [1][300/1345] Elapsed 1m 16s (remain 4m 26s) Loss: 0.1151 
Epoch: [1][400/1345] Elapsed 1m 42s (remain 4m 0s) Loss: 0.1072 
Epoch: [1][500/1345] Elapsed 2m 7s (remain 3m 35s) Loss: 0.1044 
Epoch: [1][600/1345] Elapsed 2m 33s (remain 3m 9s) Loss: 0.0968 
Epoch: [1][700/1345] Elapsed 2m 58s (remain 2m 44s) Loss: 0.0931 
Epoch: [1][800/1345] Elapsed 3m 24s (remain 2m 18s) Loss: 0.0884 
Epoch: [1][900/1345] Elapsed 3m 49s (remain 1m 53s) Loss: 0.0836 
Epoch: [1][1000/1345] Elapsed 4m 15s (remain 1m 27s) Loss: 0.0804 
Epoch: [1][1100/1345] Elapsed 4m 40s (remain 1m 2s) Loss: 0.0778 
Epoch: [1][1200/1345] Elapsed 5m 6s (remain 0m 36s) Loss: 0.0754 
Epoch: [1][1300/1345] Elapsed 5m 31s (remain 0m 11s) Loss: 0.0731 
Epoch: [1][1344/1345] Elapsed 5m 42s (remain 0m 0s) Loss: 0.0727 
EVAL: [0/169]

Epoch 1 - avg_train_loss: 0.0727  avg_val_loss: 0.0493  time: 370s
Epoch 1 - avg_train_loss: 0.0727  avg_val_loss: 0.0493  time: 370s
Epoch 1 - avg_train_loss: 0.0727  avg_val_loss: 0.0493  time: 370s
Epoch 1 - Score: 0.8518905993125094
Epoch 1 - Score: 0.8518905993125094
Epoch 1 - Score: 0.8518905993125094
Epoch 1 - Save Best Score: 0.8519 
Epoch 1 - Save Best Score: 0.8519 
Epoch 1 - Save Best Score: 0.8519 
Best_Border: 0.01417555
Best_Border: 0.01417555
Best_Border: 0.01417555


Best_Threshold：0.014175545516478994


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1345] Elapsed 0m 0s (remain 10m 44s) Loss: 0.7607 
Epoch: [1][100/1345] Elapsed 0m 25s (remain 5m 17s) Loss: 0.1653 
Epoch: [1][200/1345] Elapsed 0m 51s (remain 4m 51s) Loss: 0.1459 
Epoch: [1][300/1345] Elapsed 1m 16s (remain 4m 25s) Loss: 0.1336 
Epoch: [1][400/1345] Elapsed 1m 42s (remain 4m 0s) Loss: 0.1318 
Epoch: [1][500/1345] Elapsed 2m 7s (remain 3m 34s) Loss: 0.1264 
Epoch: [1][600/1345] Elapsed 2m 32s (remain 3m 9s) Loss: 0.1205 
Epoch: [1][700/1345] Elapsed 2m 58s (remain 2m 44s) Loss: 0.1145 
Epoch: [1][800/1345] Elapsed 3m 24s (remain 2m 18s) Loss: 0.1076 
Epoch: [1][900/1345] Elapsed 3m 49s (remain 1m 53s) Loss: 0.1036 
Epoch: [1][1000/1345] Elapsed 4m 15s (remain 1m 27s) Loss: 0.0992 
Epoch: [1][1100/1345] Elapsed 4m 40s (remain 1m 2s) Loss: 0.0971 
Epoch: [1][1200/1345] Elapsed 5m 6s (remain 0m 36s) Loss: 0.0939 
Epoch: [1][1300/1345] Elapsed 5m 31s (remain 0m 11s) Loss: 0.0902 
Epoch: [1][1344/1345] Elapsed 5m 42s (remain 0m 0s) Loss: 0.0884 
EVAL: [0/169]

Epoch 1 - avg_train_loss: 0.0884  avg_val_loss: 0.0494  time: 370s
Epoch 1 - avg_train_loss: 0.0884  avg_val_loss: 0.0494  time: 370s
Epoch 1 - avg_train_loss: 0.0884  avg_val_loss: 0.0494  time: 370s
Epoch 1 - Score: 0.8718395815170008
Epoch 1 - Score: 0.8718395815170008
Epoch 1 - Score: 0.8718395815170008
Epoch 1 - Save Best Score: 0.8718 
Epoch 1 - Save Best Score: 0.8718 
Epoch 1 - Save Best Score: 0.8718 
Best_Border: 0.01163461
Best_Border: 0.01163461
Best_Border: 0.01163461


Best_Threshold：0.01163461081713535


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch: [1][0/1346] Elapsed 0m 0s (remain 10m 50s) Loss: 0.4644 
Epoch: [1][100/1346] Elapsed 0m 25s (remain 5m 18s) Loss: 0.1541 
Epoch: [1][200/1346] Elapsed 0m 51s (remain 4m 51s) Loss: 0.1152 
Epoch: [1][300/1346] Elapsed 1m 16s (remain 4m 26s) Loss: 0.1087 
Epoch: [1][400/1346] Elapsed 1m 42s (remain 4m 0s) Loss: 0.1042 
Epoch: [1][500/1346] Elapsed 2m 7s (remain 3m 35s) Loss: 0.0946 
Epoch: [1][600/1346] Elapsed 2m 33s (remain 3m 9s) Loss: 0.0909 
Epoch: [1][700/1346] Elapsed 2m 58s (remain 2m 44s) Loss: 0.0847 
Epoch: [1][800/1346] Elapsed 3m 24s (remain 2m 18s) Loss: 0.0816 
Epoch: [1][900/1346] Elapsed 3m 49s (remain 1m 53s) Loss: 0.0788 
Epoch: [1][1000/1346] Elapsed 4m 15s (remain 1m 27s) Loss: 0.0760 
Epoch: [1][1100/1346] Elapsed 4m 40s (remain 1m 2s) Loss: 0.0750 
Epoch: [1][1200/1346] Elapsed 5m 6s (remain 0m 36s) Loss: 0.0723 
Epoch: [1][1300/1346] Elapsed 5m 31s (remain 0m 11s) Loss: 0.0703 
Epoch: [1][1345/1346] Elapsed 5m 43s (remain 0m 0s) Loss: 0.0694 
EVAL: [0/168]

Epoch 1 - avg_train_loss: 0.0694  avg_val_loss: 0.0430  time: 370s
Epoch 1 - avg_train_loss: 0.0694  avg_val_loss: 0.0430  time: 370s
Epoch 1 - avg_train_loss: 0.0694  avg_val_loss: 0.0430  time: 370s
Epoch 1 - Score: 0.8992805755395685
Epoch 1 - Score: 0.8992805755395685
Epoch 1 - Score: 0.8992805755395685
Epoch 1 - Save Best Score: 0.8993 
Epoch 1 - Save Best Score: 0.8993 
Epoch 1 - Save Best Score: 0.8993 
Best_Border: 0.00290727
Best_Border: 0.00290727
Best_Border: 0.00290727
Best_Border: 0.00880420
Best_Border: 0.00880420
Best_Border: 0.00880420
Score: 0.86436
Score: 0.86436
Score: 0.86436


Best_Threshold：0.0029072682482424814
Best_Threshold：0.008804195295834257
Best_Threshold：0.008804195295834257
Score：0.8643597169557818


In [None]:
# Show the threshold that the inference cell applies to the test predictions.
best_border

0.026602333932376643

In [None]:
# inference, submit
# TODO: make the border (threshold) available as an output
#best_border = 0.03955983
# Fold-averaged predictions below best_border become 0, all others 1.
predictions = inference()
predictions = np.where(predictions < best_border, 0, 1)

# submission
# Written without index and without a header row.
sub["judgement"] = predictions
sub.to_csv(submission_dir +CONFIG.exp+ "_submission.csv", index=False, header=False)

Some weights of the model checkpoint at bionlp/bluebert_pubmed_uncased_L-12_H-768_A-12 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceCla