1. データの読み込み

In [1]:
import math
import random
import time
import warnings
import numpy as np
import pandas as pd
import torch
from scipy.optimize import minimize, minimize_scalar
import torch.nn as nn
import os
import transformers as T
from pathlib import Path
from sklearn.metrics import fbeta_score
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm

In [2]:
# Silence every Python warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
def seed_torch(seed=42):
    """Seed every random number generator used by the notebook.

    Args:
        seed: Integer seed applied to ``random``, NumPy and PyTorch (CPU and
            all CUDA devices). It is also exported as ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    # Exported for child processes; the running interpreter's hashing is
    # already fixed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible CUDA device (a no-op when CUDA is unavailable).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run,
    # so it has to be off for reproducible results.
    torch.backends.cudnn.benchmark = False

# Fix the global seed once, right after the helper is defined.
seed = 471
seed_torch(seed)

In [5]:
# Input data location and the directory that receives this run's outputs.
DATA_DIR = './dataset/data4'
OUTPUT_DIR = './result/result10/'
# exist_ok=True replaces the check-then-create pattern, which can race with
# another process creating the same directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [6]:
def init_logger(log_file=OUTPUT_DIR + "/train.log"):
    """Create the logger that writes plain messages to the console and a file.

    Args:
        log_file: Path of the log file; defaults to ``train.log`` under
            ``OUTPUT_DIR``.

    Returns:
        The module logger, set to INFO, with exactly one stream handler and
        one file handler attached.
    """
    from logging import INFO, FileHandler, Formatter, StreamHandler, getLogger

    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger returns the same object on every call, so calling this
    # function again (e.g. re-running the notebook cell) would otherwise stack
    # handlers and emit every message several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = Formatter("%(message)s")
    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

# Module-wide logger; writes to the console and to the default log file.
LOGGER = init_logger()

In [7]:
# Load the training set and the test set; the first CSV column becomes the row index.
train = pd.read_csv(DATA_DIR  +"/train.csv", index_col=0)
test = pd.read_csv(DATA_DIR + "/test.csv", index_col=0)
# The sample submission is read without a header row, so its columns are named here.
sub = pd.read_csv(DATA_DIR + "/sample_submit.csv", header=None)
sub.columns = ["id", "judgement"]

In [8]:
def opt_fbeta_threshold(y_true, y_pred):
    """Search for the decision threshold that maximizes the F-beta score (beta=7).

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted scores; a sample is positive when its score is
            ``>=`` the threshold.

    Returns:
        The threshold found by a Powell search started at 0.1, as a float.
    """
    def negative_fbeta(threshold):
        # minimize() looks for a minimum, so the score is negated.
        return -fbeta_score(y_true, y_pred >= threshold, beta=7)

    search = minimize(negative_fbeta, x0=np.array([0.1]), method='Powell')
    return search['x'].item()


def metrics(y_true, y_pred):
    """Score predictions with F-beta (beta=7) at a threshold tuned on the same data.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted scores.

    Returns:
        Tuple ``(score, threshold)``; the threshold is also printed.
    """
    threshold = opt_fbeta_threshold(y_true, y_pred)
    print(f"bt:{threshold}")
    binarized = y_pred >= threshold
    return fbeta_score(y_true, binarized, beta=7), threshold

In [9]:
class BaseDataset(Dataset):
    """Torch dataset that tokenizes the ``text`` column of a DataFrame for BERT.

    All texts are tokenized once in the constructor; ``__getitem__`` only
    converts the stored token ids to tensors.

    Args:
        df: DataFrame with a ``text`` column and, when ``include_labels`` is
            true, a ``judgement`` column.
        model_name: Name or path passed to ``BertTokenizer.from_pretrained``.
        include_labels: Whether each item also carries its ``judgement`` label.
    """

    def __init__(self, df, model_name, include_labels=True):
        tokenizer = T.BertTokenizer.from_pretrained(model_name)

        self.df = df
        self.include_labels = include_labels

        self.text = df["text"]
        self.encoded = tokenizer.batch_encode_plus(
            # Hand the tokenizer a plain list rather than a pandas Series.
            self.text.tolist(),
            padding='max_length',
            # Without truncation a text longer than max_length keeps its full
            # length, so items would differ in size and batching would fail.
            truncation=True,
            max_length=500,
            return_attention_mask=True,
        )

        if self.include_labels:
            self.labels = df["judgement"].values

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask[, label])`` for row ``idx``."""
        input_ids = torch.tensor(self.encoded['input_ids'][idx])
        attention_mask = torch.tensor(self.encoded['attention_mask'][idx])

        if self.include_labels:
            # Labels are returned as float tensors.
            label = torch.tensor(self.labels[idx]).float()
            return input_ids, attention_mask, label

        return input_ids, attention_mask

In [10]:
class BaseModel(nn.Module):
    """BERT sequence classifier with a single sigmoid output per sample.

    Args:
        model_name: Name or path passed to
            ``BertForSequenceClassification.from_pretrained``.
    """

    def __init__(self, model_name):
        super().__init__()

        self.model = T.BertForSequenceClassification.from_pretrained(model_name, num_labels=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask):
        """Return one probability per sample as a 1-D tensor.

        Args:
            input_ids: Token id tensor for the batch.
            attention_mask: Attention mask tensor matching ``input_ids``.
        """
        out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        # Drop only the trailing label dimension. A bare squeeze() would also
        # remove the batch dimension when the batch holds one sample, leaving
        # a 0-d tensor that the loss and np.concatenate cannot handle.
        out = self.sigmoid(out.logits).squeeze(-1)

        return out

In [11]:
class FocalLoss(nn.Module):
    """Focal loss for binary classification, computed from probabilities.

    Args:
        alpha: Class weight given to label 0; label 1 is weighted ``1 - alpha``.
        gamma: Focusing exponent applied to ``(1 - p)``.
    """

    def __init__(self, alpha=0.25, gamma=2):
        super().__init__()
        self.weight = torch.Tensor([alpha, 1 - alpha])
        # Built on the CPU; forward() moves it to the device of its input, so
        # the loss also works on machines without CUDA.
        self.nllLoss = nn.NLLLoss(weight=self.weight)
        self.gamma = gamma
        # Lower bound for probabilities before taking the log.
        self.eps = 1e-7

    def forward(self, input, target):
        """Return the class-weighted mean focal loss.

        Args:
            input: Probability of the positive class for each sample (for
                example a sigmoid output); a 0-d tensor counts as one sample.
            target: Binary labels with the same number of elements as ``input``.
        """
        positive = input.reshape(-1, 1)
        # Column 0 is P(label 0), column 1 is P(label 1). Clamping keeps
        # log() finite when a probability saturates at exactly 0.
        probs = torch.cat([1 - positive, positive], dim=1).clamp(min=self.eps)
        log_probs = torch.log(probs)
        # Down-weight well-classified samples by (1 - p) ** gamma.
        modulated = (1 - probs) ** self.gamma * log_probs
        self.nllLoss.to(positive.device)
        return self.nllLoss(modulated, target.reshape(-1).long())

In [12]:
def under_sampling(data: pd.DataFrame):
    """Balance the classes by randomly down-sampling the ``judgement == 0`` rows.

    Args:
        data: DataFrame with a binary ``judgement`` column.

    Returns:
        A new DataFrame holding the sampled ``judgement == 0`` rows followed
        by all ``judgement == 1`` rows, re-indexed from 0.
    """
    minority_rows = data[data["judgement"] == 1]
    majority_labels = data[data["judgement"] == 0].index

    # Draw, without replacement, as many majority row labels as there are
    # minority rows.
    picked_labels = np.random.choice(majority_labels, len(minority_rows), replace=False)
    majority_rows = data.loc[picked_labels]

    # Majority sample first, then the minority rows.
    combined = pd.concat([majority_rows, minority_rows], ignore_index=True)
    return pd.DataFrame(combined)


In [13]:
class AverageMeter:
    """Running weighted mean that also remembers the most recent value."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every accumulated statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``"<minutes>m <seconds>s"``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return "%dm %ds" % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe the time elapsed since ``since`` and the estimated time left.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work already done; must be non-zero.

    Returns:
        A string such as ``"1m 5s (remain 2m 10s)"``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the progress made so far.
    projected_total = elapsed / percent
    remaining = projected_total - elapsed
    return "%s (remain %s)" % (asMinutes(elapsed), asMinutes(remaining))

In [14]:
def train_fn(train_loader, model, criterion, optimizer, epoch, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(input_ids, attention_mask, labels)``.
        model: Module called as ``model(input_ids, attention_mask)``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Zero-based epoch number, used only in the progress output.
        device: Device the batch tensors are moved to.

    Returns:
        The sample-weighted mean training loss over the pass.
    """
    started_at = time.time()
    loss_meter = AverageMeter()
    num_steps = len(train_loader)
    last_step = num_steps - 1

    # Enable training-mode behaviour (e.g. dropout).
    model.train()

    for step, batch in enumerate(train_loader):
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        y_preds = model(input_ids, attention_mask)
        loss = criterion(y_preds, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the running value is a per-sample mean.
        loss_meter.update(loss.item(), labels.size(0))

        # Report every 100 steps and on the final step.
        if step % 100 == 0 or step == last_step:
            progress = float(step + 1) / num_steps
            print(
                f"Epoch: [{epoch + 1}][{step}/{num_steps}] "
                f"Elapsed {timeSince(started_at, progress):s} "
                f"Loss: {loss_meter.avg:.4f} "
            )

    return loss_meter.avg

In [15]:
def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader`` without updating its weights.

    Args:
        valid_loader: Iterable yielding ``(input_ids, attention_mask, labels)``.
        model: Module called as ``model(input_ids, attention_mask)``.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, predictions)``: the sample-weighted mean loss and
        the model outputs of all batches concatenated in loader order.
    """
    start = time.time()
    losses = AverageMeter()

    # switch to evaluation mode
    model.eval()
    preds = []

    for step, (input_ids, attention_mask, labels) in enumerate(valid_loader):
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Neither the forward pass nor the loss needs gradients here.
        with torch.no_grad():
            y_preds = model(input_ids, attention_mask)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        # A single-sample batch may come back as a 0-d array, which
        # np.concatenate rejects; atleast_1d leaves other shapes unchanged.
        preds.append(np.atleast_1d(y_preds.to("cpu").numpy()))

        if step % 100 == 0 or step == (len(valid_loader) - 1):
            print(
                f"EVAL: [{step}/{len(valid_loader)}] "
                f"Elapsed {timeSince(start, float(step + 1) / len(valid_loader)):s} "
                f"Loss: {losses.avg:.4f} "
            )

    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [16]:
def train_loop(train, fold):
    """Train and validate one cross-validation fold.

    Trains for 10 epochs and checkpoints the weights and validation
    predictions of the epoch with the best threshold-optimized F-beta score.

    Args:
        train: DataFrame with ``text``, ``judgement`` and ``fold`` columns.
        fold: Value of the ``fold`` column held out for validation.

    Returns:
        The validation rows of this fold with an added ``preds`` column that
        holds the predictions of the best-scoring epoch.
    """
    model_name = "bert-base-uncased"
    checkpoint_path = OUTPUT_DIR + f"{model_name}_fold{fold}_best.pth"

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # Data Loader
    # ====================================================
    trn_idx = train[train["fold"] != fold].index
    val_idx = train[train["fold"] == fold].index

    train_folds = train.loc[trn_idx].reset_index(drop=True)
    valid_folds = train.loc[val_idx].reset_index(drop=True)
    # Under-sampling of the training split is available but disabled:
    #     train_folds = under_sampling(train_folds)
    train_dataset = BaseDataset(train_folds, model_name)
    valid_dataset = BaseDataset(valid_folds, model_name)

    train_loader = DataLoader(
        train_dataset,
        batch_size=64,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=64,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    # ====================================================
    # Model
    # ====================================================
    model = BaseModel(model_name)
    model.to(device)
    if torch.cuda.device_count() >= 2:
        model = nn.DataParallel(model)

    # NOTE(review): AdamW from transformers is reported as deprecated in
    # recent releases; torch.optim.AdamW has different defaults, so confirm
    # before switching.
    optimizer = T.AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)

    criterion = FocalLoss()
    # ====================================================
    # Loop
    # ====================================================
    best_score = -1

    for epoch in range(10):
        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        valid_labels = valid_folds["judgement"].values

        # scoring
        score, border = metrics(valid_labels, preds)

        elapsed = time.time() - start_time
        LOGGER.info(
            f"Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s"
        )
        LOGGER.info(f"Epoch {epoch+1} - Score: {score} -Border: {border}")

        if score > best_score:
            best_score = score
            best_border = border
            LOGGER.info(f"Epoch {epoch+1} - Save Best Score: {best_score:.4f} - Best Border: {best_border:.4f} Model")
            # Save the bare model's weights. A DataParallel wrapper prefixes
            # every key with "module.", which cannot be loaded into the
            # unwrapped BaseModel that inference() builds.
            bare_model = model.module if isinstance(model, nn.DataParallel) else model
            torch.save({"model": bare_model.state_dict(), "preds": preds}, checkpoint_path)

    # Only the stored predictions are needed here, so keep the reloaded
    # weights on the CPU instead of allocating them on the GPU again.
    check_point = torch.load(checkpoint_path, map_location="cpu")

    valid_folds["preds"] = check_point["preds"]

    return valid_folds

In [17]:
def inference():
    """Predict the test set with each fold's best checkpoint and average the results.

    Returns:
        NumPy array with the mean prediction over the 5 fold models for every
        test row, in the order of the ``test`` DataFrame.
    """
    model_name = "bert-base-uncased"
    prefix = "module."

    test_dataset = BaseDataset(test, model_name, include_labels=False)
    test_loader = DataLoader(
        test_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True
    )

    predictions = []
    for fold in range(5):
        LOGGER.info(f"========== model: bert-base-uncased fold: {fold} inference ==========")
        model = BaseModel(model_name)
        # map_location lets a checkpoint written on a GPU load on whatever
        # device is in use now, including a CPU-only machine.
        state_dict = torch.load(
            OUTPUT_DIR + f"{model_name}_fold{fold}_best.pth", map_location=device
        )["model"]
        # Checkpoints saved from an nn.DataParallel wrapper carry a "module."
        # prefix on every key; strip it so they load into the bare model.
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()

        preds = []
        for input_ids, attention_mask in tqdm(test_loader, total=len(test_loader)):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            with torch.no_grad():
                y_preds = model(input_ids, attention_mask)
            # A single-sample batch may come back as a 0-d array, which
            # np.concatenate rejects.
            preds.append(np.atleast_1d(y_preds.to("cpu").numpy()))
        predictions.append(np.concatenate(preds))

    # Average the fold models sample by sample.
    return np.mean(predictions, axis=0)

In [18]:
def get_result(result_df):
    """Log the threshold-optimized score of a prediction table.

    Args:
        result_df: DataFrame with ``judgement`` (labels) and ``preds``
            (predicted scores) columns.

    Returns:
        The decision threshold chosen by ``metrics``.
    """
    score, border = metrics(result_df["judgement"].values, result_df["preds"].values)
    LOGGER.info(f"Score: {score:<.5f}, Border: {border:<.5f}")
    return border

In [19]:
def main():
    """Run 5-fold training, report the CV score and write the output files.

    Writes ``oof_df.csv`` (out-of-fold predictions) and ``submission.csv``
    (binary test predictions) into ``OUTPUT_DIR``.
    """
    # NOTE: training always runs; reusing a previously saved oof_df.csv is not
    # implemented here.

    # Training
    fold_frames = []
    for fold in range(5):
        fold_oof = train_loop(train, fold)
        fold_frames.append(fold_oof)
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(fold_oof)
    # Concatenate once at the end instead of growing a DataFrame from an
    # empty one inside the loop.
    oof_df = pd.concat(fold_frames)

    # CV result
    LOGGER.info(f"========== CV ==========")
    border = get_result(oof_df)

    # Save OOF result
    oof_df.to_csv(OUTPUT_DIR + "oof_df.csv", index=False)

    # Inference: binarize with the threshold tuned on the out-of-fold predictions.
    predictions = inference()
    predictions = np.where(predictions < border, 0, 1)

    # submission
    sub["judgement"] = predictions
    sub.to_csv(OUTPUT_DIR + "submission.csv", index=False, header=False)

In [None]:
# Run the whole pipeline when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch: [1][0/339] Elapsed 0m 2s (remain 16m 23s) Loss: 0.2693 
Epoch: [1][100/339] Elapsed 0m 47s (remain 1m 52s) Loss: 0.0706 
Epoch: [1][200/339] Elapsed 1m 33s (remain 1m 4s) Loss: 0.0587 
Epoch: [1][300/339] Elapsed 2m 20s (remain 0m 17s) Loss: 0.0516 
Epoch: [1][338/339] Elapsed 2m 38s (remain 0m 0s) Loss: 0.0498 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0352 


Epoch 1 - avg_train_loss: 0.0498  avg_val_loss: 0.0411  time: 173s
Epoch 1 - Score: 0.8534432018834608 -Border: 0.408661293766298
Epoch 1 - Save Best Score: 0.8534 - Save Best Score: 0.4087 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0411 
bt:0.408661293766298




Epoch: [2][0/339] Elapsed 0m 0s (remain 3m 12s) Loss: 0.0335 
Epoch: [2][100/339] Elapsed 0m 48s (remain 1m 55s) Loss: 0.0282 
Epoch: [2][200/339] Elapsed 1m 37s (remain 1m 6s) Loss: 0.0263 
Epoch: [2][300/339] Elapsed 2m 25s (remain 0m 18s) Loss: 0.0239 




Epoch: [2][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0238 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0091 


Epoch 2 - avg_train_loss: 0.0238  avg_val_loss: 0.0238  time: 179s
Epoch 2 - Score: 0.89126559714795 -Border: 0.18289388523582206
Epoch 2 - Save Best Score: 0.8913 - Save Best Score: 0.1829 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0238 
bt:0.18289388523582206




Epoch: [3][0/339] Elapsed 0m 0s (remain 3m 17s) Loss: 0.0092 
Epoch: [3][100/339] Elapsed 0m 50s (remain 1m 57s) Loss: 0.0142 
Epoch: [3][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0152 
Epoch: [3][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0147 




Epoch: [3][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0143 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0104 


Epoch 3 - avg_train_loss: 0.0143  avg_val_loss: 0.0260  time: 180s
Epoch 3 - Score: 0.8956796628029503 -Border: 0.12012782515859108
Epoch 3 - Save Best Score: 0.8957 - Save Best Score: 0.1201 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0260 
bt:0.12012782515859108




Epoch: [4][0/339] Elapsed 0m 0s (remain 3m 13s) Loss: 0.0017 
Epoch: [4][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0083 
Epoch: [4][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0106 
Epoch: [4][300/339] Elapsed 2m 25s (remain 0m 18s) Loss: 0.0105 




Epoch: [4][338/339] Elapsed 2m 43s (remain 0m 0s) Loss: 0.0105 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 21s) Loss: 0.0378 


Epoch 4 - avg_train_loss: 0.0105  avg_val_loss: 0.0319  time: 177s
Epoch 4 - Score: 0.8901399878271455 -Border: 0.10225198976065401


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0319 
bt:0.10225198976065401




Epoch: [5][0/339] Elapsed 0m 0s (remain 3m 57s) Loss: 0.0074 
Epoch: [5][100/339] Elapsed 0m 48s (remain 1m 53s) Loss: 0.0063 
Epoch: [5][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0066 
Epoch: [5][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0068 




Epoch: [5][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0067 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 21s) Loss: 0.0708 


Epoch 5 - avg_train_loss: 0.0067  avg_val_loss: 0.0362  time: 175s
Epoch 5 - Score: 0.8696321379098045 -Border: 0.10440480611427669


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0362 
bt:0.10440480611427669




Epoch: [6][0/339] Elapsed 0m 0s (remain 3m 18s) Loss: 0.0023 
Epoch: [6][100/339] Elapsed 0m 47s (remain 1m 50s) Loss: 0.0048 
Epoch: [6][200/339] Elapsed 1m 33s (remain 1m 4s) Loss: 0.0042 
Epoch: [6][300/339] Elapsed 2m 20s (remain 0m 17s) Loss: 0.0058 




Epoch: [6][338/339] Elapsed 2m 38s (remain 0m 0s) Loss: 0.0058 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0366 


Epoch 6 - avg_train_loss: 0.0058  avg_val_loss: 0.0472  time: 173s
Epoch 6 - Score: 0.8439781021897809 -Border: 0.05979500589043981


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0472 
bt:0.05979500589043981




Epoch: [7][0/339] Elapsed 0m 0s (remain 3m 8s) Loss: 0.0014 
Epoch: [7][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0041 
Epoch: [7][200/339] Elapsed 1m 36s (remain 1m 6s) Loss: 0.0032 
Epoch: [7][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0036 




Epoch: [7][338/339] Elapsed 2m 42s (remain 0m 0s) Loss: 0.0037 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 21s) Loss: 0.0048 


Epoch 7 - avg_train_loss: 0.0037  avg_val_loss: 0.0362  time: 176s
Epoch 7 - Score: 0.9055530609190241 -Border: 0.1068489971029441
Epoch 7 - Save Best Score: 0.9056 - Save Best Score: 0.1068 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0362 
bt:0.1068489971029441




Epoch: [8][0/339] Elapsed 0m 0s (remain 3m 12s) Loss: 0.0027 
Epoch: [8][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0034 
Epoch: [8][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0035 
Epoch: [8][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0034 




Epoch: [8][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0036 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 21s) Loss: 0.0073 


Epoch 8 - avg_train_loss: 0.0036  avg_val_loss: 0.0418  time: 176s
Epoch 8 - Score: 0.877856818525806 -Border: 0.055255827474104346


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0418 
bt:0.055255827474104346




Epoch: [9][0/339] Elapsed 0m 0s (remain 3m 29s) Loss: 0.0011 
Epoch: [9][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0009 
Epoch: [9][200/339] Elapsed 1m 36s (remain 1m 6s) Loss: 0.0014 
Epoch: [9][300/339] Elapsed 2m 24s (remain 0m 18s) Loss: 0.0023 




Epoch: [9][338/339] Elapsed 2m 43s (remain 0m 0s) Loss: 0.0026 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 26s) Loss: 0.0050 


Epoch 9 - avg_train_loss: 0.0026  avg_val_loss: 0.0364  time: 179s
Epoch 9 - Score: 0.8664858348402652 -Border: 0.09287517499790993


EVAL: [84/85] Elapsed 0m 15s (remain 0m 0s) Loss: 0.0364 
bt:0.09287517499790993




Epoch: [10][0/339] Elapsed 0m 0s (remain 3m 19s) Loss: 0.0012 
Epoch: [10][100/339] Elapsed 0m 51s (remain 2m 0s) Loss: 0.0028 
Epoch: [10][200/339] Elapsed 1m 41s (remain 1m 9s) Loss: 0.0040 
Epoch: [10][300/339] Elapsed 2m 31s (remain 0m 19s) Loss: 0.0038 




Epoch: [10][338/339] Elapsed 2m 50s (remain 0m 0s) Loss: 0.0035 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0056 


Epoch 10 - avg_train_loss: 0.0035  avg_val_loss: 0.0445  time: 186s
Epoch 10 - Score: 0.8611492150586801 -Border: 0.05232792585702036


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0445 
bt:0.05232792585702036


Score: 0.90555, Border: 0.10685


bt:0.1068489971029441


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch: [1][0/339] Elapsed 0m 0s (remain 3m 26s) Loss: 0.3164 
Epoch: [1][100/339] Elapsed 0m 48s (remain 1m 53s) Loss: 0.0666 
Epoch: [1][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0592 
Epoch: [1][300/339] Elapsed 2m 28s (remain 0m 18s) Loss: 0.0527 




Epoch: [1][338/339] Elapsed 2m 48s (remain 0m 0s) Loss: 0.0507 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0289 


Epoch 1 - avg_train_loss: 0.0507  avg_val_loss: 0.0332  time: 184s
Epoch 1 - Score: 0.8154389779831475 -Border: 0.26263646823421977
Epoch 1 - Save Best Score: 0.8154 - Save Best Score: 0.2626 Model


EVAL: [84/85] Elapsed 0m 15s (remain 0m 0s) Loss: 0.0332 
bt:0.26263646823421977




Epoch: [2][0/339] Elapsed 0m 0s (remain 3m 33s) Loss: 0.0203 
Epoch: [2][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0285 
Epoch: [2][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0272 
Epoch: [2][300/339] Elapsed 2m 26s (remain 0m 18s) Loss: 0.0275 




Epoch: [2][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0265 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0172 


Epoch 2 - avg_train_loss: 0.0265  avg_val_loss: 0.0207  time: 180s
Epoch 2 - Score: 0.9188473978241695 -Border: 0.18249760369804768
Epoch 2 - Save Best Score: 0.9188 - Save Best Score: 0.1825 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0207 
bt:0.18249760369804768




Epoch: [3][0/339] Elapsed 0m 0s (remain 3m 24s) Loss: 0.0065 
Epoch: [3][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0147 
Epoch: [3][200/339] Elapsed 1m 36s (remain 1m 6s) Loss: 0.0155 
Epoch: [3][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0164 




Epoch: [3][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0161 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0219 


Epoch 3 - avg_train_loss: 0.0161  avg_val_loss: 0.0253  time: 175s
Epoch 3 - Score: 0.9059598682240192 -Border: 0.09912020563678896


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0253 
bt:0.09912020563678896




Epoch: [4][0/339] Elapsed 0m 0s (remain 3m 11s) Loss: 0.0020 
Epoch: [4][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0099 
Epoch: [4][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0112 
Epoch: [4][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0113 




Epoch: [4][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0113 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0158 


Epoch 4 - avg_train_loss: 0.0113  avg_val_loss: 0.0222  time: 175s
Epoch 4 - Score: 0.916287934320481 -Border: 0.10398990177139467


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0222 
bt:0.10398990177139467




Epoch: [5][0/339] Elapsed 0m 0s (remain 3m 13s) Loss: 0.0018 
Epoch: [5][100/339] Elapsed 0m 48s (remain 1m 54s) Loss: 0.0058 
Epoch: [5][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0069 
Epoch: [5][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0077 
Epoch: [5][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0076 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0181 


Epoch 5 - avg_train_loss: 0.0076  avg_val_loss: 0.0382  time: 175s
Epoch 5 - Score: 0.898203592814371 -Border: 0.045168124078730304


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0382 
bt:0.045168124078730304




Epoch: [6][0/339] Elapsed 0m 0s (remain 3m 15s) Loss: 0.0025 
Epoch: [6][100/339] Elapsed 0m 48s (remain 1m 53s) Loss: 0.0036 
Epoch: [6][200/339] Elapsed 1m 36s (remain 1m 5s) Loss: 0.0049 
Epoch: [6][300/339] Elapsed 2m 22s (remain 0m 18s) Loss: 0.0047 
Epoch: [6][338/339] Elapsed 2m 40s (remain 0m 0s) Loss: 0.0046 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0926 




EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0611 
bt:0.029614846874064377


Epoch 6 - avg_train_loss: 0.0046  avg_val_loss: 0.0611  time: 174s
Epoch 6 - Score: 0.876257238646754 -Border: 0.029614846874064377


Epoch: [7][0/339] Elapsed 0m 0s (remain 3m 17s) Loss: 0.0001 
Epoch: [7][100/339] Elapsed 0m 47s (remain 1m 52s) Loss: 0.0013 
Epoch: [7][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0014 
Epoch: [7][300/339] Elapsed 2m 22s (remain 0m 18s) Loss: 0.0023 




Epoch: [7][338/339] Elapsed 2m 40s (remain 0m 0s) Loss: 0.0023 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0251 




EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0467 


Epoch 7 - avg_train_loss: 0.0023  avg_val_loss: 0.0467  time: 175s
Epoch 7 - Score: 0.908953188910771 -Border: 0.030900437433500766


bt:0.030900437433500766




Epoch: [8][0/339] Elapsed 0m 0s (remain 3m 15s) Loss: 0.0001 
Epoch: [8][100/339] Elapsed 0m 47s (remain 1m 51s) Loss: 0.0044 
Epoch: [8][200/339] Elapsed 1m 34s (remain 1m 4s) Loss: 0.0036 
Epoch: [8][300/339] Elapsed 2m 22s (remain 0m 17s) Loss: 0.0035 




Epoch: [8][338/339] Elapsed 2m 40s (remain 0m 0s) Loss: 0.0038 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0318 


Epoch 8 - avg_train_loss: 0.0038  avg_val_loss: 0.0448  time: 175s
Epoch 8 - Score: 0.8799878622363829 -Border: 0.049308438438559386


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0448 
bt:0.049308438438559386




Epoch: [9][0/339] Elapsed 0m 0s (remain 3m 9s) Loss: 0.0001 
Epoch: [9][100/339] Elapsed 0m 48s (remain 1m 55s) Loss: 0.0020 
Epoch: [9][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0016 
Epoch: [9][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0024 




Epoch: [9][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0023 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0217 


Epoch 9 - avg_train_loss: 0.0023  avg_val_loss: 0.0778  time: 176s
Epoch 9 - Score: 0.862845138055222 -Border: 0.02121363309446008


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0778 
bt:0.02121363309446008




Epoch: [10][0/339] Elapsed 0m 0s (remain 3m 17s) Loss: 0.0008 
Epoch: [10][100/339] Elapsed 0m 48s (remain 1m 53s) Loss: 0.0017 
Epoch: [10][200/339] Elapsed 1m 35s (remain 1m 5s) Loss: 0.0025 
Epoch: [10][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0031 




Epoch: [10][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0028 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0307 


Epoch 10 - avg_train_loss: 0.0028  avg_val_loss: 0.0499  time: 175s
Epoch 10 - Score: 0.9073185362927414 -Border: 0.02884132720887169


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0499 
bt:0.02884132720887169


Score: 0.91885, Border: 0.18250


bt:0.18249760369804768


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch: [1][0/339] Elapsed 0m 0s (remain 3m 17s) Loss: 0.1042 
Epoch: [1][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0630 
Epoch: [1][200/339] Elapsed 1m 31s (remain 1m 2s) Loss: 0.0547 
Epoch: [1][300/339] Elapsed 2m 17s (remain 0m 17s) Loss: 0.0467 




Epoch: [1][338/339] Elapsed 2m 35s (remain 0m 0s) Loss: 0.0449 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0168 


Epoch 1 - avg_train_loss: 0.0449  avg_val_loss: 0.0259  time: 170s
Epoch 1 - Score: 0.8676625924249282 -Border: 0.33729144540175715
Epoch 1 - Save Best Score: 0.8677 - Save Best Score: 0.3373 Model


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0259 
bt:0.33729144540175715




Epoch: [2][0/339] Elapsed 0m 0s (remain 3m 16s) Loss: 0.1036 
Epoch: [2][100/339] Elapsed 0m 47s (remain 1m 51s) Loss: 0.0238 
Epoch: [2][200/339] Elapsed 1m 33s (remain 1m 4s) Loss: 0.0211 
Epoch: [2][300/339] Elapsed 2m 20s (remain 0m 17s) Loss: 0.0211 
Epoch: [2][338/339] Elapsed 2m 38s (remain 0m 0s) Loss: 0.0210 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0114 


Epoch 2 - avg_train_loss: 0.0210  avg_val_loss: 0.0201  time: 174s
Epoch 2 - Score: 0.8914847832769749 -Border: 0.3360162182891184
Epoch 2 - Save Best Score: 0.8915 - Save Best Score: 0.3360 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0201 
bt:0.3360162182891184




Epoch: [3][0/339] Elapsed 0m 0s (remain 3m 21s) Loss: 0.0061 
Epoch: [3][100/339] Elapsed 0m 47s (remain 1m 51s) Loss: 0.0113 
Epoch: [3][200/339] Elapsed 1m 34s (remain 1m 4s) Loss: 0.0112 
Epoch: [3][300/339] Elapsed 2m 20s (remain 0m 17s) Loss: 0.0119 
Epoch: [3][338/339] Elapsed 2m 38s (remain 0m 0s) Loss: 0.0123 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0069 


Epoch 3 - avg_train_loss: 0.0123  avg_val_loss: 0.0250  time: 172s
Epoch 3 - Score: 0.8562605277933745 -Border: 0.1767138597915193


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0250 
bt:0.1767138597915193




Epoch: [4][0/339] Elapsed 0m 0s (remain 3m 14s) Loss: 0.0076 
Epoch: [4][100/339] Elapsed 0m 48s (remain 1m 53s) Loss: 0.0089 
Epoch: [4][200/339] Elapsed 1m 33s (remain 1m 3s) Loss: 0.0086 
Epoch: [4][300/339] Elapsed 2m 18s (remain 0m 17s) Loss: 0.0090 




Epoch: [4][338/339] Elapsed 2m 34s (remain 0m 0s) Loss: 0.0092 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.0040 
EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0304 


Epoch 4 - avg_train_loss: 0.0092  avg_val_loss: 0.0304  time: 168s
Epoch 4 - Score: 0.8783683191901146 -Border: 0.09031556363687128


bt:0.09031556363687128




Epoch: [5][0/339] Elapsed 0m 0s (remain 3m 10s) Loss: 0.0055 
Epoch: [5][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0047 
Epoch: [5][200/339] Elapsed 1m 30s (remain 1m 2s) Loss: 0.0042 
Epoch: [5][300/339] Elapsed 2m 17s (remain 0m 17s) Loss: 0.0048 




Epoch: [5][338/339] Elapsed 2m 36s (remain 0m 0s) Loss: 0.0049 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0077 


Epoch 5 - avg_train_loss: 0.0049  avg_val_loss: 0.0405  time: 171s
Epoch 5 - Score: 0.8800370541917555 -Border: 0.09276402461920152


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0405 
bt:0.09276402461920152




Epoch: [6][0/339] Elapsed 0m 0s (remain 3m 23s) Loss: 0.0019 
Epoch: [6][100/339] Elapsed 0m 50s (remain 1m 59s) Loss: 0.0016 
Epoch: [6][200/339] Elapsed 1m 40s (remain 1m 9s) Loss: 0.0022 
Epoch: [6][300/339] Elapsed 2m 30s (remain 0m 18s) Loss: 0.0038 




Epoch: [6][338/339] Elapsed 2m 49s (remain 0m 0s) Loss: 0.0038 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0067 


Epoch 6 - avg_train_loss: 0.0038  avg_val_loss: 0.0367  time: 184s
Epoch 6 - Score: 0.8851566046300499 -Border: 0.08290142784531139


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0367 
bt:0.08290142784531139




Epoch: [7][0/339] Elapsed 0m 0s (remain 3m 30s) Loss: 0.0007 
Epoch: [7][100/339] Elapsed 0m 50s (remain 1m 58s) Loss: 0.0034 
Epoch: [7][200/339] Elapsed 1m 39s (remain 1m 8s) Loss: 0.0038 
Epoch: [7][300/339] Elapsed 2m 29s (remain 0m 18s) Loss: 0.0037 
Epoch: [7][338/339] Elapsed 2m 47s (remain 0m 0s) Loss: 0.0036 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 25s) Loss: 0.0002 




EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0630 


Epoch 7 - avg_train_loss: 0.0036  avg_val_loss: 0.0630  time: 183s
Epoch 7 - Score: 0.8548948403691935 -Border: 0.02577691609776907


bt:0.02577691609776907




Epoch: [8][0/339] Elapsed 0m 0s (remain 3m 32s) Loss: 0.0016 
Epoch: [8][100/339] Elapsed 0m 50s (remain 1m 58s) Loss: 0.0009 
Epoch: [8][200/339] Elapsed 1m 39s (remain 1m 8s) Loss: 0.0020 
Epoch: [8][300/339] Elapsed 2m 28s (remain 0m 18s) Loss: 0.0023 




Epoch: [8][338/339] Elapsed 2m 47s (remain 0m 0s) Loss: 0.0021 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0116 


Epoch 8 - avg_train_loss: 0.0021  avg_val_loss: 0.0679  time: 182s
Epoch 8 - Score: 0.8538201089356688 -Border: 0.01896094842128991


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0679 
bt:0.01896094842128991




Epoch: [9][0/339] Elapsed 0m 0s (remain 3m 25s) Loss: 0.0001 
Epoch: [9][100/339] Elapsed 0m 50s (remain 1m 58s) Loss: 0.0001 
Epoch: [9][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0004 
Epoch: [9][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0020 
Epoch: [9][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0020 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0013 


Epoch 9 - avg_train_loss: 0.0020  avg_val_loss: 0.0652  time: 181s
Epoch 9 - Score: 0.8640720302062154 -Border: 0.021834148472449445


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0652 
bt:0.021834148472449445




Epoch: [10][0/339] Elapsed 0m 0s (remain 3m 21s) Loss: 0.0010 
Epoch: [10][100/339] Elapsed 0m 49s (remain 1m 55s) Loss: 0.0010 
Epoch: [10][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0023 
Epoch: [10][300/339] Elapsed 2m 26s (remain 0m 18s) Loss: 0.0019 




Epoch: [10][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0019 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0027 


Epoch 10 - avg_train_loss: 0.0019  avg_val_loss: 0.0577  time: 179s
Epoch 10 - Score: 0.8724832214765099 -Border: 0.038256535247916335


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0577 
bt:0.038256535247916335


Score: 0.89148, Border: 0.33602


bt:0.3360162182891184


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch: [1][0/339] Elapsed 0m 0s (remain 3m 19s) Loss: 0.1076 
Epoch: [1][100/339] Elapsed 0m 47s (remain 1m 52s) Loss: 0.0619 
Epoch: [1][200/339] Elapsed 1m 37s (remain 1m 6s) Loss: 0.0513 
Epoch: [1][300/339] Elapsed 2m 25s (remain 0m 18s) Loss: 0.0457 
Epoch: [1][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0442 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.0444 


Epoch 1 - avg_train_loss: 0.0442  avg_val_loss: 0.0307  time: 179s
Epoch 1 - Score: 0.827861832714816 -Border: 0.2376326225420507
Epoch 1 - Save Best Score: 0.8279 - Save Best Score: 0.2376 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0307 
bt:0.2376326225420507




Epoch: [2][0/339] Elapsed 0m 0s (remain 3m 26s) Loss: 0.0252 
Epoch: [2][100/339] Elapsed 0m 48s (remain 1m 55s) Loss: 0.0230 
Epoch: [2][200/339] Elapsed 1m 37s (remain 1m 6s) Loss: 0.0227 
Epoch: [2][300/339] Elapsed 2m 25s (remain 0m 18s) Loss: 0.0223 




Epoch: [2][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0216 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 23s) Loss: 0.1049 


Epoch 2 - avg_train_loss: 0.0216  avg_val_loss: 0.0320  time: 179s
Epoch 2 - Score: 0.8718726307808946 -Border: 0.17714993480949992
Epoch 2 - Save Best Score: 0.8719 - Save Best Score: 0.1771 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0320 
bt:0.17714993480949992




Epoch: [3][0/339] Elapsed 0m 0s (remain 3m 21s) Loss: 0.0035 
Epoch: [3][100/339] Elapsed 0m 50s (remain 1m 58s) Loss: 0.0112 
Epoch: [3][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0137 
Epoch: [3][300/339] Elapsed 2m 23s (remain 0m 18s) Loss: 0.0136 




Epoch: [3][338/339] Elapsed 2m 41s (remain 0m 0s) Loss: 0.0140 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1287 




EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0282 


Epoch 3 - avg_train_loss: 0.0140  avg_val_loss: 0.0282  time: 175s
Epoch 3 - Score: 0.8757727406535177 -Border: 0.24992044171640534
Epoch 3 - Save Best Score: 0.8758 - Save Best Score: 0.2499 Model


bt:0.24992044171640534




Epoch: [4][0/339] Elapsed 0m 0s (remain 3m 17s) Loss: 0.0083 
Epoch: [4][100/339] Elapsed 0m 46s (remain 1m 48s) Loss: 0.0090 
Epoch: [4][200/339] Elapsed 1m 31s (remain 1m 2s) Loss: 0.0078 
Epoch: [4][300/339] Elapsed 2m 15s (remain 0m 17s) Loss: 0.0088 
Epoch: [4][338/339] Elapsed 2m 32s (remain 0m 0s) Loss: 0.0086 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1721 


Epoch 4 - avg_train_loss: 0.0086  avg_val_loss: 0.0307  time: 165s
Epoch 4 - Score: 0.8695005945303211 -Border: 0.10281560152067358


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0307 
bt:0.10281560152067358




Epoch: [5][0/339] Elapsed 0m 0s (remain 3m 6s) Loss: 0.0152 
Epoch: [5][100/339] Elapsed 0m 44s (remain 1m 45s) Loss: 0.0054 
Epoch: [5][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0053 
Epoch: [5][300/339] Elapsed 2m 14s (remain 0m 16s) Loss: 0.0070 




Epoch: [5][338/339] Elapsed 2m 30s (remain 0m 0s) Loss: 0.0073 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1541 


Epoch 5 - avg_train_loss: 0.0073  avg_val_loss: 0.0344  time: 164s
Epoch 5 - Score: 0.8683390233041414 -Border: 0.08311164039302023


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0344 
bt:0.08311164039302023




Epoch: [6][0/339] Elapsed 0m 0s (remain 3m 7s) Loss: 0.0054 
Epoch: [6][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0042 
Epoch: [6][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0043 
Epoch: [6][300/339] Elapsed 2m 14s (remain 0m 16s) Loss: 0.0042 




Epoch: [6][338/339] Elapsed 2m 31s (remain 0m 0s) Loss: 0.0041 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1109 


Epoch 6 - avg_train_loss: 0.0041  avg_val_loss: 0.0488  time: 165s
Epoch 6 - Score: 0.8509725400457666 -Border: 0.034139830308887134


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0488 
bt:0.034139830308887134




Epoch: [7][0/339] Elapsed 0m 0s (remain 3m 18s) Loss: 0.0041 
Epoch: [7][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0029 
Epoch: [7][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0035 
Epoch: [7][300/339] Elapsed 2m 13s (remain 0m 16s) Loss: 0.0035 
Epoch: [7][338/339] Elapsed 2m 30s (remain 0m 0s) Loss: 0.0033 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.2029 


Epoch 7 - avg_train_loss: 0.0033  avg_val_loss: 0.0570  time: 164s
Epoch 7 - Score: 0.8290535583272193 -Border: 0.03308866210000992


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0570 
bt:0.03308866210000992




Epoch: [8][0/339] Elapsed 0m 0s (remain 3m 7s) Loss: 0.0001 
Epoch: [8][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0031 
Epoch: [8][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0031 
Epoch: [8][300/339] Elapsed 2m 13s (remain 0m 16s) Loss: 0.0029 




Epoch: [8][338/339] Elapsed 2m 30s (remain 0m 0s) Loss: 0.0033 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1412 


Epoch 8 - avg_train_loss: 0.0033  avg_val_loss: 0.0457  time: 164s
Epoch 8 - Score: 0.862683587622986 -Border: 0.038242884924652186


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0457 
bt:0.038242884924652186




Epoch: [9][0/339] Elapsed 0m 0s (remain 3m 10s) Loss: 0.0019 
Epoch: [9][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0026 
Epoch: [9][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0022 
Epoch: [9][300/339] Elapsed 2m 13s (remain 0m 16s) Loss: 0.0026 




Epoch: [9][338/339] Elapsed 2m 30s (remain 0m 0s) Loss: 0.0024 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1819 


Epoch 9 - avg_train_loss: 0.0024  avg_val_loss: 0.0640  time: 164s
Epoch 9 - Score: 0.8562383076701683 -Border: 0.021121166145770046


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0640 
bt:0.021121166145770046




Epoch: [10][0/339] Elapsed 0m 0s (remain 3m 10s) Loss: 0.0000 
Epoch: [10][100/339] Elapsed 0m 45s (remain 1m 46s) Loss: 0.0005 
Epoch: [10][200/339] Elapsed 1m 29s (remain 1m 1s) Loss: 0.0011 
Epoch: [10][300/339] Elapsed 2m 14s (remain 0m 16s) Loss: 0.0018 




Epoch: [10][338/339] Elapsed 2m 31s (remain 0m 0s) Loss: 0.0019 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 22s) Loss: 0.1818 


Epoch 10 - avg_train_loss: 0.0019  avg_val_loss: 0.0470  time: 165s
Epoch 10 - Score: 0.8338341346153847 -Border: 0.09506743298214337


EVAL: [84/85] Elapsed 0m 13s (remain 0m 0s) Loss: 0.0470 
bt:0.09506743298214337


Score: 0.87577, Border: 0.24992


bt:0.24992044171640534


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch: [1][0/339] Elapsed 0m 0s (remain 3m 18s) Loss: 0.1440 
Epoch: [1][100/339] Elapsed 0m 42s (remain 1m 41s) Loss: 0.0673 
Epoch: [1][200/339] Elapsed 1m 27s (remain 0m 59s) Loss: 0.0578 
Epoch: [1][300/339] Elapsed 2m 12s (remain 0m 16s) Loss: 0.0500 
Epoch: [1][338/339] Elapsed 2m 30s (remain 0m 0s) Loss: 0.0481 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0088 


Epoch 1 - avg_train_loss: 0.0481  avg_val_loss: 0.0298  time: 165s
Epoch 1 - Score: 0.8761682242990655 -Border: 0.25424599867615366
Epoch 1 - Save Best Score: 0.8762 - Save Best Score: 0.2542 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0298 
bt:0.25424599867615366




Epoch: [2][0/339] Elapsed 0m 0s (remain 3m 19s) Loss: 0.0334 
Epoch: [2][100/339] Elapsed 0m 49s (remain 1m 55s) Loss: 0.0232 
Epoch: [2][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0238 
Epoch: [2][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0229 




Epoch: [2][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0218 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0063 


Epoch 2 - avg_train_loss: 0.0218  avg_val_loss: 0.0302  time: 181s
Epoch 2 - Score: 0.8891523414344992 -Border: 0.12150116988477287
Epoch 2 - Save Best Score: 0.8892 - Save Best Score: 0.1215 Model


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0302 
bt:0.12150116988477287




Epoch: [3][0/339] Elapsed 0m 0s (remain 3m 20s) Loss: 0.0049 
Epoch: [3][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0155 
Epoch: [3][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0155 
Epoch: [3][300/339] Elapsed 2m 26s (remain 0m 18s) Loss: 0.0151 




Epoch: [3][338/339] Elapsed 2m 44s (remain 0m 0s) Loss: 0.0148 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0105 


Epoch 3 - avg_train_loss: 0.0148  avg_val_loss: 0.0268  time: 179s
Epoch 3 - Score: 0.869749681798897 -Border: 0.1108486793965637


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0268 
bt:0.1108486793965637




Epoch: [4][0/339] Elapsed 0m 0s (remain 3m 20s) Loss: 0.0056 
Epoch: [4][100/339] Elapsed 0m 49s (remain 1m 55s) Loss: 0.0087 
Epoch: [4][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0087 
Epoch: [4][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0087 
Epoch: [4][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0091 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0114 


Epoch 4 - avg_train_loss: 0.0091  avg_val_loss: 0.0306  time: 181s
Epoch 4 - Score: 0.8735254591608183 -Border: 0.12104437164912797


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0306 
bt:0.12104437164912797




Epoch: [5][0/339] Elapsed 0m 0s (remain 3m 25s) Loss: 0.0068 
Epoch: [5][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0047 
Epoch: [5][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0044 
Epoch: [5][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0060 




Epoch: [5][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0058 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0086 


Epoch 5 - avg_train_loss: 0.0058  avg_val_loss: 0.0441  time: 180s
Epoch 5 - Score: 0.8505193436700286 -Border: 0.06891844633002916


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0441 
bt:0.06891844633002916




Epoch: [6][0/339] Elapsed 0m 0s (remain 3m 24s) Loss: 0.0001 
Epoch: [6][100/339] Elapsed 0m 49s (remain 1m 55s) Loss: 0.0036 
Epoch: [6][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0038 
Epoch: [6][300/339] Elapsed 2m 27s (remain 0m 18s) Loss: 0.0042 




Epoch: [6][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0041 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 26s) Loss: 0.0161 


Epoch 6 - avg_train_loss: 0.0041  avg_val_loss: 0.0463  time: 181s
Epoch 6 - Score: 0.8745302110436542 -Border: 0.0313018921367905


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0463 
bt:0.0313018921367905




Epoch: [7][0/339] Elapsed 0m 0s (remain 3m 24s) Loss: 0.0033 
Epoch: [7][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0040 
Epoch: [7][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0038 
Epoch: [7][300/339] Elapsed 2m 26s (remain 0m 18s) Loss: 0.0038 
Epoch: [7][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0037 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0265 


Epoch 7 - avg_train_loss: 0.0037  avg_val_loss: 0.0563  time: 180s
Epoch 7 - Score: 0.8541487223657765 -Border: 0.02703092349781768


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0563 
bt:0.02703092349781768




Epoch: [8][0/339] Elapsed 0m 0s (remain 3m 28s) Loss: 0.0002 
Epoch: [8][100/339] Elapsed 0m 49s (remain 1m 55s) Loss: 0.0008 
Epoch: [8][200/339] Elapsed 1m 37s (remain 1m 6s) Loss: 0.0008 
Epoch: [8][300/339] Elapsed 2m 26s (remain 0m 18s) Loss: 0.0011 




Epoch: [8][338/339] Elapsed 2m 45s (remain 0m 0s) Loss: 0.0012 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 25s) Loss: 0.0133 


Epoch 8 - avg_train_loss: 0.0012  avg_val_loss: 0.0549  time: 180s
Epoch 8 - Score: 0.8495678921927641 -Border: 0.03082929947367925


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0549 
bt:0.03082929947367925




Epoch: [9][0/339] Elapsed 0m 0s (remain 3m 21s) Loss: 0.0034 
Epoch: [9][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0007 
Epoch: [9][200/339] Elapsed 1m 37s (remain 1m 6s) Loss: 0.0008 
Epoch: [9][300/339] Elapsed 2m 24s (remain 0m 18s) Loss: 0.0008 
Epoch: [9][338/339] Elapsed 2m 42s (remain 0m 0s) Loss: 0.0008 




EVAL: [0/85] Elapsed 0m 0s (remain 0m 24s) Loss: 0.0066 


Epoch 9 - avg_train_loss: 0.0008  avg_val_loss: 0.0769  time: 177s
Epoch 9 - Score: 0.850190559953093 -Border: 0.013487957388034658


EVAL: [84/85] Elapsed 0m 14s (remain 0m 0s) Loss: 0.0769 
bt:0.013487957388034658




Epoch: [10][0/339] Elapsed 0m 0s (remain 3m 19s) Loss: 0.0002 
Epoch: [10][100/339] Elapsed 0m 49s (remain 1m 56s) Loss: 0.0033 
Epoch: [10][200/339] Elapsed 1m 38s (remain 1m 7s) Loss: 0.0023 


In [None]:
!nvidia-smi

In [None]:
!ps -a

In [None]:
test