In [1]:
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = './Comonlist_025_3'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [2]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import itertools
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

# os.system('pip install iterative-stratification==0.1.7')
# from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

os.system('pip install -q transformers')
os.system('pip install -q tokenizers')
import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizers.__version__: 0.11.6
transformers.__version__: 4.29.2
env: TOKENIZERS_PARALLELISM=true


In [3]:
from torch.cuda.amp import autocast, GradScaler
from sklearn import metrics
from src.machine_learning_util import set_seed, set_device, init_logger, AverageMeter, to_pickle, unpickle, asMinutes, timeSince

In [4]:
class CFG:
    EXP_ID = '024'
    apex = True
    model ='microsoft/deberta-v3-large' # 'microsoft/deberta-large' # 'microsoft/deberta-v3-base' #'microsoft/deberta-v3-large' 
    seed = 2022 # 42 # 71
    n_splits = 4
    max_len = 512 # 1429 # 1024 # 512
    dropout = 0
    targets = ['content', 'wording']
    target_size = len(targets)
    n_accumulate=1
    print_freq = 100
    eval_freq = 780 * 2 # 390 # 170
    min_lr=1e-6
    scheduler = 'cosine'
    batch_size = 1 # 2 # 4
    num_workers = 0 #3
    lr = 5e-6 # 3e-6
    weigth_decay = 0.01
    epochs = 4
    n_fold = 4
    trn_fold = [i for i in range(n_fold)]
    train = True
    num_warmup_steps = 0
    num_cycles=0.5
    debug = False
    freezing = True
    gradient_checkpoint = True
    reinit_layers = 4 # 3
    tokenizer = AutoTokenizer.from_pretrained(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
# ====================================================
# Utils
# ====================================================
def MCRMSE(y_trues, y_preds):
    scores = []
    idxes = y_trues.shape[1]
    for i in range(idxes):
        y_true = y_trues[:,i]
        y_pred = y_preds[:,i]
        score = mean_squared_error(y_true, y_pred, squared=False) # RMSE
        scores.append(score)
    mcrmse_score = np.mean(scores)
    return mcrmse_score, scores


def get_score(y_trues, y_preds):
    mcrmse_score, scores = MCRMSE(y_trues, y_preds)
    return mcrmse_score, scores


def get_logger(filename=OUTPUT_DIR+'_train'):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()


def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(CFG.seed)
# seed_everything(seed=60)

# Data Loading

In [6]:
# ====================================================
# Data Loading
# ====================================================
path = "C:/Users/Lab000/Desktop/kaggle/kaggle_competetion/CommonLit - Evaluate Student Summaries/input/commonlit-evaluate-student-summaries/"
prompts_train = pd.read_csv(path+'prompts_train.csv')
prompts_test = pd.read_csv(path+'prompts_test.csv')

summaries_train = pd.read_csv(path+'summaries_train.csv')
summaries_test = pd.read_csv(path+'summaries_test.csv')

submission = pd.read_csv(path+'sample_submission.csv')

print(f"prompts_train.shape: {prompts_train.shape}")
#display(prompts_train.head())
print(f"prompts_test.shape: {prompts_test.shape}")
#display(prompts_test.head())

print(f"summaries_train.shape: {summaries_train.shape}")
#display(summaries_train.head())
print(f"summaries_test.shape: {summaries_test.shape}")
#display(summaries_test.head())
print(f"submission.shape: {submission.shape}")
#display(submission.head())

prompts_train.shape: (4, 4)
prompts_test.shape: (2, 4)
summaries_train.shape: (7165, 5)
summaries_test.shape: (4, 3)
submission.shape: (4, 3)


# Merge Prompts Data 

In [7]:
train = summaries_train.merge(prompts_train, how="left", on="prompt_id")
test = summaries_test.merge(prompts_test, how="left", on="prompt_id")

# CV split(GroupKFold)

In [8]:
# ====================================================
# CV split
# ====================================================
Fold = GroupKFold(n_splits=CFG.n_fold)
for n, (train_index, val_index) in enumerate(Fold.split(train, groups=train["prompt_id"])):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
display(train.groupby('fold').size())

fold
0    2057
1    2009
2    1996
3    1103
dtype: int64

In [9]:
CFG.tokenizer.save_pretrained(OUTPUT_DIR+'/tokenizer/')

('./Comonlist_025_3/tokenizer/tokenizer_config.json',
 './Comonlist_025_3/tokenizer/special_tokens_map.json',
 './Comonlist_025_3/tokenizer/spm.model',
 './Comonlist_025_3/tokenizer/added_tokens.json',
 './Comonlist_025_3/tokenizer/tokenizer.json')

# Text max Length

In [10]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['text'].fillna("").values, total=len(train))
for text in tk0:
    length = len(CFG.tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 2 # cls & sep
LOGGER.info(f"text column max_len: {CFG.max_len}")

  0%|          | 0/7165 [00:00<?, ?it/s]

text column max_len: 822


In [11]:
input_col = "input"
sep = CFG.tokenizer.sep_token
train[input_col] = (
                    train["prompt_title"] + sep 
#                    + train_df["prompt_text"] + sep 
                    + train["prompt_question"] + sep 
                    + train["text"]
                  )

# input max Length

In [12]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['input'].fillna("").values, total=len(train))
for text in tk0:
    length = len(CFG.tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 2 # cls & sep
LOGGER.info(f"input column max_len: {CFG.max_len}")

  0%|          | 0/7165 [00:00<?, ?it/s]

input column max_len: 858


In [13]:
class FeedBackDataset(Dataset):
    def __init__(self, df, tokenizer, max_length):
        self.df = df
        self.max_len = CFG.max_len
        self.text = df['input'].values
        self.tokenizer = CFG.tokenizer
        self.targets = df[CFG.targets].values

    def __len__(self):
        return len(self.df)

    # staticmethod に書き換えたい
    def cut_head_and_tail(self, text):
        input_ids = self.tokenizer.encode(text)
        n_token = len(input_ids)

        if n_token == self.max_len:
            input_ids = input_ids
            attention_mask = [1 for _ in range(self.max_len)]
            token_type_ids = [1 for _ in range(self.max_len)]
        elif n_token < self.max_len:
            pad = [1 for _ in range(self.max_len-n_token)]
            input_ids = input_ids + pad
            attention_mask = [1 if n_token > i else 0 for i in range(self.max_len)]
            token_type_ids = [1 if n_token > i else 0 for i in range(self.max_len)]
        else:
            harf_len = (self.max_len-2)//2#256
            
            _input_ids = input_ids[1:-1]
            input_ids = [0]+ _input_ids[:harf_len] + _input_ids[-harf_len:] + [2]#取前256個跟後256個
            attention_mask = [1 for _ in range(self.max_len)]
            token_type_ids = [1 for _ in range(self.max_len)]

            if len(input_ids) < self.max_len:
                diff = self.max_len - len(input_ids)
                input_ids = [0]+ _input_ids[:harf_len] + _input_ids[-(harf_len+diff):] + [2]

        d = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
        }
        return d

    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.cut_head_and_tail(text)
        return {
            'input_ids':inputs['input_ids'],
            'attention_mask':inputs['attention_mask'],
            'target':self.targets[index]
            }

In [14]:
class Collate:
    def __init__(self, tokenizer, isTrain=True):
        self.tokenizer = tokenizer
        self.isTrain = isTrain

    def __call__(self, batch):
        
        output = dict()
        output["input_ids"] = [sample["input_ids"] for sample in batch]
        output["attention_mask"] = [sample["attention_mask"] for sample in batch]
        if self.isTrain:
            output["target"] = [sample["target"] for sample in batch]

        # calculate max token length of this batch
        batch_max = max([len(ids) for ids in output["input_ids"]])
        
        # add padding
        if self.tokenizer.padding_side == "right":#沒意義
            output["input_ids"] = [s + (batch_max - len(s)) * [self.tokenizer.pad_token_id] for s in output["input_ids"]]
            output["attention_mask"] = [s + (batch_max - len(s)) * [0] for s in output["attention_mask"]]
        else:
            output["input_ids"] = [(batch_max - len(s)) * [self.tokenizer.pad_token_id] + s for s in output["input_ids"]]
            output["attention_mask"] = [(batch_max - len(s)) * [0] + s for s in output["attention_mask"]]

        # convert to tensors
        output["input_ids"] = torch.tensor(output["input_ids"], dtype=torch.long)
        output["attention_mask"] = torch.tensor(output["attention_mask"], dtype=torch.long)
        if self.isTrain:
            output["target"] = torch.tensor(output["target"], dtype=torch.float)

        
        return output

In [15]:
collate_fn = Collate(CFG.tokenizer, isTrain=True)

In [16]:
def freeze(module):
    """
    Freezes module's parameters.
    """

    for parameter in module.parameters():
        parameter.requires_grad = False

def get_scheduler(cfg, optimizer, num_train_steps):
    if cfg.scheduler == 'linear':
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
        )
    elif cfg.scheduler == 'cosine':
        scheduler = get_cosine_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
        )
    return scheduler


In [17]:
class FeedBackModel(nn.Module):
    def __init__(self, model_name):
        super(FeedBackModel, self).__init__()

        self.cfg = CFG
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.hidden_dropout_prob = 0
        self.config.attention_probs_dropout_prob = 0

        self.model = AutoModel.from_pretrained(model_name, config=self.config)

        self.output = nn.Sequential(
            nn.LayerNorm(self.config.hidden_size),
            nn.Linear(self.config.hidden_size, self.cfg.target_size)
        )

        
        # Freeze
        if self.cfg.freezing:
            freeze(self.model.embeddings)
            freeze(self.model.encoder.layer[:2])

        # Gradient Checkpointing
        #if self.cfg.gradient_checkpoint:
        #    self.model.gradient_checkpointing_enable() 

        #if self.cfg.reinit_layers > 0:
        #    layers = self.model.encoder.layer[-self.cfg.reinit_layers:]
        #    for layer in layers:
        #        for module in layer.modules():
        #            self._init_weights(module)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, ids, mask, token_type_ids=None, targets=None):
        if token_type_ids:
            transformer_out = self.model(ids, mask, token_type_ids)
        else:
            transformer_out = self.model(ids, mask)

        # simple CLS
        sequence_output = transformer_out[0][:, 0, :]

        logits = self.output(sequence_output)

        return logits

In [18]:
def criterion(outputs, targets):
    loss_fct = nn.MSELoss()
    loss = loss_fct(outputs, targets)
    return loss

def get_score(outputs, targets):
    mcrmse = []
    for i in range(CFG.target_size):
        mcrmse.append(
            metrics.mean_squared_error(
                targets[:, i],
                outputs[:, i],
                squared=False,
            ),
        )
    mcrmse = np.mean(mcrmse)
    return mcrmse
def get_result(oof_df):
    labels = oof_df[CFG.targets].values
    preds = oof_df[['pred_0', 'pred_1']].values
    score = get_score(preds, labels)
    LOGGER.info(f'Score: {score:<.4f}')

# Training And Validation Per Epoch

In [19]:
def train_one_epoch(model, optimizer, scheduler, dataloader, valid_loader, device, epoch, best_score, valid_labels):
    model.train()
    scaler = GradScaler(enabled=CFG.apex)

    dataset_size = 0
    running_loss = 0

    start = end = time.time()

    for step, data in enumerate(dataloader):
        
        ids = data['input_ids'].to(device, dtype=torch.long)
        mask = data['attention_mask'].to(device, dtype=torch.long)
        targets = data['target'].to(device, dtype=torch.float)

        batch_size = ids.size(0)

        with autocast(enabled=CFG.apex):
            outputs = model(ids, mask)
            loss = criterion(outputs, targets)

        #accumulate
        loss = loss / CFG.n_accumulate
        scaler.scale(loss).backward()
        if (step +1) % CFG.n_accumulate == 0:#n_accumulate=1
            # torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step()
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(dataloader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Loss: [{3}]'
                  'Elapsed {remain:s} '
                  .format(epoch+1, step, len(dataloader), epoch_loss,
                          remain=timeSince(start, float(step+1)/len(dataloader))))

        if (step > 0) & (step % CFG.eval_freq == 0) :

            valid_epoch_loss, pred = valid_one_epoch(model, valid_loader, device, epoch)

            score = get_score(pred, valid_labels)

            LOGGER.info(f'Epoch {epoch+1} Step {step} - avg_train_loss: {epoch_loss:.4f}  avg_val_loss: {valid_epoch_loss:.4f}')
            LOGGER.info(f'Epoch {epoch+1} Step {step} - Score: {score:.4f}')

            if score < best_score:
                best_score = score
                LOGGER.info(f'Epoch {epoch+1} Step {step} - Save Best Score: {best_score:.4f} Model')
                torch.save({'model': model.state_dict(),
                            'predictions': pred},
                            OUTPUT_DIR+f"/model/{CFG.model.replace('/', '-')}_fold{fold}_best.pth")

            # model.train()

    gc.collect()

    return epoch_loss, valid_epoch_loss, pred, best_score

@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval()

    dataset_size = 0
    running_loss = 0

    start = end = time.time()
    pred = []

    for step, data in enumerate(dataloader):
        ids = data['input_ids'].to(device, dtype=torch.long)
        mask = data['attention_mask'].to(device, dtype=torch.long)
        targets = data['target'].to(device, dtype=torch.float)

        batch_size = ids.size(0)
        outputs = model(ids, mask)
        loss = criterion(outputs, targets)
        pred.append(outputs.to('cpu').numpy())

        running_loss += (loss.item()* batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        end = time.time()

        if step % CFG.print_freq == 0 or step == (len(dataloader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  .format(step, len(dataloader),
                          remain=timeSince(start, float(step+1)/len(dataloader))))

    pred = np.concatenate(pred)
    return epoch_loss, pred

# Training And Validation Loop

In [20]:
def train_loop(fold):
    LOGGER.info(f'-------------fold:{fold} training-------------')

    train_data = train[train.fold != fold].reset_index(drop=True)
    valid_data = train[train.fold == fold].reset_index(drop=True)
    valid_labels = valid_data[CFG.targets].values

    trainDataset = FeedBackDataset(train_data, CFG.tokenizer, CFG.max_len)
    validDataset = FeedBackDataset(valid_data, CFG.tokenizer, CFG.max_len)
    
    

    train_loader = DataLoader(trainDataset,
                              batch_size = CFG.batch_size,
                              shuffle=True,
                              collate_fn = collate_fn,
                              num_workers = CFG.num_workers,
                              pin_memory = True,
                              drop_last=True)

    
    valid_loader = DataLoader(validDataset,
                              batch_size = CFG.batch_size * 2,
                              shuffle=False,
                              collate_fn = collate_fn,
                              num_workers = CFG.num_workers,
                              pin_memory = True,
                              drop_last=False)

    model = FeedBackModel(CFG.model)
    torch.save(model.config, OUTPUT_DIR+'/model/config.pth')
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weigth_decay)
    num_train_steps = int(len(train_data) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # loop
    best_score = 100

    for epoch in range(CFG.epochs):
        # if epoch == (CFG.epochs - 1):
        #     break

        start_time = time.time()

        train_epoch_loss, valid_epoch_loss, pred, best_score = train_one_epoch(model, optimizer, scheduler, train_loader, valid_loader, device, epoch, best_score, valid_labels)

        score = get_score(pred, valid_labels)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {train_epoch_loss:.4f}  avg_val_loss: {valid_epoch_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': pred},
                        OUTPUT_DIR+f"/model/{CFG.model.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_DIR+f"/model/{CFG.model.replace('/', '-')}_fold{fold}_best.pth",
                             map_location=torch.device('cpu'))['predictions']
    valid_data['pred_0'] = predictions[:, 0]
    valid_data['pred_1'] = predictions[:, 1]
    # valid_data['pred_2'] = predictions[:, 2]
    # valid_data['pred_3'] = predictions[:, 3]
    # valid_data['pred_4'] = predictions[:, 4]
    # valid_data['pred_5'] = predictions[:, 5]

    torch.cuda.empty_cache()
    gc.collect()

    return valid_data

In [22]:
if CFG.train:
    oof_df = pd.DataFrame()
    for fold in range(CFG.n_fold):
        if fold in CFG.trn_fold:
            _oof_df = train_loop(fold)
            oof_df = pd.concat([oof_df, _oof_df])
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)
    oof_df = oof_df.reset_index(drop=True)
    LOGGER.info(f"========== CV ==========")
    get_result(oof_df)
    oof_df.to_csv(OUTPUT_DIR+f'_oof_df.csv', index=False)

-------------fold:0 training-------------
Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.LayerNorm.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'mask_predictions.classifier.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceC

Epoch: [1][0/5108] Loss: [0.616123378276825]Elapsed 0m 1s (remain 164m 31s) 
Epoch: [1][100/5108] Loss: [1.1703011779815402]Elapsed 0m 27s (remain 23m 2s) 
Epoch: [1][200/5108] Loss: [0.9837909583766847]Elapsed 0m 54s (remain 21m 59s) 
Epoch: [1][300/5108] Loss: [0.8180993434723987]Elapsed 1m 20s (remain 21m 22s) 
Epoch: [1][400/5108] Loss: [0.7428202990567639]Elapsed 1m 46s (remain 20m 49s) 
Epoch: [1][500/5108] Loss: [0.6693293196217718]Elapsed 2m 14s (remain 20m 34s) 
Epoch: [1][600/5108] Loss: [0.6549973492025392]Elapsed 2m 42s (remain 20m 18s) 
Epoch: [1][700/5108] Loss: [0.608956645064012]Elapsed 3m 9s (remain 19m 53s) 
Epoch: [1][800/5108] Loss: [0.572528002765489]Elapsed 3m 36s (remain 19m 23s) 
Epoch: [1][900/5108] Loss: [0.5563777108928943]Elapsed 4m 4s (remain 18m 59s) 
Epoch: [1][1000/5108] Loss: [0.5343874195973471]Elapsed 4m 33s (remain 18m 41s) 
Epoch: [1][1100/5108] Loss: [0.5061466105201524]Elapsed 5m 2s (remain 18m 22s) 
Epoch: [1][1200/5108] Loss: [0.5033370929267676

Epoch 1 Step 1560 - avg_train_loss: 0.4703  avg_val_loss: 0.2559
Epoch 1 Step 1560 - Score: 0.4994
Epoch 1 Step 1560 - Save Best Score: 0.4994 Model


EVAL: [1028/1029] Elapsed 5m 41s (remain 0m 0s) 
Epoch: [1][1600/5108] Loss: [0.4683846140253898]Elapsed 13m 13s (remain 28m 58s) 
Epoch: [1][1700/5108] Loss: [0.4591213936838368]Elapsed 13m 39s (remain 27m 21s) 
Epoch: [1][1800/5108] Loss: [0.45069700736177054]Elapsed 14m 6s (remain 25m 54s) 
Epoch: [1][1900/5108] Loss: [0.44471962376559415]Elapsed 14m 34s (remain 24m 34s) 
Epoch: [1][2000/5108] Loss: [0.4374531333965167]Elapsed 15m 0s (remain 23m 18s) 
Epoch: [1][2100/5108] Loss: [0.4336716204539847]Elapsed 15m 28s (remain 22m 8s) 
Epoch: [1][2200/5108] Loss: [0.426345295492243]Elapsed 15m 54s (remain 21m 0s) 
Epoch: [1][2300/5108] Loss: [0.41847886321531413]Elapsed 16m 19s (remain 19m 54s) 
Epoch: [1][2400/5108] Loss: [0.4128149985104246]Elapsed 16m 45s (remain 18m 53s) 
Epoch: [1][2500/5108] Loss: [0.4079419108551889]Elapsed 17m 11s (remain 17m 55s) 
Epoch: [1][2600/5108] Loss: [0.40176145085192244]Elapsed 17m 39s (remain 17m 1s) 
Epoch: [1][2700/5108] Loss: [0.3984100219828702]Ela

Epoch 1 Step 3120 - avg_train_loss: 0.3857  avg_val_loss: 0.2735
Epoch 1 Step 3120 - Score: 0.5133


EVAL: [1028/1029] Elapsed 5m 30s (remain 0m 0s) 
Epoch: [1][3200/5108] Loss: [0.3839792927645783]Elapsed 25m 53s (remain 15m 25s) 
Epoch: [1][3300/5108] Loss: [0.38117379732279394]Elapsed 26m 22s (remain 14m 26s) 
Epoch: [1][3400/5108] Loss: [0.3788872378807885]Elapsed 26m 51s (remain 13m 28s) 
Epoch: [1][3500/5108] Loss: [0.37492907294691696]Elapsed 27m 18s (remain 12m 32s) 
Epoch: [1][3600/5108] Loss: [0.37381654412732357]Elapsed 27m 44s (remain 11m 36s) 
Epoch: [1][3700/5108] Loss: [0.37218060403443803]Elapsed 28m 10s (remain 10m 42s) 
Epoch: [1][3800/5108] Loss: [0.3698593446333531]Elapsed 28m 34s (remain 9m 49s) 
Epoch: [1][3900/5108] Loss: [0.3687640204664729]Elapsed 28m 59s (remain 8m 58s) 
Epoch: [1][4000/5108] Loss: [0.36551956083406806]Elapsed 29m 24s (remain 8m 8s) 
Epoch: [1][4100/5108] Loss: [0.36242821801312847]Elapsed 29m 50s (remain 7m 19s) 
Epoch: [1][4200/5108] Loss: [0.3615063686973541]Elapsed 30m 15s (remain 6m 31s) 
Epoch: [1][4300/5108] Loss: [0.3596733075592703]E

Epoch 1 Step 4680 - avg_train_loss: 0.3492  avg_val_loss: 0.2855
Epoch 1 Step 4680 - Score: 0.5292


EVAL: [1028/1029] Elapsed 5m 17s (remain 0m 0s) 
Epoch: [1][4700/5108] Loss: [0.3489553583675404]Elapsed 37m 48s (remain 3m 16s) 
Epoch: [1][4800/5108] Loss: [0.3478286535963397]Elapsed 38m 14s (remain 2m 26s) 
Epoch: [1][4900/5108] Loss: [0.3459004888518618]Elapsed 38m 39s (remain 1m 37s) 
Epoch: [1][5000/5108] Loss: [0.3441439838868686]Elapsed 39m 5s (remain 0m 50s) 
Epoch: [1][5100/5108] Loss: [0.34262176548853257]Elapsed 39m 31s (remain 0m 3s) 


Epoch 1 - avg_train_loss: 0.3426  avg_val_loss: 0.2855  time: 2374s
Epoch 1 - Score: 0.5292


Epoch: [1][5107/5108] Loss: [0.34256814733349994]Elapsed 39m 33s (remain 0m 0s) 
Epoch: [2][0/5108] Loss: [1.2115882635116577]Elapsed 0m 0s (remain 17m 3s) 
Epoch: [2][100/5108] Loss: [0.3247374596839568]Elapsed 0m 25s (remain 20m 54s) 
Epoch: [2][200/5108] Loss: [0.23967975955132154]Elapsed 0m 50s (remain 20m 25s) 
Epoch: [2][300/5108] Loss: [0.22931563344446051]Elapsed 1m 13s (remain 19m 28s) 
Epoch: [2][400/5108] Loss: [0.22382464846511932]Elapsed 1m 36s (remain 18m 52s) 
Epoch: [2][500/5108] Loss: [0.2304852140727899]Elapsed 2m 0s (remain 18m 25s) 
Epoch: [2][600/5108] Loss: [0.22048938868498777]Elapsed 2m 23s (remain 17m 56s) 
Epoch: [2][700/5108] Loss: [0.2196830322506131]Elapsed 2m 47s (remain 17m 30s) 
Epoch: [2][800/5108] Loss: [0.2191902909813171]Elapsed 3m 10s (remain 17m 6s) 
Epoch: [2][900/5108] Loss: [0.21327863141025297]Elapsed 3m 34s (remain 16m 42s) 
Epoch: [2][1000/5108] Loss: [0.2152152250470484]Elapsed 3m 57s (remain 16m 16s) 
Epoch: [2][1100/5108] Loss: [0.21369382

Epoch 2 Step 1560 - avg_train_loss: 0.2070  avg_val_loss: 0.2764
Epoch 2 Step 1560 - Score: 0.5172


EVAL: [1028/1029] Elapsed 5m 21s (remain 0m 0s) 
Epoch: [2][1600/5108] Loss: [0.206822711516906]Elapsed 11m 53s (remain 26m 3s) 
Epoch: [2][1700/5108] Loss: [0.20597737341040903]Elapsed 12m 19s (remain 24m 41s) 
Epoch: [2][1800/5108] Loss: [0.2045121513666077]Elapsed 12m 46s (remain 23m 26s) 
Epoch: [2][1900/5108] Loss: [0.20400669651150108]Elapsed 13m 12s (remain 22m 17s) 
Epoch: [2][2000/5108] Loss: [0.20469259686674984]Elapsed 13m 39s (remain 21m 11s) 
Epoch: [2][2100/5108] Loss: [0.20544786025644687]Elapsed 14m 5s (remain 20m 10s) 
Epoch: [2][2200/5108] Loss: [0.20644412082737715]Elapsed 14m 31s (remain 19m 11s) 
Epoch: [2][2300/5108] Loss: [0.20609847234117337]Elapsed 14m 58s (remain 18m 15s) 
Epoch: [2][2400/5108] Loss: [0.2063121673762976]Elapsed 15m 24s (remain 17m 21s) 
Epoch: [2][2500/5108] Loss: [0.20520470514455344]Elapsed 15m 50s (remain 16m 30s) 
Epoch: [2][2600/5108] Loss: [0.2038014369186254]Elapsed 16m 16s (remain 15m 41s) 
Epoch: [2][2700/5108] Loss: [0.20569469927178

Epoch 2 Step 3120 - avg_train_loss: 0.2042  avg_val_loss: 0.2624
Epoch 2 Step 3120 - Score: 0.5079


EVAL: [1028/1029] Elapsed 5m 22s (remain 0m 0s) 
Epoch: [2][3200/5108] Loss: [0.20309089328493082]Elapsed 24m 20s (remain 14m 30s) 
Epoch: [2][3300/5108] Loss: [0.20303261424427343]Elapsed 24m 46s (remain 13m 33s) 
Epoch: [2][3400/5108] Loss: [0.20423724005339983]Elapsed 25m 12s (remain 12m 39s) 
Epoch: [2][3500/5108] Loss: [0.20445156336678524]Elapsed 25m 39s (remain 11m 46s) 
Epoch: [2][3600/5108] Loss: [0.20347572527825641]Elapsed 26m 5s (remain 10m 55s) 
Epoch: [2][3700/5108] Loss: [0.20411108438415712]Elapsed 26m 31s (remain 10m 5s) 
Epoch: [2][3800/5108] Loss: [0.20387985665179212]Elapsed 26m 58s (remain 9m 16s) 
Epoch: [2][3900/5108] Loss: [0.20387737855681018]Elapsed 27m 24s (remain 8m 28s) 
Epoch: [2][4000/5108] Loss: [0.20336276741598236]Elapsed 27m 51s (remain 7m 42s) 
Epoch: [2][4100/5108] Loss: [0.20345391784146952]Elapsed 28m 18s (remain 6m 56s) 
Epoch: [2][4200/5108] Loss: [0.2036194508281878]Elapsed 28m 44s (remain 6m 12s) 
Epoch: [2][4300/5108] Loss: [0.203103728160250

Epoch 2 Step 4680 - avg_train_loss: 0.2013  avg_val_loss: 0.3369
Epoch 2 Step 4680 - Score: 0.5771


EVAL: [1028/1029] Elapsed 5m 14s (remain 0m 0s) 
Epoch: [2][4700/5108] Loss: [0.20146519829840792]Elapsed 36m 10s (remain 3m 7s) 
Epoch: [2][4800/5108] Loss: [0.20144463178129118]Elapsed 36m 36s (remain 2m 20s) 
Epoch: [2][4900/5108] Loss: [0.2006840850434857]Elapsed 37m 3s (remain 1m 33s) 
Epoch: [2][5000/5108] Loss: [0.20037729796059717]Elapsed 37m 29s (remain 0m 48s) 
Epoch: [2][5100/5108] Loss: [0.19976859134462452]Elapsed 37m 57s (remain 0m 3s) 


Epoch 2 - avg_train_loss: 0.1997  avg_val_loss: 0.3369  time: 2280s
Epoch 2 - Score: 0.5771


Epoch: [2][5107/5108] Loss: [0.19969884540334254]Elapsed 37m 59s (remain 0m 0s) 
Epoch: [3][0/5108] Loss: [0.007521997671574354]Elapsed 0m 0s (remain 19m 31s) 
Epoch: [3][100/5108] Loss: [0.1205355448897301]Elapsed 0m 28s (remain 23m 25s) 
Epoch: [3][200/5108] Loss: [0.10849677216358927]Elapsed 0m 56s (remain 23m 0s) 
Epoch: [3][300/5108] Loss: [0.1158080864167831]Elapsed 1m 23s (remain 22m 9s) 
Epoch: [3][400/5108] Loss: [0.10732445723745439]Elapsed 1m 50s (remain 21m 32s) 
Epoch: [3][500/5108] Loss: [0.11120444601537512]Elapsed 2m 17s (remain 21m 6s) 
Epoch: [3][600/5108] Loss: [0.1126984500791215]Elapsed 2m 44s (remain 20m 34s) 
Epoch: [3][700/5108] Loss: [0.1176957359668442]Elapsed 3m 10s (remain 20m 0s) 
Epoch: [3][800/5108] Loss: [0.11973515571117402]Elapsed 3m 37s (remain 19m 27s) 
Epoch: [3][900/5108] Loss: [0.11731222814492762]Elapsed 4m 4s (remain 19m 1s) 
Epoch: [3][1000/5108] Loss: [0.11548610635763891]Elapsed 4m 32s (remain 18m 37s) 
Epoch: [3][1100/5108] Loss: [0.11858102

Epoch 3 Step 1560 - avg_train_loss: 0.1148  avg_val_loss: 0.2772
Epoch 3 Step 1560 - Score: 0.5239


EVAL: [1028/1029] Elapsed 5m 3s (remain 0m 0s) 
Epoch: [3][1600/5108] Loss: [0.1143510648170898]Elapsed 12m 16s (remain 26m 52s) 
Epoch: [3][1700/5108] Loss: [0.11444032697944066]Elapsed 12m 40s (remain 25m 22s) 
Epoch: [3][1800/5108] Loss: [0.11380689601529549]Elapsed 13m 5s (remain 24m 1s) 
Epoch: [3][1900/5108] Loss: [0.11167161436614129]Elapsed 13m 31s (remain 22m 49s) 
Epoch: [3][2000/5108] Loss: [0.1109961634806828]Elapsed 13m 58s (remain 21m 42s) 
Epoch: [3][2100/5108] Loss: [0.11044663965171292]Elapsed 14m 25s (remain 20m 38s) 
Epoch: [3][2200/5108] Loss: [0.10998716583439982]Elapsed 14m 51s (remain 19m 37s) 
Epoch: [3][2300/5108] Loss: [0.10961071419010406]Elapsed 15m 18s (remain 18m 40s) 
Epoch: [3][2400/5108] Loss: [0.10869641299425056]Elapsed 15m 45s (remain 17m 46s) 
Epoch: [3][2500/5108] Loss: [0.10902310967089986]Elapsed 16m 13s (remain 16m 54s) 
Epoch: [3][2600/5108] Loss: [0.10937021018099281]Elapsed 16m 40s (remain 16m 4s) 
Epoch: [3][2700/5108] Loss: [0.1110396575066

Epoch 3 Step 3120 - avg_train_loss: 0.1119  avg_val_loss: 0.2619
Epoch 3 Step 3120 - Score: 0.5108


EVAL: [1028/1029] Elapsed 5m 21s (remain 0m 0s) 
Epoch: [3][3200/5108] Loss: [0.11202899897982496]Elapsed 24m 40s (remain 14m 42s) 
Epoch: [3][3300/5108] Loss: [0.11169841092463874]Elapsed 25m 7s (remain 13m 45s) 
Epoch: [3][3400/5108] Loss: [0.11173600840190467]Elapsed 25m 34s (remain 12m 50s) 
Epoch: [3][3500/5108] Loss: [0.1116519825919595]Elapsed 26m 0s (remain 11m 56s) 
Epoch: [3][3600/5108] Loss: [0.11152417262135647]Elapsed 26m 29s (remain 11m 4s) 
Epoch: [3][3700/5108] Loss: [0.11196497424998281]Elapsed 26m 56s (remain 10m 14s) 
Epoch: [3][3800/5108] Loss: [0.11177263611972564]Elapsed 27m 23s (remain 9m 25s) 
Epoch: [3][3900/5108] Loss: [0.1117240334738175]Elapsed 27m 49s (remain 8m 36s) 
Epoch: [3][4000/5108] Loss: [0.11144296776930017]Elapsed 28m 17s (remain 7m 49s) 
Epoch: [3][4100/5108] Loss: [0.1115377973225227]Elapsed 28m 44s (remain 7m 3s) 
Epoch: [3][4200/5108] Loss: [0.1115954066836516]Elapsed 29m 10s (remain 6m 17s) 
Epoch: [3][4300/5108] Loss: [0.11169205651162811]El

Epoch 3 Step 4680 - avg_train_loss: 0.1110  avg_val_loss: 0.2378
Epoch 3 Step 4680 - Score: 0.4853
Epoch 3 Step 4680 - Save Best Score: 0.4853 Model


EVAL: [1028/1029] Elapsed 5m 17s (remain 0m 0s) 
Epoch: [3][4700/5108] Loss: [0.11097687331273666]Elapsed 36m 48s (remain 3m 11s) 
Epoch: [3][4800/5108] Loss: [0.11130078066346835]Elapsed 37m 14s (remain 2m 22s) 
Epoch: [3][4900/5108] Loss: [0.11168263954569665]Elapsed 37m 41s (remain 1m 35s) 
Epoch: [3][5000/5108] Loss: [0.11146147689323425]Elapsed 38m 7s (remain 0m 48s) 
Epoch: [3][5100/5108] Loss: [0.11124565377374901]Elapsed 38m 33s (remain 0m 3s) 


Epoch 3 - avg_train_loss: 0.1113  avg_val_loss: 0.2378  time: 2316s
Epoch 3 - Score: 0.4853


Epoch: [3][5107/5108] Loss: [0.1112798269412921]Elapsed 38m 35s (remain 0m 0s) 
Epoch: [4][0/5108] Loss: [0.04094325378537178]Elapsed 0m 0s (remain 17m 54s) 
Epoch: [4][100/5108] Loss: [0.06600729200993619]Elapsed 0m 27s (remain 22m 29s) 
Epoch: [4][200/5108] Loss: [0.06621803585958287]Elapsed 0m 54s (remain 22m 20s) 
Epoch: [4][300/5108] Loss: [0.06608152489649345]Elapsed 1m 19s (remain 21m 11s) 
Epoch: [4][400/5108] Loss: [0.06396955739324413]Elapsed 1m 43s (remain 20m 19s) 
Epoch: [4][500/5108] Loss: [0.06439701601054972]Elapsed 2m 9s (remain 19m 52s) 
Epoch: [4][600/5108] Loss: [0.06435160193732357]Elapsed 2m 34s (remain 19m 20s) 
Epoch: [4][700/5108] Loss: [0.06412532575519077]Elapsed 3m 0s (remain 18m 56s) 
Epoch: [4][800/5108] Loss: [0.06434381180784594]Elapsed 3m 26s (remain 18m 27s) 
Epoch: [4][900/5108] Loss: [0.06441224647539268]Elapsed 3m 50s (remain 17m 54s) 
Epoch: [4][1000/5108] Loss: [0.06347517567855748]Elapsed 4m 12s (remain 17m 17s) 
Epoch: [4][1100/5108] Loss: [0.06

Epoch 4 Step 1560 - avg_train_loss: 0.0610  avg_val_loss: 0.2519
Epoch 4 Step 1560 - Score: 0.5004


EVAL: [1028/1029] Elapsed 5m 16s (remain 0m 0s) 
Epoch: [4][1600/5108] Loss: [0.06080512422646827]Elapsed 11m 48s (remain 25m 52s) 
Epoch: [4][1700/5108] Loss: [0.06041663282162814]Elapsed 12m 16s (remain 24m 34s) 
Epoch: [4][1800/5108] Loss: [0.061078825690033686]Elapsed 12m 44s (remain 23m 24s) 
Epoch: [4][1900/5108] Loss: [0.06275211289035175]Elapsed 13m 11s (remain 22m 14s) 
Epoch: [4][2000/5108] Loss: [0.06238338951358252]Elapsed 13m 36s (remain 21m 7s) 
Epoch: [4][2100/5108] Loss: [0.062386719127739854]Elapsed 14m 0s (remain 20m 3s) 
Epoch: [4][2200/5108] Loss: [0.06186118832169634]Elapsed 14m 23s (remain 19m 0s) 
Epoch: [4][2300/5108] Loss: [0.061336743157301]Elapsed 14m 47s (remain 18m 2s) 
Epoch: [4][2400/5108] Loss: [0.06161555954377744]Elapsed 15m 12s (remain 17m 8s) 
Epoch: [4][2500/5108] Loss: [0.062204971815129385]Elapsed 15m 37s (remain 16m 17s) 
Epoch: [4][2600/5108] Loss: [0.06174338382413859]Elapsed 16m 3s (remain 15m 28s) 
Epoch: [4][2700/5108] Loss: [0.0616636946170

Epoch 4 Step 3120 - avg_train_loss: 0.0621  avg_val_loss: 0.2589
Epoch 4 Step 3120 - Score: 0.5063


EVAL: [1028/1029] Elapsed 5m 6s (remain 0m 0s) 
Epoch: [4][3200/5108] Loss: [0.061940569778777266]Elapsed 23m 42s (remain 14m 7s) 
Epoch: [4][3300/5108] Loss: [0.06203848462565158]Elapsed 24m 9s (remain 13m 13s) 
Epoch: [4][3400/5108] Loss: [0.06186897498214035]Elapsed 24m 36s (remain 12m 20s) 
Epoch: [4][3500/5108] Loss: [0.06147717337098241]Elapsed 25m 2s (remain 11m 29s) 
Epoch: [4][3600/5108] Loss: [0.06113630862210955]Elapsed 25m 28s (remain 10m 39s) 
Epoch: [4][3700/5108] Loss: [0.0612984222106757]Elapsed 25m 55s (remain 9m 51s) 
Epoch: [4][3800/5108] Loss: [0.06085040793316396]Elapsed 26m 22s (remain 9m 4s) 
Epoch: [4][3900/5108] Loss: [0.06105203062445726]Elapsed 26m 48s (remain 8m 17s) 
Epoch: [4][4000/5108] Loss: [0.061020598818107064]Elapsed 27m 15s (remain 7m 32s) 
Epoch: [4][4100/5108] Loss: [0.06093122907165686]Elapsed 27m 42s (remain 6m 48s) 
Epoch: [4][4200/5108] Loss: [0.06094132252764424]Elapsed 28m 9s (remain 6m 4s) 
Epoch: [4][4300/5108] Loss: [0.06088670700236306]E

Epoch 4 Step 4680 - avg_train_loss: 0.0603  avg_val_loss: 0.2585
Epoch 4 Step 4680 - Score: 0.5054


EVAL: [1028/1029] Elapsed 5m 23s (remain 0m 0s) 
Epoch: [4][4700/5108] Loss: [0.060217705821013524]Elapsed 35m 48s (remain 3m 5s) 
Epoch: [4][4800/5108] Loss: [0.0600280982063653]Elapsed 36m 14s (remain 2m 19s) 
Epoch: [4][4900/5108] Loss: [0.06008591833477044]Elapsed 36m 41s (remain 1m 32s) 
Epoch: [4][5000/5108] Loss: [0.059980737563548846]Elapsed 37m 8s (remain 0m 47s) 
Epoch: [4][5100/5108] Loss: [0.059924306167904964]Elapsed 37m 35s (remain 0m 3s) 


Epoch 4 - avg_train_loss: 0.0599  avg_val_loss: 0.2585  time: 2258s


Epoch: [4][5107/5108] Loss: [0.05992736271521489]Elapsed 37m 37s (remain 0m 0s) 


Epoch 4 - Score: 0.5054
Score: 0.4853
-------------fold:1 training-------------
Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.LayerNorm.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'mask_predictions.classifier.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassi

Epoch: [1][0/5156] Loss: [5.304915428161621]Elapsed 0m 0s (remain 17m 59s) 
Epoch: [1][100/5156] Loss: [1.3948432911662292]Elapsed 0m 26s (remain 22m 1s) 
Epoch: [1][200/5156] Loss: [0.9289316934317962]Elapsed 0m 53s (remain 21m 54s) 
Epoch: [1][300/5156] Loss: [0.7687891150706376]Elapsed 1m 20s (remain 21m 39s) 
Epoch: [1][400/5156] Loss: [0.6608519309416339]Elapsed 1m 47s (remain 21m 19s) 
Epoch: [1][500/5156] Loss: [0.6076356207648899]Elapsed 2m 14s (remain 20m 54s) 
Epoch: [1][600/5156] Loss: [0.5589462680143216]Elapsed 2m 41s (remain 20m 22s) 
Epoch: [1][700/5156] Loss: [0.5154475952744731]Elapsed 3m 7s (remain 19m 52s) 
Epoch: [1][800/5156] Loss: [0.48204186445084074]Elapsed 3m 34s (remain 19m 25s) 
Epoch: [1][900/5156] Loss: [0.46349226336393373]Elapsed 4m 0s (remain 18m 57s) 
Epoch: [1][1000/5156] Loss: [0.45168295503232525]Elapsed 4m 28s (remain 18m 34s) 
Epoch: [1][1100/5156] Loss: [0.4336406167263087]Elapsed 4m 56s (remain 18m 11s) 
Epoch: [1][1200/5156] Loss: [0.41998564525

Epoch 1 Step 1560 - avg_train_loss: 0.3932  avg_val_loss: 0.4586
Epoch 1 Step 1560 - Score: 0.6643
Epoch 1 Step 1560 - Save Best Score: 0.6643 Model


EVAL: [1004/1005] Elapsed 5m 15s (remain 0m 0s) 
Epoch: [1][1600/5156] Loss: [0.3934382969946762]Elapsed 12m 38s (remain 28m 3s) 
Epoch: [1][1700/5156] Loss: [0.38849805066309145]Elapsed 13m 4s (remain 26m 34s) 
Epoch: [1][1800/5156] Loss: [0.3798483985325388]Elapsed 13m 31s (remain 25m 11s) 
Epoch: [1][1900/5156] Loss: [0.37357706853735645]Elapsed 13m 58s (remain 23m 54s) 
Epoch: [1][2000/5156] Loss: [0.36948319686968983]Elapsed 14m 25s (remain 22m 44s) 
Epoch: [1][2100/5156] Loss: [0.36492762567000214]Elapsed 14m 52s (remain 21m 37s) 
Epoch: [1][2200/5156] Loss: [0.3589598916085524]Elapsed 15m 18s (remain 20m 33s) 
Epoch: [1][2300/5156] Loss: [0.35390045672894077]Elapsed 15m 45s (remain 19m 33s) 
Epoch: [1][2400/5156] Loss: [0.34856625994437945]Elapsed 16m 12s (remain 18m 36s) 
Epoch: [1][2500/5156] Loss: [0.3445856672058792]Elapsed 16m 41s (remain 17m 42s) 
Epoch: [1][2600/5156] Loss: [0.34017662969300577]Elapsed 17m 8s (remain 16m 50s) 
Epoch: [1][2700/5156] Loss: [0.33736826486540

Epoch 1 Step 3120 - avg_train_loss: 0.3282  avg_val_loss: 0.6864
Epoch 1 Step 3120 - Score: 0.8036


EVAL: [1004/1005] Elapsed 5m 18s (remain 0m 0s) 
Epoch: [1][3200/5156] Loss: [0.32775328288445243]Elapsed 25m 4s (remain 15m 19s) 
Epoch: [1][3300/5156] Loss: [0.3264011352121691]Elapsed 25m 30s (remain 14m 20s) 
Epoch: [1][3400/5156] Loss: [0.325119219889804]Elapsed 25m 56s (remain 13m 23s) 
Epoch: [1][3500/5156] Loss: [0.32544063068590134]Elapsed 26m 22s (remain 12m 28s) 
Epoch: [1][3600/5156] Loss: [0.32466170753494117]Elapsed 26m 48s (remain 11m 34s) 
Epoch: [1][3700/5156] Loss: [0.3228290339582126]Elapsed 27m 14s (remain 10m 42s) 
Epoch: [1][3800/5156] Loss: [0.3214258877881042]Elapsed 27m 40s (remain 9m 51s) 
Epoch: [1][3900/5156] Loss: [0.3214120450469188]Elapsed 28m 6s (remain 9m 2s) 
Epoch: [1][4000/5156] Loss: [0.3189666778491873]Elapsed 28m 32s (remain 8m 14s) 
Epoch: [1][4100/5156] Loss: [0.3171038611154616]Elapsed 28m 58s (remain 7m 27s) 
Epoch: [1][4200/5156] Loss: [0.31574601577355915]Elapsed 29m 24s (remain 6m 41s) 
Epoch: [1][4300/5156] Loss: [0.3148367602377959]Elapse

Epoch 1 Step 4680 - avg_train_loss: 0.3069  avg_val_loss: 0.6004
Epoch 1 Step 4680 - Score: 0.7456


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [1][4700/5156] Loss: [0.30686981568142707]Elapsed 36m 36s (remain 3m 32s) 
Epoch: [1][4800/5156] Loss: [0.30435105338310436]Elapsed 37m 2s (remain 2m 44s) 
Epoch: [1][4900/5156] Loss: [0.30299082483123846]Elapsed 37m 28s (remain 1m 56s) 
Epoch: [1][5000/5156] Loss: [0.30237300004168005]Elapsed 37m 54s (remain 1m 10s) 
Epoch: [1][5100/5156] Loss: [0.3016945804985903]Elapsed 38m 20s (remain 0m 24s) 


Epoch 1 - avg_train_loss: 0.3003  avg_val_loss: 0.6004  time: 2315s
Epoch 1 - Score: 0.7456


Epoch: [1][5155/5156] Loss: [0.30032470919161464]Elapsed 38m 34s (remain 0m 0s) 
Epoch: [2][0/5156] Loss: [0.1730145514011383]Elapsed 0m 0s (remain 18m 7s) 
Epoch: [2][100/5156] Loss: [0.16715783035292722]Elapsed 0m 25s (remain 21m 39s) 
Epoch: [2][200/5156] Loss: [0.22739005725123035]Elapsed 0m 51s (remain 21m 18s) 
Epoch: [2][300/5156] Loss: [0.20730655136490334]Elapsed 1m 17s (remain 20m 54s) 
Epoch: [2][400/5156] Loss: [0.19659115336696034]Elapsed 1m 43s (remain 20m 30s) 
Epoch: [2][500/5156] Loss: [0.18411863365228254]Elapsed 2m 9s (remain 20m 5s) 
Epoch: [2][600/5156] Loss: [0.1852145099810196]Elapsed 2m 35s (remain 19m 39s) 
Epoch: [2][700/5156] Loss: [0.1825156503927611]Elapsed 3m 1s (remain 19m 13s) 
Epoch: [2][800/5156] Loss: [0.18249523240029952]Elapsed 3m 27s (remain 18m 48s) 
Epoch: [2][900/5156] Loss: [0.17929604947772426]Elapsed 3m 53s (remain 18m 22s) 
Epoch: [2][1000/5156] Loss: [0.18320616228705072]Elapsed 4m 19s (remain 17m 56s) 
Epoch: [2][1100/5156] Loss: [0.184601

Epoch 2 Step 1560 - avg_train_loss: 0.1818  avg_val_loss: 0.5023
Epoch 2 Step 1560 - Score: 0.6860


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [2][1600/5156] Loss: [0.1818738140195328]Elapsed 11m 57s (remain 26m 33s) 
Epoch: [2][1700/5156] Loss: [0.17898976910782147]Elapsed 12m 23s (remain 25m 10s) 
Epoch: [2][1800/5156] Loss: [0.18171266596795352]Elapsed 12m 49s (remain 23m 53s) 
Epoch: [2][1900/5156] Loss: [0.18125385365727897]Elapsed 13m 15s (remain 22m 42s) 
Epoch: [2][2000/5156] Loss: [0.181258292338203]Elapsed 13m 41s (remain 21m 35s) 
Epoch: [2][2100/5156] Loss: [0.181208110209929]Elapsed 14m 7s (remain 20m 32s) 
Epoch: [2][2200/5156] Loss: [0.17935975073412128]Elapsed 14m 33s (remain 19m 32s) 
Epoch: [2][2300/5156] Loss: [0.17794506879346725]Elapsed 14m 59s (remain 18m 36s) 
Epoch: [2][2400/5156] Loss: [0.17806497346943415]Elapsed 15m 25s (remain 17m 41s) 
Epoch: [2][2500/5156] Loss: [0.17815975669754164]Elapsed 15m 51s (remain 16m 49s) 
Epoch: [2][2600/5156] Loss: [0.17898555750530423]Elapsed 16m 17s (remain 16m 0s) 
Epoch: [2][2700/5156] Loss: [0.180697501902316

Epoch 2 Step 3120 - avg_train_loss: 0.1780  avg_val_loss: 0.5612
Epoch 2 Step 3120 - Score: 0.7263


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [2][3200/5156] Loss: [0.17821324569133443]Elapsed 23m 55s (remain 14m 36s) 
Epoch: [2][3300/5156] Loss: [0.17843405733708692]Elapsed 24m 21s (remain 13m 41s) 
Epoch: [2][3400/5156] Loss: [0.18023933905346734]Elapsed 24m 47s (remain 12m 47s) 
Epoch: [2][3500/5156] Loss: [0.17987744288055293]Elapsed 25m 13s (remain 11m 55s) 
Epoch: [2][3600/5156] Loss: [0.18013225435705077]Elapsed 25m 39s (remain 11m 4s) 
Epoch: [2][3700/5156] Loss: [0.1794867703119869]Elapsed 26m 5s (remain 10m 15s) 
Epoch: [2][3800/5156] Loss: [0.1802947767289686]Elapsed 26m 31s (remain 9m 27s) 
Epoch: [2][3900/5156] Loss: [0.17961564028615448]Elapsed 26m 57s (remain 8m 40s) 
Epoch: [2][4000/5156] Loss: [0.17896707965509678]Elapsed 27m 23s (remain 7m 54s) 
Epoch: [2][4100/5156] Loss: [0.17782932036630167]Elapsed 27m 49s (remain 7m 9s) 
Epoch: [2][4200/5156] Loss: [0.17905326705981645]Elapsed 28m 15s (remain 6m 25s) 
Epoch: [2][4300/5156] Loss: [0.1781596308572727]E

Epoch 2 Step 4680 - avg_train_loss: 0.1771  avg_val_loss: 0.5027
Epoch 2 Step 4680 - Score: 0.6867


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [2][4700/5156] Loss: [0.17684363770026554]Elapsed 35m 27s (remain 3m 25s) 
Epoch: [2][4800/5156] Loss: [0.17713699988031792]Elapsed 35m 53s (remain 2m 39s) 
Epoch: [2][4900/5156] Loss: [0.17643642426135694]Elapsed 36m 19s (remain 1m 53s) 
Epoch: [2][5000/5156] Loss: [0.17576338148517204]Elapsed 36m 45s (remain 1m 8s) 
Epoch: [2][5100/5156] Loss: [0.17584497407073915]Elapsed 37m 11s (remain 0m 24s) 


Epoch 2 - avg_train_loss: 0.1759  avg_val_loss: 0.5027  time: 2246s
Epoch 2 - Score: 0.6867


Epoch: [2][5155/5156] Loss: [0.17588368558007023]Elapsed 37m 25s (remain 0m 0s) 
Epoch: [3][0/5156] Loss: [0.07220237702131271]Elapsed 0m 0s (remain 18m 4s) 
Epoch: [3][100/5156] Loss: [0.08775792960573153]Elapsed 0m 26s (remain 21m 41s) 
Epoch: [3][200/5156] Loss: [0.09767684270406984]Elapsed 0m 51s (remain 21m 21s) 
Epoch: [3][300/5156] Loss: [0.09672669187814993]Elapsed 1m 17s (remain 20m 56s) 
Epoch: [3][400/5156] Loss: [0.09750824750268915]Elapsed 1m 43s (remain 20m 31s) 
Epoch: [3][500/5156] Loss: [0.09422599832758718]Elapsed 2m 9s (remain 20m 5s) 
Epoch: [3][600/5156] Loss: [0.09903708529863117]Elapsed 2m 35s (remain 19m 40s) 
Epoch: [3][700/5156] Loss: [0.10009215182468277]Elapsed 3m 1s (remain 19m 14s) 
Epoch: [3][800/5156] Loss: [0.09938869171552749]Elapsed 3m 27s (remain 18m 48s) 
Epoch: [3][900/5156] Loss: [0.09903912934005529]Elapsed 3m 53s (remain 18m 23s) 
Epoch: [3][1000/5156] Loss: [0.09981109483116968]Elapsed 4m 19s (remain 17m 57s) 
Epoch: [3][1100/5156] Loss: [0.098

Epoch 3 Step 1560 - avg_train_loss: 0.0980  avg_val_loss: 0.4635
Epoch 3 Step 1560 - Score: 0.6582
Epoch 3 Step 1560 - Save Best Score: 0.6582 Model


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [3][1600/5156] Loss: [0.0975576379239682]Elapsed 12m 6s (remain 26m 52s) 
Epoch: [3][1700/5156] Loss: [0.0994146295042035]Elapsed 12m 32s (remain 25m 27s) 
Epoch: [3][1800/5156] Loss: [0.09905009807528681]Elapsed 12m 58s (remain 24m 9s) 
Epoch: [3][1900/5156] Loss: [0.09818851810927623]Elapsed 13m 24s (remain 22m 56s) 
Epoch: [3][2000/5156] Loss: [0.09802290876170358]Elapsed 13m 50s (remain 21m 48s) 
Epoch: [3][2100/5156] Loss: [0.09871815196024387]Elapsed 14m 16s (remain 20m 44s) 
Epoch: [3][2200/5156] Loss: [0.09879804727811632]Elapsed 14m 41s (remain 19m 44s) 
Epoch: [3][2300/5156] Loss: [0.09898934023725312]Elapsed 15m 7s (remain 18m 46s) 
Epoch: [3][2400/5156] Loss: [0.09914948341191977]Elapsed 15m 33s (remain 17m 51s) 
Epoch: [3][2500/5156] Loss: [0.09880141507295505]Elapsed 15m 59s (remain 16m 58s) 
Epoch: [3][2600/5156] Loss: [0.09799766123219775]Elapsed 16m 25s (remain 16m 8s) 
Epoch: [3][2700/5156] Loss: [0.09806469427808

Epoch 3 Step 3120 - avg_train_loss: 0.0983  avg_val_loss: 0.4314
Epoch 3 Step 3120 - Score: 0.6370
Epoch 3 Step 3120 - Save Best Score: 0.6370 Model


EVAL: [1004/1005] Elapsed 5m 2s (remain 0m 0s) 
Epoch: [3][3200/5156] Loss: [0.09767284986446441]Elapsed 24m 13s (remain 14m 47s) 
Epoch: [3][3300/5156] Loss: [0.09742270241482444]Elapsed 24m 39s (remain 13m 51s) 
Epoch: [3][3400/5156] Loss: [0.09733296740085676]Elapsed 25m 5s (remain 12m 56s) 
Epoch: [3][3500/5156] Loss: [0.09721985774042947]Elapsed 25m 31s (remain 12m 3s) 
Epoch: [3][3600/5156] Loss: [0.0963089982799884]Elapsed 25m 57s (remain 11m 12s) 
Epoch: [3][3700/5156] Loss: [0.09622992319469176]Elapsed 26m 23s (remain 10m 22s) 
Epoch: [3][3800/5156] Loss: [0.09583085734059422]Elapsed 26m 49s (remain 9m 33s) 
Epoch: [3][3900/5156] Loss: [0.09616835052409832]Elapsed 27m 15s (remain 8m 46s) 
Epoch: [3][4000/5156] Loss: [0.0958272397678219]Elapsed 27m 41s (remain 7m 59s) 
Epoch: [3][4100/5156] Loss: [0.09503121844722967]Elapsed 28m 6s (remain 7m 13s) 
Epoch: [3][4200/5156] Loss: [0.09481375530952998]Elapsed 28m 32s (remain 6m 29s) 
Epoch: [3][4300/5156] Loss: [0.0944768463828562]E

Epoch 3 Step 4680 - avg_train_loss: 0.0940  avg_val_loss: 0.4901
Epoch 3 Step 4680 - Score: 0.6772


EVAL: [1004/1005] Elapsed 5m 7s (remain 0m 0s) 
Epoch: [3][4700/5156] Loss: [0.09393133746864335]Elapsed 35m 49s (remain 3m 28s) 
Epoch: [3][4800/5156] Loss: [0.09374930737768188]Elapsed 36m 16s (remain 2m 40s) 
Epoch: [3][4900/5156] Loss: [0.09383287891773934]Elapsed 36m 42s (remain 1m 54s) 
Epoch: [3][5000/5156] Loss: [0.09361217555697944]Elapsed 37m 9s (remain 1m 9s) 
Epoch: [3][5100/5156] Loss: [0.09333199798133886]Elapsed 37m 35s (remain 0m 24s) 


Epoch 3 - avg_train_loss: 0.0934  avg_val_loss: 0.4901  time: 2270s
Epoch 3 - Score: 0.6772


Epoch: [3][5155/5156] Loss: [0.09341624165506107]Elapsed 37m 50s (remain 0m 0s) 
Epoch: [4][0/5156] Loss: [0.00043263891711831093]Elapsed 0m 0s (remain 22m 27s) 
Epoch: [4][100/5156] Loss: [0.05672167980810627]Elapsed 0m 26s (remain 22m 22s) 
Epoch: [4][200/5156] Loss: [0.05043870796445428]Elapsed 0m 53s (remain 22m 7s) 
Epoch: [4][300/5156] Loss: [0.047825205793926816]Elapsed 1m 22s (remain 22m 6s) 
Epoch: [4][400/5156] Loss: [0.04649919785256962]Elapsed 1m 50s (remain 21m 47s) 
Epoch: [4][500/5156] Loss: [0.04593730268217094]Elapsed 2m 17s (remain 21m 13s) 
Epoch: [4][600/5156] Loss: [0.04613628121041197]Elapsed 2m 43s (remain 20m 36s) 
Epoch: [4][700/5156] Loss: [0.04549075486204061]Elapsed 3m 9s (remain 20m 3s) 
Epoch: [4][800/5156] Loss: [0.045574632726465586]Elapsed 3m 35s (remain 19m 30s) 
Epoch: [4][900/5156] Loss: [0.04465275000059987]Elapsed 4m 1s (remain 18m 59s) 
Epoch: [4][1000/5156] Loss: [0.04483540956256603]Elapsed 4m 27s (remain 18m 30s) 
Epoch: [4][1100/5156] Loss: [0

Epoch 4 Step 1560 - avg_train_loss: 0.0459  avg_val_loss: 0.4715
Epoch 4 Step 1560 - Score: 0.6638


EVAL: [1004/1005] Elapsed 5m 1s (remain 0m 0s) 
Epoch: [4][1600/5156] Loss: [0.045838919741889785]Elapsed 12m 9s (remain 26m 59s) 
Epoch: [4][1700/5156] Loss: [0.045824515843296915]Elapsed 12m 35s (remain 25m 34s) 
Epoch: [4][1800/5156] Loss: [0.04602363957271437]Elapsed 13m 2s (remain 24m 17s) 
Epoch: [4][1900/5156] Loss: [0.04578120432499314]Elapsed 13m 28s (remain 23m 4s) 
Epoch: [4][2000/5156] Loss: [0.04587617830369705]Elapsed 13m 55s (remain 21m 58s) 
Epoch: [4][2100/5156] Loss: [0.045844255461235744]Elapsed 14m 21s (remain 20m 53s) 
Epoch: [4][2200/5156] Loss: [0.045809615960952926]Elapsed 14m 46s (remain 19m 50s) 
Epoch: [4][2300/5156] Loss: [0.04595297862726872]Elapsed 15m 13s (remain 18m 53s) 
Epoch: [4][2400/5156] Loss: [0.04598706963929506]Elapsed 15m 39s (remain 17m 57s) 
Epoch: [4][2500/5156] Loss: [0.04567220040434479]Elapsed 16m 4s (remain 17m 4s) 
Epoch: [4][2600/5156] Loss: [0.045899826819539497]Elapsed 16m 32s (remain 16m 15s) 
Epoch: [4][2700/5156] Loss: [0.04718782

Epoch 4 Step 3120 - avg_train_loss: 0.0469  avg_val_loss: 0.4934
Epoch 4 Step 3120 - Score: 0.6765


EVAL: [1004/1005] Elapsed 5m 41s (remain 0m 0s) 
Epoch: [4][3200/5156] Loss: [0.047264217072340665]Elapsed 24m 54s (remain 15m 12s) 
Epoch: [4][3300/5156] Loss: [0.04706800835593754]Elapsed 25m 22s (remain 14m 15s) 
Epoch: [4][3400/5156] Loss: [0.04704524349639566]Elapsed 25m 51s (remain 13m 20s) 
Epoch: [4][3500/5156] Loss: [0.047052700781344846]Elapsed 26m 19s (remain 12m 26s) 
Epoch: [4][3600/5156] Loss: [0.04721560656381455]Elapsed 26m 47s (remain 11m 34s) 
Epoch: [4][3700/5156] Loss: [0.04724410518808513]Elapsed 27m 17s (remain 10m 43s) 
Epoch: [4][3800/5156] Loss: [0.04746636446556884]Elapsed 27m 44s (remain 9m 53s) 
Epoch: [4][3900/5156] Loss: [0.047608220250362876]Elapsed 28m 9s (remain 9m 3s) 
Epoch: [4][4000/5156] Loss: [0.04732836954095655]Elapsed 28m 34s (remain 8m 14s) 
Epoch: [4][4100/5156] Loss: [0.047258871111945266]Elapsed 28m 59s (remain 7m 27s) 
Epoch: [4][4200/5156] Loss: [0.047074321865394245]Elapsed 29m 24s (remain 6m 41s) 
Epoch: [4][4300/5156] Loss: [0.046990148

Epoch 4 Step 4680 - avg_train_loss: 0.0469  avg_val_loss: 0.4775
Epoch 4 Step 4680 - Score: 0.6677


EVAL: [1004/1005] Elapsed 5m 9s (remain 0m 0s) 
Epoch: [4][4700/5156] Loss: [0.04687431271089912]Elapsed 36m 50s (remain 3m 33s) 
Epoch: [4][4800/5156] Loss: [0.046681985179102915]Elapsed 37m 17s (remain 2m 45s) 
Epoch: [4][4900/5156] Loss: [0.04679306027590134]Elapsed 37m 47s (remain 1m 57s) 
Epoch: [4][5000/5156] Loss: [0.046706125359490695]Elapsed 38m 16s (remain 1m 11s) 
Epoch: [4][5100/5156] Loss: [0.046492664999168105]Elapsed 38m 44s (remain 0m 25s) 


Epoch 4 - avg_train_loss: 0.0465  avg_val_loss: 0.4775  time: 2340s
Epoch 4 - Score: 0.6677


Epoch: [4][5155/5156] Loss: [0.04654370139587076]Elapsed 38m 59s (remain 0m 0s) 


Score: 0.6370
-------------fold:2 training-------------
Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.LayerNorm.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'mask_predictions.classifier.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

Epoch: [1][0/5169] Loss: [0.07743202894926071]Elapsed 0m 0s (remain 17m 49s) 
Epoch: [1][100/5169] Loss: [0.9050802862084941]Elapsed 0m 27s (remain 23m 3s) 
Epoch: [1][200/5169] Loss: [0.7810996281120935]Elapsed 0m 56s (remain 23m 13s) 
Epoch: [1][300/5169] Loss: [0.695324585998533]Elapsed 1m 24s (remain 22m 48s) 
Epoch: [1][400/5169] Loss: [0.6558169331021277]Elapsed 1m 51s (remain 22m 9s) 
Epoch: [1][500/5169] Loss: [0.6374424520750789]Elapsed 2m 17s (remain 21m 21s) 
Epoch: [1][600/5169] Loss: [0.6160972656933172]Elapsed 2m 43s (remain 20m 43s) 
Epoch: [1][700/5169] Loss: [0.5786264495334233]Elapsed 3m 12s (remain 20m 26s) 
Epoch: [1][800/5169] Loss: [0.5531664225152567]Elapsed 3m 39s (remain 19m 59s) 
Epoch: [1][900/5169] Loss: [0.5238858210242373]Elapsed 4m 8s (remain 19m 35s) 
Epoch: [1][1000/5169] Loss: [0.49845249064818054]Elapsed 4m 37s (remain 19m 13s) 
Epoch: [1][1100/5169] Loss: [0.48250440627643243]Elapsed 5m 5s (remain 18m 50s) 
Epoch: [1][1200/5169] Loss: [0.477850165658

Epoch 1 Step 1560 - avg_train_loss: 0.4463  avg_val_loss: 0.3278
Epoch 1 Step 1560 - Score: 0.5639
Epoch 1 Step 1560 - Save Best Score: 0.5639 Model


EVAL: [997/998] Elapsed 5m 22s (remain 0m 0s) 
Epoch: [1][1600/5169] Loss: [0.4413938909459623]Elapsed 12m 53s (remain 28m 44s) 
Epoch: [1][1700/5169] Loss: [0.4317357977449289]Elapsed 13m 22s (remain 27m 15s) 
Epoch: [1][1800/5169] Loss: [0.42695192871858656]Elapsed 13m 50s (remain 25m 52s) 
Epoch: [1][1900/5169] Loss: [0.4208028839413413]Elapsed 14m 16s (remain 24m 31s) 
Epoch: [1][2000/5169] Loss: [0.4172048063884667]Elapsed 14m 41s (remain 23m 16s) 
Epoch: [1][2100/5169] Loss: [0.41016644826875603]Elapsed 15m 8s (remain 22m 6s) 
Epoch: [1][2200/5169] Loss: [0.4062039799927206]Elapsed 15m 36s (remain 21m 3s) 
Epoch: [1][2300/5169] Loss: [0.40001804204477814]Elapsed 16m 4s (remain 20m 2s) 
Epoch: [1][2400/5169] Loss: [0.39448230206461027]Elapsed 16m 32s (remain 19m 4s) 
Epoch: [1][2500/5169] Loss: [0.39525729757999195]Elapsed 17m 1s (remain 18m 9s) 
Epoch: [1][2600/5169] Loss: [0.39053059397479484]Elapsed 17m 29s (remain 17m 15s) 
Epoch: [1][2700/5169] Loss: [0.3878993915200922]Elaps

Epoch 1 Step 3120 - avg_train_loss: 0.3781  avg_val_loss: 0.2268
Epoch 1 Step 3120 - Score: 0.4743
Epoch 1 Step 3120 - Save Best Score: 0.4743 Model


EVAL: [997/998] Elapsed 5m 34s (remain 0m 0s) 
Epoch: [1][3200/5169] Loss: [0.37450211655527615]Elapsed 25m 50s (remain 15m 53s) 
Epoch: [1][3300/5169] Loss: [0.371296486355342]Elapsed 26m 16s (remain 14m 51s) 
Epoch: [1][3400/5169] Loss: [0.3683903083093018]Elapsed 26m 42s (remain 13m 53s) 
Epoch: [1][3500/5169] Loss: [0.36515234580599143]Elapsed 27m 8s (remain 12m 55s) 
Epoch: [1][3600/5169] Loss: [0.3627292679542627]Elapsed 27m 35s (remain 12m 0s) 
Epoch: [1][3700/5169] Loss: [0.36134868981461693]Elapsed 28m 0s (remain 11m 6s) 
Epoch: [1][3800/5169] Loss: [0.35948721298991604]Elapsed 28m 25s (remain 10m 13s) 
Epoch: [1][3900/5169] Loss: [0.3570562695032675]Elapsed 28m 52s (remain 9m 23s) 
Epoch: [1][4000/5169] Loss: [0.3535217364807958]Elapsed 29m 17s (remain 8m 33s) 
Epoch: [1][4100/5169] Loss: [0.3523667892993681]Elapsed 29m 43s (remain 7m 44s) 
Epoch: [1][4200/5169] Loss: [0.3505027454337774]Elapsed 30m 9s (remain 6m 56s) 
Epoch: [1][4300/5169] Loss: [0.3477610466958923]Elapsed 3

Epoch 1 Step 4680 - avg_train_loss: 0.3423  avg_val_loss: 0.3647
Epoch 1 Step 4680 - Score: 0.6031


EVAL: [997/998] Elapsed 5m 5s (remain 0m 0s) 
Epoch: [1][4700/5169] Loss: [0.3417784366146192]Elapsed 37m 22s (remain 3m 43s) 
Epoch: [1][4800/5169] Loss: [0.3410870945150169]Elapsed 37m 48s (remain 2m 53s) 
Epoch: [1][4900/5169] Loss: [0.3398462474776568]Elapsed 38m 15s (remain 2m 5s) 
Epoch: [1][5000/5169] Loss: [0.3385415572512562]Elapsed 38m 41s (remain 1m 17s) 
Epoch: [1][5100/5169] Loss: [0.3366103659023508]Elapsed 39m 7s (remain 0m 31s) 


Epoch 1 - avg_train_loss: 0.3358  avg_val_loss: 0.3647  time: 2365s
Epoch 1 - Score: 0.6031


Epoch: [1][5168/5169] Loss: [0.33578316814918535]Elapsed 39m 25s (remain 0m 0s) 
Epoch: [2][0/5169] Loss: [0.15480388700962067]Elapsed 0m 0s (remain 18m 7s) 
Epoch: [2][100/5169] Loss: [0.1971554174064889]Elapsed 0m 25s (remain 21m 42s) 
Epoch: [2][200/5169] Loss: [0.2125817284290476]Elapsed 0m 51s (remain 21m 22s) 
Epoch: [2][300/5169] Loss: [0.21733117477290148]Elapsed 1m 17s (remain 20m 59s) 
Epoch: [2][400/5169] Loss: [0.2094128539670271]Elapsed 1m 43s (remain 20m 34s) 
Epoch: [2][500/5169] Loss: [0.21506539323035942]Elapsed 2m 9s (remain 20m 8s) 
Epoch: [2][600/5169] Loss: [0.21862368516542327]Elapsed 2m 35s (remain 19m 43s) 
Epoch: [2][700/5169] Loss: [0.20928940965427986]Elapsed 3m 1s (remain 19m 18s) 
Epoch: [2][800/5169] Loss: [0.2082856801432845]Elapsed 3m 27s (remain 18m 52s) 
Epoch: [2][900/5169] Loss: [0.19962207161529402]Elapsed 3m 53s (remain 18m 26s) 
Epoch: [2][1000/5169] Loss: [0.2025281489363629]Elapsed 4m 19s (remain 18m 0s) 
Epoch: [2][1100/5169] Loss: [0.201584772

Epoch 2 Step 1560 - avg_train_loss: 0.2008  avg_val_loss: 0.2854
Epoch 2 Step 1560 - Score: 0.5282


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [2][1600/5169] Loss: [0.2007646697820678]Elapsed 11m 55s (remain 26m 35s) 
Epoch: [2][1700/5169] Loss: [0.2002829418353613]Elapsed 12m 21s (remain 25m 12s) 
Epoch: [2][1800/5169] Loss: [0.19926724220414938]Elapsed 12m 47s (remain 23m 55s) 
Epoch: [2][1900/5169] Loss: [0.1985727323163954]Elapsed 13m 13s (remain 22m 44s) 
Epoch: [2][2000/5169] Loss: [0.1983052519036641]Elapsed 13m 39s (remain 21m 37s) 
Epoch: [2][2100/5169] Loss: [0.201615011986882]Elapsed 14m 5s (remain 20m 34s) 
Epoch: [2][2200/5169] Loss: [0.2026125063912687]Elapsed 14m 31s (remain 19m 35s) 
Epoch: [2][2300/5169] Loss: [0.20287948166155292]Elapsed 14m 57s (remain 18m 38s) 
Epoch: [2][2400/5169] Loss: [0.2030991719020896]Elapsed 15m 23s (remain 17m 44s) 
Epoch: [2][2500/5169] Loss: [0.20288855925200466]Elapsed 15m 49s (remain 16m 52s) 
Epoch: [2][2600/5169] Loss: [0.20330793422681467]Elapsed 16m 15s (remain 16m 2s) 
Epoch: [2][2700/5169] Loss: [0.20099170476958242]El

Epoch 2 Step 3120 - avg_train_loss: 0.2008  avg_val_loss: 0.2624
Epoch 2 Step 3120 - Score: 0.5084


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [2][3200/5169] Loss: [0.20061736386390006]Elapsed 23m 51s (remain 14m 40s) 
Epoch: [2][3300/5169] Loss: [0.2017499070262326]Elapsed 24m 17s (remain 13m 44s) 
Epoch: [2][3400/5169] Loss: [0.2014853626604739]Elapsed 24m 43s (remain 12m 51s) 
Epoch: [2][3500/5169] Loss: [0.20027729531017263]Elapsed 25m 9s (remain 11m 59s) 
Epoch: [2][3600/5169] Loss: [0.19924294469870887]Elapsed 25m 35s (remain 11m 8s) 
Epoch: [2][3700/5169] Loss: [0.19872123032442154]Elapsed 26m 1s (remain 10m 19s) 
Epoch: [2][3800/5169] Loss: [0.19801902218365292]Elapsed 26m 27s (remain 9m 31s) 
Epoch: [2][3900/5169] Loss: [0.19865840537783433]Elapsed 26m 53s (remain 8m 44s) 
Epoch: [2][4000/5169] Loss: [0.19817978216714643]Elapsed 27m 19s (remain 7m 58s) 
Epoch: [2][4100/5169] Loss: [0.19797241139299898]Elapsed 27m 45s (remain 7m 13s) 
Epoch: [2][4200/5169] Loss: [0.19870170154496364]Elapsed 28m 10s (remain 6m 29s) 
Epoch: [2][4300/5169] Loss: [0.19900246526197984]El

Epoch 2 Step 4680 - avg_train_loss: 0.1965  avg_val_loss: 0.3396
Epoch 2 Step 4680 - Score: 0.5667


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [2][4700/5169] Loss: [0.19630621307947305]Elapsed 35m 21s (remain 3m 31s) 
Epoch: [2][4800/5169] Loss: [0.1956218188872971]Elapsed 35m 47s (remain 2m 44s) 
Epoch: [2][4900/5169] Loss: [0.19545049987426616]Elapsed 36m 13s (remain 1m 58s) 
Epoch: [2][5000/5169] Loss: [0.19510914004588234]Elapsed 36m 39s (remain 1m 13s) 
Epoch: [2][5100/5169] Loss: [0.19635532030929412]Elapsed 37m 5s (remain 0m 29s) 


Epoch 2 - avg_train_loss: 0.1965  avg_val_loss: 0.3396  time: 2243s
Epoch 2 - Score: 0.5667


Epoch: [2][5168/5169] Loss: [0.19649998959871046]Elapsed 37m 22s (remain 0m 0s) 
Epoch: [3][0/5169] Loss: [0.030709408223628998]Elapsed 0m 0s (remain 18m 12s) 
Epoch: [3][100/5169] Loss: [0.1250167797010687]Elapsed 0m 26s (remain 21m 47s) 
Epoch: [3][200/5169] Loss: [0.11109417312852668]Elapsed 0m 51s (remain 21m 24s) 
Epoch: [3][300/5169] Loss: [0.1090045013260777]Elapsed 1m 17s (remain 20m 59s) 
Epoch: [3][400/5169] Loss: [0.10732832968588035]Elapsed 1m 43s (remain 20m 34s) 
Epoch: [3][500/5169] Loss: [0.10839998068175503]Elapsed 2m 9s (remain 20m 9s) 
Epoch: [3][600/5169] Loss: [0.10794816726201181]Elapsed 2m 35s (remain 19m 43s) 
Epoch: [3][700/5169] Loss: [0.11017270723412953]Elapsed 3m 1s (remain 19m 17s) 
Epoch: [3][800/5169] Loss: [0.10900343433804868]Elapsed 3m 27s (remain 18m 52s) 
Epoch: [3][900/5169] Loss: [0.10854939803337828]Elapsed 3m 53s (remain 18m 26s) 
Epoch: [3][1000/5169] Loss: [0.1070349686197623]Elapsed 4m 19s (remain 18m 0s) 
Epoch: [3][1100/5169] Loss: [0.10477

Epoch 3 Step 1560 - avg_train_loss: 0.1017  avg_val_loss: 0.2764
Epoch 3 Step 1560 - Score: 0.5206


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [3][1600/5169] Loss: [0.10123299092065065]Elapsed 11m 55s (remain 26m 34s) 
Epoch: [3][1700/5169] Loss: [0.10048188591231207]Elapsed 12m 21s (remain 25m 12s) 
Epoch: [3][1800/5169] Loss: [0.10135026889014881]Elapsed 12m 47s (remain 23m 55s) 
Epoch: [3][1900/5169] Loss: [0.1038770952178351]Elapsed 13m 13s (remain 22m 44s) 
Epoch: [3][2000/5169] Loss: [0.10371141175966167]Elapsed 13m 39s (remain 21m 37s) 
Epoch: [3][2100/5169] Loss: [0.10419426082849789]Elapsed 14m 5s (remain 20m 34s) 
Epoch: [3][2200/5169] Loss: [0.10496390386201254]Elapsed 14m 31s (remain 19m 35s) 
Epoch: [3][2300/5169] Loss: [0.10593347262096935]Elapsed 14m 57s (remain 18m 38s) 
Epoch: [3][2400/5169] Loss: [0.10642811064346963]Elapsed 15m 23s (remain 17m 44s) 
Epoch: [3][2500/5169] Loss: [0.1059653037119249]Elapsed 15m 49s (remain 16m 52s) 
Epoch: [3][2600/5169] Loss: [0.1057595319804468]Elapsed 16m 15s (remain 16m 2s) 
Epoch: [3][2700/5169] Loss: [0.106400000267818

Epoch 3 Step 3120 - avg_train_loss: 0.1065  avg_val_loss: 0.2708
Epoch 3 Step 3120 - Score: 0.5139


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [3][3200/5169] Loss: [0.10631356303359277]Elapsed 23m 51s (remain 14m 40s) 
Epoch: [3][3300/5169] Loss: [0.1060808261345634]Elapsed 24m 17s (remain 13m 44s) 
Epoch: [3][3400/5169] Loss: [0.10638670199770273]Elapsed 24m 43s (remain 12m 51s) 
Epoch: [3][3500/5169] Loss: [0.10601564546012648]Elapsed 25m 9s (remain 11m 59s) 
Epoch: [3][3600/5169] Loss: [0.10553723139654525]Elapsed 25m 35s (remain 11m 8s) 
Epoch: [3][3700/5169] Loss: [0.1056112599665069]Elapsed 26m 1s (remain 10m 19s) 
Epoch: [3][3800/5169] Loss: [0.10569327170694758]Elapsed 26m 27s (remain 9m 31s) 
Epoch: [3][3900/5169] Loss: [0.10534475677301061]Elapsed 26m 52s (remain 8m 44s) 
Epoch: [3][4000/5169] Loss: [0.10566092927850165]Elapsed 27m 18s (remain 7m 58s) 
Epoch: [3][4100/5169] Loss: [0.10550647224106294]Elapsed 27m 44s (remain 7m 13s) 
Epoch: [3][4200/5169] Loss: [0.10535188905477604]Elapsed 28m 10s (remain 6m 29s) 
Epoch: [3][4300/5169] Loss: [0.10518882042287599]El

Epoch 3 Step 4680 - avg_train_loss: 0.1050  avg_val_loss: 0.3257
Epoch 3 Step 4680 - Score: 0.5583


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [3][4700/5169] Loss: [0.10496528384134621]Elapsed 35m 21s (remain 3m 31s) 
Epoch: [3][4800/5169] Loss: [0.10500510367941325]Elapsed 35m 47s (remain 2m 44s) 
Epoch: [3][4900/5169] Loss: [0.10511581112729233]Elapsed 36m 13s (remain 1m 58s) 
Epoch: [3][5000/5169] Loss: [0.10522685719076698]Elapsed 36m 38s (remain 1m 13s) 
Epoch: [3][5100/5169] Loss: [0.10507174113157873]Elapsed 37m 4s (remain 0m 29s) 


Epoch 3 - avg_train_loss: 0.1049  avg_val_loss: 0.3257  time: 2243s
Epoch 3 - Score: 0.5583


Epoch: [3][5168/5169] Loss: [0.10493124411115959]Elapsed 37m 22s (remain 0m 0s) 
Epoch: [4][0/5169] Loss: [0.028447121381759644]Elapsed 0m 0s (remain 18m 12s) 
Epoch: [4][100/5169] Loss: [0.05900744404839721]Elapsed 0m 26s (remain 21m 44s) 
Epoch: [4][200/5169] Loss: [0.06196746115824115]Elapsed 0m 51s (remain 21m 24s) 
Epoch: [4][300/5169] Loss: [0.0643612768685142]Elapsed 1m 17s (remain 20m 59s) 
Epoch: [4][400/5169] Loss: [0.06340325707431457]Elapsed 1m 43s (remain 20m 34s) 
Epoch: [4][500/5169] Loss: [0.06192472493197867]Elapsed 2m 9s (remain 20m 9s) 
Epoch: [4][600/5169] Loss: [0.06032818228976765]Elapsed 2m 35s (remain 19m 43s) 
Epoch: [4][700/5169] Loss: [0.05962303340801504]Elapsed 3m 1s (remain 19m 17s) 
Epoch: [4][800/5169] Loss: [0.05993301624217797]Elapsed 3m 27s (remain 18m 52s) 
Epoch: [4][900/5169] Loss: [0.060152190709481274]Elapsed 3m 53s (remain 18m 26s) 
Epoch: [4][1000/5169] Loss: [0.059063155852508334]Elapsed 4m 19s (remain 18m 0s) 
Epoch: [4][1100/5169] Loss: [0.0

Epoch 4 Step 1560 - avg_train_loss: 0.0585  avg_val_loss: 0.2695
Epoch 4 Step 1560 - Score: 0.5125


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [4][1600/5169] Loss: [0.05849688942265633]Elapsed 11m 55s (remain 26m 35s) 
Epoch: [4][1700/5169] Loss: [0.05769566844395449]Elapsed 12m 21s (remain 25m 12s) 
Epoch: [4][1800/5169] Loss: [0.05737595307563707]Elapsed 12m 47s (remain 23m 55s) 
Epoch: [4][1900/5169] Loss: [0.05724149556152512]Elapsed 13m 13s (remain 22m 44s) 
Epoch: [4][2000/5169] Loss: [0.057455606831516355]Elapsed 13m 39s (remain 21m 37s) 
Epoch: [4][2100/5169] Loss: [0.057855022458724085]Elapsed 14m 5s (remain 20m 34s) 
Epoch: [4][2200/5169] Loss: [0.057944218674594386]Elapsed 14m 31s (remain 19m 35s) 
Epoch: [4][2300/5169] Loss: [0.05740708112841301]Elapsed 14m 57s (remain 18m 38s) 
Epoch: [4][2400/5169] Loss: [0.05726359961903183]Elapsed 15m 23s (remain 17m 44s) 
Epoch: [4][2500/5169] Loss: [0.05704424670036541]Elapsed 15m 49s (remain 16m 52s) 
Epoch: [4][2600/5169] Loss: [0.05661851225005358]Elapsed 16m 15s (remain 16m 2s) 
Epoch: [4][2700/5169] Loss: [0.057424665

Epoch 4 Step 3120 - avg_train_loss: 0.0568  avg_val_loss: 0.2840
Epoch 4 Step 3120 - Score: 0.5251


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [4][3200/5169] Loss: [0.057053694123201514]Elapsed 23m 51s (remain 14m 40s) 
Epoch: [4][3300/5169] Loss: [0.05691989541768179]Elapsed 24m 17s (remain 13m 44s) 
Epoch: [4][3400/5169] Loss: [0.05679555279298715]Elapsed 24m 43s (remain 12m 51s) 
Epoch: [4][3500/5169] Loss: [0.05660169662082381]Elapsed 25m 9s (remain 11m 59s) 
Epoch: [4][3600/5169] Loss: [0.05644449483743587]Elapsed 25m 35s (remain 11m 8s) 
Epoch: [4][3700/5169] Loss: [0.05675185499512441]Elapsed 26m 1s (remain 10m 19s) 
Epoch: [4][3800/5169] Loss: [0.056267359153147165]Elapsed 26m 27s (remain 9m 31s) 
Epoch: [4][3900/5169] Loss: [0.056373589498232374]Elapsed 26m 52s (remain 8m 44s) 
Epoch: [4][4000/5169] Loss: [0.05640235877665602]Elapsed 27m 18s (remain 7m 58s) 
Epoch: [4][4100/5169] Loss: [0.05610733419236101]Elapsed 27m 44s (remain 7m 13s) 
Epoch: [4][4200/5169] Loss: [0.05629803206545973]Elapsed 28m 10s (remain 6m 29s) 
Epoch: [4][4300/5169] Loss: [0.055977249155825

Epoch 4 Step 4680 - avg_train_loss: 0.0559  avg_val_loss: 0.2845
Epoch 4 Step 4680 - Score: 0.5256


EVAL: [997/998] Elapsed 5m 0s (remain 0m 0s) 
Epoch: [4][4700/5169] Loss: [0.05579492292594664]Elapsed 35m 21s (remain 3m 31s) 
Epoch: [4][4800/5169] Loss: [0.055792987932179815]Elapsed 35m 47s (remain 2m 44s) 
Epoch: [4][4900/5169] Loss: [0.05560164425704302]Elapsed 36m 13s (remain 1m 58s) 
Epoch: [4][5000/5169] Loss: [0.05558713682084257]Elapsed 36m 38s (remain 1m 13s) 
Epoch: [4][5100/5169] Loss: [0.055606355909948495]Elapsed 37m 4s (remain 0m 29s) 


Epoch 4 - avg_train_loss: 0.0558  avg_val_loss: 0.2845  time: 2243s
Epoch 4 - Score: 0.5256


Epoch: [4][5168/5169] Loss: [0.05578647106224073]Elapsed 37m 22s (remain 0m 0s) 


Score: 0.4743
-------------fold:3 training-------------
Some weights of the model checkpoint at microsoft/deberta-v3-large were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.LayerNorm.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'mask_predictions.classifier.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

Epoch: [1][0/6062] Loss: [2.1273584365844727]Elapsed 0m 0s (remain 38m 23s) 
Epoch: [1][100/6062] Loss: [0.9938402575542136]Elapsed 0m 26s (remain 25m 40s) 
Epoch: [1][200/6062] Loss: [0.816778668639162]Elapsed 0m 52s (remain 25m 17s) 
Epoch: [1][300/6062] Loss: [0.7479652698079056]Elapsed 1m 17s (remain 24m 52s) 
Epoch: [1][400/6062] Loss: [0.6754884607201568]Elapsed 1m 43s (remain 24m 27s) 
Epoch: [1][500/6062] Loss: [0.6222115759863267]Elapsed 2m 9s (remain 24m 1s) 
Epoch: [1][600/6062] Loss: [0.589661993088275]Elapsed 2m 35s (remain 23m 35s) 
Epoch: [1][700/6062] Loss: [0.5731179834049216]Elapsed 3m 1s (remain 23m 9s) 
Epoch: [1][800/6062] Loss: [0.554378711536331]Elapsed 3m 27s (remain 22m 44s) 
Epoch: [1][900/6062] Loss: [0.5230180369729953]Elapsed 3m 53s (remain 22m 18s) 
Epoch: [1][1000/6062] Loss: [0.5036815480943242]Elapsed 4m 19s (remain 21m 52s) 
Epoch: [1][1100/6062] Loss: [0.48377001517134616]Elapsed 4m 45s (remain 21m 26s) 
Epoch: [1][1200/6062] Loss: [0.4698672632473034

Epoch 1 Step 1560 - avg_train_loss: 0.4375  avg_val_loss: 0.6231
Epoch 1 Step 1560 - Score: 0.7782
Epoch 1 Step 1560 - Save Best Score: 0.7782 Model


EVAL: [551/552] Elapsed 2m 43s (remain 0m 0s) 
Epoch: [1][1600/6062] Loss: [0.4332263805403276]Elapsed 9m 46s (remain 27m 15s) 
Epoch: [1][1700/6062] Loss: [0.4229281494647222]Elapsed 10m 12s (remain 26m 11s) 
Epoch: [1][1800/6062] Loss: [0.41586976959029265]Elapsed 10m 38s (remain 25m 11s) 
Epoch: [1][1900/6062] Loss: [0.41405601874204995]Elapsed 11m 4s (remain 24m 15s) 
Epoch: [1][2000/6062] Loss: [0.4073834768806473]Elapsed 11m 30s (remain 23m 21s) 
Epoch: [1][2100/6062] Loss: [0.40014279406936887]Elapsed 11m 56s (remain 22m 31s) 
Epoch: [1][2200/6062] Loss: [0.3934104175821879]Elapsed 12m 22s (remain 21m 42s) 
Epoch: [1][2300/6062] Loss: [0.38684896769471006]Elapsed 12m 48s (remain 20m 56s) 
Epoch: [1][2400/6062] Loss: [0.380558982820181]Elapsed 13m 14s (remain 20m 11s) 
Epoch: [1][2500/6062] Loss: [0.3747036279090855]Elapsed 13m 40s (remain 19m 28s) 
Epoch: [1][2600/6062] Loss: [0.3711733144821842]Elapsed 14m 6s (remain 18m 46s) 
Epoch: [1][2700/6062] Loss: [0.3659785367158274]Ela

Epoch 1 Step 3120 - avg_train_loss: 0.3551  avg_val_loss: 0.3842
Epoch 1 Step 3120 - Score: 0.6155
Epoch 1 Step 3120 - Save Best Score: 0.6155 Model


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [1][3200/6062] Loss: [0.35365853050432416]Elapsed 19m 35s (remain 17m 30s) 
Epoch: [1][3300/6062] Loss: [0.3528729831758629]Elapsed 20m 1s (remain 16m 45s) 
Epoch: [1][3400/6062] Loss: [0.3498240676277182]Elapsed 20m 27s (remain 16m 0s) 
Epoch: [1][3500/6062] Loss: [0.3455289944559538]Elapsed 20m 53s (remain 15m 17s) 
Epoch: [1][3600/6062] Loss: [0.3435724362424319]Elapsed 21m 19s (remain 14m 34s) 
Epoch: [1][3700/6062] Loss: [0.3428472382551036]Elapsed 21m 45s (remain 13m 52s) 
Epoch: [1][3800/6062] Loss: [0.34231641723372497]Elapsed 22m 11s (remain 13m 12s) 
Epoch: [1][3900/6062] Loss: [0.34039298687780256]Elapsed 22m 37s (remain 12m 32s) 
Epoch: [1][4000/6062] Loss: [0.3380133369308399]Elapsed 23m 3s (remain 11m 52s) 
Epoch: [1][4100/6062] Loss: [0.3356858765026848]Elapsed 23m 29s (remain 11m 13s) 
Epoch: [1][4200/6062] Loss: [0.3330876228979649]Elapsed 23m 55s (remain 10m 35s) 
Epoch: [1][4300/6062] Loss: [0.331350606151283]Elap

Epoch 1 Step 4680 - avg_train_loss: 0.3254  avg_val_loss: 0.3776
Epoch 1 Step 4680 - Score: 0.6076
Epoch 1 Step 4680 - Save Best Score: 0.6076 Model


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [1][4700/6062] Loss: [0.32515536224718716]Elapsed 28m 57s (remain 8m 22s) 
Epoch: [1][4800/6062] Loss: [0.3243861349021409]Elapsed 29m 22s (remain 7m 43s) 
Epoch: [1][4900/6062] Loss: [0.3232603181419274]Elapsed 29m 48s (remain 7m 3s) 
Epoch: [1][5000/6062] Loss: [0.3209335616002794]Elapsed 30m 14s (remain 6m 25s) 
Epoch: [1][5100/6062] Loss: [0.31952692490346657]Elapsed 30m 40s (remain 5m 46s) 
Epoch: [1][5200/6062] Loss: [0.31722949526306465]Elapsed 31m 6s (remain 5m 9s) 
Epoch: [1][5300/6062] Loss: [0.3157195755405038]Elapsed 31m 32s (remain 4m 31s) 
Epoch: [1][5400/6062] Loss: [0.31417894524626017]Elapsed 31m 58s (remain 3m 54s) 
Epoch: [1][5500/6062] Loss: [0.31272726176833254]Elapsed 32m 24s (remain 3m 18s) 
Epoch: [1][5600/6062] Loss: [0.3105740523034345]Elapsed 32m 50s (remain 2m 42s) 
Epoch: [1][5700/6062] Loss: [0.30945179188867755]Elapsed 33m 16s (remain 2m 6s) 
Epoch: [1][5800/6062] Loss: [0.30837704770529095]Elapsed 33m

Epoch 1 - avg_train_loss: 0.3060  avg_val_loss: 0.3776  time: 2090s
Epoch 1 - Score: 0.6076


Epoch: [1][6061/6062] Loss: [0.3060446509708732]Elapsed 34m 50s (remain 0m 0s) 
Epoch: [2][0/6062] Loss: [0.2594015598297119]Elapsed 0m 0s (remain 21m 21s) 
Epoch: [2][100/6062] Loss: [0.1788390015030342]Elapsed 0m 26s (remain 25m 35s) 
Epoch: [2][200/6062] Loss: [0.19890385625254947]Elapsed 0m 51s (remain 25m 13s) 
Epoch: [2][300/6062] Loss: [0.2057733724044212]Elapsed 1m 17s (remain 24m 50s) 
Epoch: [2][400/6062] Loss: [0.2013373371572791]Elapsed 1m 43s (remain 24m 25s) 
Epoch: [2][500/6062] Loss: [0.2204607273548198]Elapsed 2m 9s (remain 24m 0s) 
Epoch: [2][600/6062] Loss: [0.21809387162672728]Elapsed 2m 35s (remain 23m 34s) 
Epoch: [2][700/6062] Loss: [0.20802419006554276]Elapsed 3m 1s (remain 23m 9s) 
Epoch: [2][800/6062] Loss: [0.20344111411503357]Elapsed 3m 27s (remain 22m 43s) 
Epoch: [2][900/6062] Loss: [0.2019902693435094]Elapsed 3m 53s (remain 22m 17s) 
Epoch: [2][1000/6062] Loss: [0.20123435427272957]Elapsed 4m 19s (remain 21m 52s) 
Epoch: [2][1100/6062] Loss: [0.2018078546

Epoch 2 Step 1560 - avg_train_loss: 0.1922  avg_val_loss: 0.4231
Epoch 2 Step 1560 - Score: 0.6492


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [2][1600/6062] Loss: [0.19139896985328617]Elapsed 9m 41s (remain 26m 59s) 
Epoch: [2][1700/6062] Loss: [0.19079265156445094]Elapsed 10m 7s (remain 25m 56s) 
Epoch: [2][1800/6062] Loss: [0.1899742912923494]Elapsed 10m 33s (remain 24m 57s) 
Epoch: [2][1900/6062] Loss: [0.18961819835118576]Elapsed 10m 59s (remain 24m 2s) 
Epoch: [2][2000/6062] Loss: [0.18990358187294004]Elapsed 11m 25s (remain 23m 10s) 
Epoch: [2][2100/6062] Loss: [0.189332703831006]Elapsed 11m 50s (remain 22m 20s) 
Epoch: [2][2200/6062] Loss: [0.19034874308147803]Elapsed 12m 16s (remain 21m 32s) 
Epoch: [2][2300/6062] Loss: [0.19179847037082517]Elapsed 12m 42s (remain 20m 46s) 
Epoch: [2][2400/6062] Loss: [0.19145838921459293]Elapsed 13m 8s (remain 20m 2s) 
Epoch: [2][2500/6062] Loss: [0.1898633027750747]Elapsed 13m 34s (remain 19m 20s) 
Epoch: [2][2600/6062] Loss: [0.18822309456446432]Elapsed 14m 0s (remain 18m 38s) 
Epoch: [2][2700/6062] Loss: [0.18854255761401495]E

Epoch 2 Step 3120 - avg_train_loss: 0.1902  avg_val_loss: 0.3330
Epoch 2 Step 3120 - Score: 0.5712
Epoch 2 Step 3120 - Save Best Score: 0.5712 Model


EVAL: [551/552] Elapsed 2m 45s (remain 0m 0s) 
Epoch: [2][3200/6062] Loss: [0.1905617487787914]Elapsed 19m 30s (remain 17m 26s) 
Epoch: [2][3300/6062] Loss: [0.19086811111262222]Elapsed 19m 56s (remain 16m 40s) 
Epoch: [2][3400/6062] Loss: [0.19228801031530415]Elapsed 20m 22s (remain 15m 56s) 
Epoch: [2][3500/6062] Loss: [0.1933827611436321]Elapsed 20m 48s (remain 15m 13s) 
Epoch: [2][3600/6062] Loss: [0.19196347120296603]Elapsed 21m 14s (remain 14m 31s) 
Epoch: [2][3700/6062] Loss: [0.19222715333771234]Elapsed 21m 40s (remain 13m 49s) 
Epoch: [2][3800/6062] Loss: [0.19281100304252735]Elapsed 22m 6s (remain 13m 9s) 
Epoch: [2][3900/6062] Loss: [0.19233127274364842]Elapsed 22m 32s (remain 12m 29s) 
Epoch: [2][4000/6062] Loss: [0.1918052284576061]Elapsed 22m 58s (remain 11m 49s) 
Epoch: [2][4100/6062] Loss: [0.19147329854365994]Elapsed 23m 24s (remain 11m 11s) 
Epoch: [2][4200/6062] Loss: [0.19127750160232126]Elapsed 23m 50s (remain 10m 33s) 
Epoch: [2][4300/6062] Loss: [0.19159253973455

Epoch 2 Step 4680 - avg_train_loss: 0.1881  avg_val_loss: 0.3791
Epoch 2 Step 4680 - Score: 0.6141


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [2][4700/6062] Loss: [0.187901833209392]Elapsed 28m 45s (remain 8m 19s) 
Epoch: [2][4800/6062] Loss: [0.18725977289937265]Elapsed 29m 11s (remain 7m 40s) 
Epoch: [2][4900/6062] Loss: [0.18646972005275778]Elapsed 29m 37s (remain 7m 1s) 
Epoch: [2][5000/6062] Loss: [0.18609030698616877]Elapsed 30m 3s (remain 6m 22s) 
Epoch: [2][5100/6062] Loss: [0.18569578142972196]Elapsed 30m 29s (remain 5m 44s) 
Epoch: [2][5200/6062] Loss: [0.18568615839648925]Elapsed 30m 55s (remain 5m 7s) 
Epoch: [2][5300/6062] Loss: [0.1859719948462231]Elapsed 31m 21s (remain 4m 30s) 
Epoch: [2][5400/6062] Loss: [0.18508325036341144]Elapsed 31m 47s (remain 3m 53s) 
Epoch: [2][5500/6062] Loss: [0.18485991745417793]Elapsed 32m 13s (remain 3m 17s) 
Epoch: [2][5600/6062] Loss: [0.18409138698752278]Elapsed 32m 39s (remain 2m 41s) 
Epoch: [2][5700/6062] Loss: [0.18416749359488702]Elapsed 33m 5s (remain 2m 5s) 
Epoch: [2][5800/6062] Loss: [0.1841808582869712]Elapsed 33m

Epoch 2 - avg_train_loss: 0.1832  avg_val_loss: 0.3791  time: 2079s
Epoch 2 - Score: 0.6141


Epoch: [2][6061/6062] Loss: [0.18315672975476993]Elapsed 34m 39s (remain 0m 0s) 
Epoch: [3][0/6062] Loss: [0.002020721323788166]Elapsed 0m 0s (remain 26m 17s) 
Epoch: [3][100/6062] Loss: [0.1218265407876628]Elapsed 0m 26s (remain 25m 38s) 
Epoch: [3][200/6062] Loss: [0.12255387346808154]Elapsed 0m 51s (remain 25m 15s) 
Epoch: [3][300/6062] Loss: [0.12309863785029608]Elapsed 1m 17s (remain 24m 51s) 
Epoch: [3][400/6062] Loss: [0.12017015149953898]Elapsed 1m 43s (remain 24m 26s) 
Epoch: [3][500/6062] Loss: [0.1204887686011836]Elapsed 2m 9s (remain 24m 0s) 
Epoch: [3][600/6062] Loss: [0.11833965048612914]Elapsed 2m 35s (remain 23m 35s) 
Epoch: [3][700/6062] Loss: [0.11212556044132513]Elapsed 3m 1s (remain 23m 9s) 
Epoch: [3][800/6062] Loss: [0.1138076146851662]Elapsed 3m 27s (remain 22m 43s) 
Epoch: [3][900/6062] Loss: [0.1122610263346274]Elapsed 3m 53s (remain 22m 18s) 
Epoch: [3][1000/6062] Loss: [0.11029354445611386]Elapsed 4m 19s (remain 21m 52s) 
Epoch: [3][1100/6062] Loss: [0.109040

Epoch 3 Step 1560 - avg_train_loss: 0.1077  avg_val_loss: 0.3776
Epoch 3 Step 1560 - Score: 0.6130


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [3][1600/6062] Loss: [0.10736761742407006]Elapsed 9m 41s (remain 26m 59s) 
Epoch: [3][1700/6062] Loss: [0.10681144122149547]Elapsed 10m 7s (remain 25m 56s) 
Epoch: [3][1800/6062] Loss: [0.1068130845997118]Elapsed 10m 33s (remain 24m 58s) 
Epoch: [3][1900/6062] Loss: [0.10642144655958705]Elapsed 10m 59s (remain 24m 2s) 
Epoch: [3][2000/6062] Loss: [0.10579848781470133]Elapsed 11m 25s (remain 23m 10s) 
Epoch: [3][2100/6062] Loss: [0.10613411498193236]Elapsed 11m 51s (remain 22m 20s) 
Epoch: [3][2200/6062] Loss: [0.10579754719874031]Elapsed 12m 17s (remain 21m 32s) 
Epoch: [3][2300/6062] Loss: [0.1057748445061208]Elapsed 12m 42s (remain 20m 47s) 
Epoch: [3][2400/6062] Loss: [0.10548268965775784]Elapsed 13m 8s (remain 20m 2s) 
Epoch: [3][2500/6062] Loss: [0.10460666359628869]Elapsed 13m 34s (remain 19m 20s) 
Epoch: [3][2600/6062] Loss: [0.10409464978853791]Elapsed 14m 0s (remain 18m 38s) 
Epoch: [3][2700/6062] Loss: [0.10312552464517453

Epoch 3 Step 3120 - avg_train_loss: 0.1021  avg_val_loss: 0.3932
Epoch 3 Step 3120 - Score: 0.6262


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [3][3200/6062] Loss: [0.10115021121757047]Elapsed 19m 22s (remain 17m 19s) 
Epoch: [3][3300/6062] Loss: [0.10135214266263269]Elapsed 19m 48s (remain 16m 34s) 
Epoch: [3][3400/6062] Loss: [0.10069405702641555]Elapsed 20m 14s (remain 15m 50s) 
Epoch: [3][3500/6062] Loss: [0.10058416304192615]Elapsed 20m 40s (remain 15m 7s) 
Epoch: [3][3600/6062] Loss: [0.10039032765613287]Elapsed 21m 6s (remain 14m 25s) 
Epoch: [3][3700/6062] Loss: [0.09987203304538095]Elapsed 21m 32s (remain 13m 44s) 
Epoch: [3][3800/6062] Loss: [0.09936405725419921]Elapsed 21m 58s (remain 13m 4s) 
Epoch: [3][3900/6062] Loss: [0.09929849212418546]Elapsed 22m 24s (remain 12m 24s) 
Epoch: [3][4000/6062] Loss: [0.09920013535773653]Elapsed 22m 50s (remain 11m 45s) 
Epoch: [3][4100/6062] Loss: [0.09861073962747924]Elapsed 23m 16s (remain 11m 7s) 
Epoch: [3][4200/6062] Loss: [0.09854602760000904]Elapsed 23m 42s (remain 10m 29s) 
Epoch: [3][4300/6062] Loss: [0.0983210018070

Epoch 3 Step 4680 - avg_train_loss: 0.0984  avg_val_loss: 0.4520
Epoch 3 Step 4680 - Score: 0.6722


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [3][4700/6062] Loss: [0.09816531652923552]Elapsed 28m 37s (remain 8m 17s) 
Epoch: [3][4800/6062] Loss: [0.09837353425983572]Elapsed 29m 3s (remain 7m 38s) 
Epoch: [3][4900/6062] Loss: [0.09812690165920306]Elapsed 29m 29s (remain 6m 59s) 
Epoch: [3][5000/6062] Loss: [0.09785630532302578]Elapsed 29m 55s (remain 6m 20s) 
Epoch: [3][5100/6062] Loss: [0.09766908173572278]Elapsed 30m 21s (remain 5m 43s) 
Epoch: [3][5200/6062] Loss: [0.09726311478866533]Elapsed 30m 47s (remain 5m 5s) 
Epoch: [3][5300/6062] Loss: [0.0972729677656207]Elapsed 31m 13s (remain 4m 28s) 
Epoch: [3][5400/6062] Loss: [0.09669604246872213]Elapsed 31m 39s (remain 3m 52s) 
Epoch: [3][5500/6062] Loss: [0.09649925238108895]Elapsed 32m 5s (remain 3m 16s) 
Epoch: [3][5600/6062] Loss: [0.09676978367601924]Elapsed 32m 31s (remain 2m 40s) 
Epoch: [3][5700/6062] Loss: [0.09689949025505859]Elapsed 32m 57s (remain 2m 5s) 
Epoch: [3][5800/6062] Loss: [0.09684744050285335]Elapsed

Epoch 3 - avg_train_loss: 0.0961  avg_val_loss: 0.4520  time: 2071s
Epoch 3 - Score: 0.6722


Epoch: [3][6061/6062] Loss: [0.09614959899209025]Elapsed 34m 31s (remain 0m 0s) 
Epoch: [4][0/6062] Loss: [0.04322555288672447]Elapsed 0m 0s (remain 21m 15s) 
Epoch: [4][100/6062] Loss: [0.055048969589256785]Elapsed 0m 26s (remain 25m 38s) 
Epoch: [4][200/6062] Loss: [0.049606657089475564]Elapsed 0m 51s (remain 25m 14s) 
Epoch: [4][300/6062] Loss: [0.046468168245678274]Elapsed 1m 17s (remain 24m 51s) 
Epoch: [4][400/6062] Loss: [0.04709970228834023]Elapsed 1m 43s (remain 24m 26s) 
Epoch: [4][500/6062] Loss: [0.04689780043341369]Elapsed 2m 9s (remain 24m 0s) 
Epoch: [4][600/6062] Loss: [0.0466558218683263]Elapsed 2m 35s (remain 23m 35s) 
Epoch: [4][700/6062] Loss: [0.047893052473827136]Elapsed 3m 1s (remain 23m 9s) 
Epoch: [4][800/6062] Loss: [0.04738546315144894]Elapsed 3m 27s (remain 22m 43s) 
Epoch: [4][900/6062] Loss: [0.04688242701008176]Elapsed 3m 53s (remain 22m 17s) 
Epoch: [4][1000/6062] Loss: [0.0464147796844684]Elapsed 4m 19s (remain 21m 52s) 
Epoch: [4][1100/6062] Loss: [0.0

Epoch 4 Step 1560 - avg_train_loss: 0.0461  avg_val_loss: 0.4502
Epoch 4 Step 1560 - Score: 0.6710


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [4][1600/6062] Loss: [0.045887248977908336]Elapsed 9m 41s (remain 26m 59s) 
Epoch: [4][1700/6062] Loss: [0.045466271006941686]Elapsed 10m 7s (remain 25m 56s) 
Epoch: [4][1800/6062] Loss: [0.04604745182422539]Elapsed 10m 33s (remain 24m 58s) 
Epoch: [4][1900/6062] Loss: [0.04619999592612371]Elapsed 10m 59s (remain 24m 2s) 
Epoch: [4][2000/6062] Loss: [0.04649295599733489]Elapsed 11m 25s (remain 23m 10s) 
Epoch: [4][2100/6062] Loss: [0.04633061438440059]Elapsed 11m 51s (remain 22m 20s) 
Epoch: [4][2200/6062] Loss: [0.045982455199851294]Elapsed 12m 16s (remain 21m 32s) 
Epoch: [4][2300/6062] Loss: [0.04599805431256923]Elapsed 12m 42s (remain 20m 47s) 
Epoch: [4][2400/6062] Loss: [0.045721670263747634]Elapsed 13m 8s (remain 20m 2s) 
Epoch: [4][2500/6062] Loss: [0.04560167777118567]Elapsed 13m 34s (remain 19m 20s) 
Epoch: [4][2600/6062] Loss: [0.04537449248209581]Elapsed 14m 0s (remain 18m 38s) 
Epoch: [4][2700/6062] Loss: [0.04510795624

Epoch 4 Step 3120 - avg_train_loss: 0.0449  avg_val_loss: 0.4693
Epoch 4 Step 3120 - Score: 0.6849


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [4][3200/6062] Loss: [0.04490790905336431]Elapsed 19m 22s (remain 17m 19s) 
Epoch: [4][3300/6062] Loss: [0.04530999001085455]Elapsed 19m 48s (remain 16m 33s) 
Epoch: [4][3400/6062] Loss: [0.04520787373657844]Elapsed 20m 14s (remain 15m 50s) 
Epoch: [4][3500/6062] Loss: [0.0453015246602754]Elapsed 20m 40s (remain 15m 7s) 
Epoch: [4][3600/6062] Loss: [0.04593195899284381]Elapsed 21m 6s (remain 14m 25s) 
Epoch: [4][3700/6062] Loss: [0.04619589954169069]Elapsed 21m 32s (remain 13m 44s) 
Epoch: [4][3800/6062] Loss: [0.04627667113007305]Elapsed 21m 58s (remain 13m 4s) 
Epoch: [4][3900/6062] Loss: [0.04617178208267152]Elapsed 22m 24s (remain 12m 24s) 
Epoch: [4][4000/6062] Loss: [0.046190351094492044]Elapsed 22m 50s (remain 11m 45s) 
Epoch: [4][4100/6062] Loss: [0.04597646145189654]Elapsed 23m 15s (remain 11m 7s) 
Epoch: [4][4200/6062] Loss: [0.04588587447915296]Elapsed 23m 41s (remain 10m 29s) 
Epoch: [4][4300/6062] Loss: [0.0458680191845

Epoch 4 Step 4680 - avg_train_loss: 0.0453  avg_val_loss: 0.4566
Epoch 4 Step 4680 - Score: 0.6757


EVAL: [551/552] Elapsed 2m 46s (remain 0m 0s) 
Epoch: [4][4700/6062] Loss: [0.045381755270815054]Elapsed 28m 37s (remain 8m 17s) 
Epoch: [4][4800/6062] Loss: [0.04523967650433892]Elapsed 29m 3s (remain 7m 37s) 
Epoch: [4][4900/6062] Loss: [0.04509409490608917]Elapsed 29m 29s (remain 6m 59s) 
Epoch: [4][5000/6062] Loss: [0.04511948565468163]Elapsed 29m 55s (remain 6m 20s) 
Epoch: [4][5100/6062] Loss: [0.04504750803468303]Elapsed 30m 21s (remain 5m 43s) 
Epoch: [4][5200/6062] Loss: [0.04511755036619129]Elapsed 30m 47s (remain 5m 5s) 
Epoch: [4][5300/6062] Loss: [0.04497595442133121]Elapsed 31m 13s (remain 4m 28s) 
Epoch: [4][5400/6062] Loss: [0.04494732555311875]Elapsed 31m 39s (remain 3m 52s) 
Epoch: [4][5500/6062] Loss: [0.04481917285284858]Elapsed 32m 5s (remain 3m 16s) 
Epoch: [4][5600/6062] Loss: [0.044694082448863436]Elapsed 32m 31s (remain 2m 40s) 
Epoch: [4][5700/6062] Loss: [0.04490398363215654]Elapsed 32m 57s (remain 2m 5s) 
Epoch: [4][5800/6062] Loss: [0.045025723856305336]Ela

Epoch 4 - avg_train_loss: 0.0449  avg_val_loss: 0.4566  time: 2071s
Epoch 4 - Score: 0.6757


Epoch: [4][6061/6062] Loss: [0.04492947932337044]Elapsed 34m 30s (remain 0m 0s) 


Score: 0.5712
Score: 0.5439
